Skip to content

Commit 3c14e62

Browse files
committed
Create a wrapper to ensure create_dataset is successful
1 parent 407b93f commit 3c14e62

File tree

7 files changed

+69
-72
lines changed

7 files changed

+69
-72
lines changed

libs/labelbox/tests/conftest.py

Lines changed: 31 additions & 50 deletions
Original file line number | Diff line number | Diff line change
@@ -502,33 +502,15 @@ def consensus_project_with_batch(
502502
@pytest.fixture
503503
def dataset(client, rand_gen):
504504
# Handle invalid default IAM integrations in test environments gracefully
505-
try:
506-
dataset = client.create_dataset(name=rand_gen(str))
507-
except ValueError as e:
508-
if "Integration is not valid" in str(e):
509-
# Fallback to creating dataset without IAM integration for tests
510-
dataset = client.create_dataset(
511-
name=rand_gen(str), iam_integration=None
512-
)
513-
else:
514-
raise e
505+
dataset = create_dataset_robust(client, name=rand_gen(str))
515506
yield dataset
516507
dataset.delete()
517508

518509

519510
@pytest.fixture(scope="function")
520511
def unique_dataset(client, rand_gen):
521512
# Handle invalid default IAM integrations in test environments gracefully
522-
try:
523-
dataset = client.create_dataset(name=rand_gen(str))
524-
except ValueError as e:
525-
if "Integration is not valid" in str(e):
526-
# Fallback to creating dataset without IAM integration for tests
527-
dataset = client.create_dataset(
528-
name=rand_gen(str), iam_integration=None
529-
)
530-
else:
531-
raise e
513+
dataset = create_dataset_robust(client, name=rand_gen(str))
532514
yield dataset
533515
dataset.delete()
534516

@@ -878,16 +860,7 @@ def func(project):
878860
@pytest.fixture
879861
def initial_dataset(client, rand_gen):
880862
# Handle invalid default IAM integrations in test environments gracefully
881-
try:
882-
dataset = client.create_dataset(name=rand_gen(str))
883-
except ValueError as e:
884-
if "Integration is not valid" in str(e):
885-
# Fallback to creating dataset without IAM integration for tests
886-
dataset = client.create_dataset(
887-
name=rand_gen(str), iam_integration=None
888-
)
889-
else:
890-
raise e
863+
dataset = create_dataset_robust(client, name=rand_gen(str))
891864
yield dataset
892865

893866
dataset.delete()
@@ -896,16 +869,7 @@ def initial_dataset(client, rand_gen):
896869
@pytest.fixture
897870
def video_data(client, rand_gen, video_data_row, wait_for_data_row_processing):
898871
# Handle invalid default IAM integrations in test environments gracefully
899-
try:
900-
dataset = client.create_dataset(name=rand_gen(str))
901-
except ValueError as e:
902-
if "Integration is not valid" in str(e):
903-
# Fallback to creating dataset without IAM integration for tests
904-
dataset = client.create_dataset(
905-
name=rand_gen(str), iam_integration=None
906-
)
907-
else:
908-
raise e
872+
dataset = create_dataset_robust(client, name=rand_gen(str))
909873
data_row_ids = []
910874
data_row = dataset.create_data_row(video_data_row)
911875
data_row = wait_for_data_row_processing(client, data_row)
@@ -925,16 +889,7 @@ def create_video_data_row(rand_gen):
925889
@pytest.fixture
926890
def video_data_100_rows(client, rand_gen, wait_for_data_row_processing):
927891
# Handle invalid default IAM integrations in test environments gracefully
928-
try:
929-
dataset = client.create_dataset(name=rand_gen(str))
930-
except ValueError as e:
931-
if "Integration is not valid" in str(e):
932-
# Fallback to creating dataset without IAM integration for tests
933-
dataset = client.create_dataset(
934-
name=rand_gen(str), iam_integration=None
935-
)
936-
else:
937-
raise e
892+
dataset = create_dataset_robust(client, name=rand_gen(str))
938893
data_row_ids = []
939894
for _ in range(100):
940895
data_row = dataset.create_data_row(create_video_data_row(rand_gen))
@@ -1326,3 +1281,29 @@ def module_teardown_helpers():
13261281
@pytest.fixture
13271282
def label_helpers():
13281283
return LabelHelpers()
1284+
1285+
1286+
def create_dataset_robust(client, **kwargs):
1287+
"""
1288+
Robust dataset creation that handles invalid default IAM integrations gracefully.
1289+
1290+
This is a helper function for tests that need to create datasets directly
1291+
instead of using fixtures. It falls back to creating datasets without
1292+
IAM integration when the default integration is invalid.
1293+
1294+
Args:
1295+
client: Labelbox client instance
1296+
**kwargs: Arguments to pass to create_dataset
1297+
1298+
Returns:
1299+
Dataset: Created dataset
1300+
"""
1301+
try:
1302+
return client.create_dataset(**kwargs)
1303+
except ValueError as e:
1304+
if "Integration is not valid" in str(e):
1305+
# Fallback to creating dataset without IAM integration for tests
1306+
kwargs["iam_integration"] = None
1307+
return client.create_dataset(**kwargs)
1308+
else:
1309+
raise e

libs/labelbox/tests/data/annotation_import/conftest.py

Lines changed: 5 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -11,6 +11,7 @@
1111
from labelbox.schema.model_run import ModelRun
1212
from labelbox.schema.ontology import Ontology
1313
from labelbox.schema.project import Project
14+
from ...conftest import create_dataset_robust
1415

1516
"""
1617
The main fixtures of this library are configured_project and configured_project_by_global_key. Both fixtures generate data rows with a parametrize media type. They create the amount of data rows equal to the DATA_ROW_COUNT variable below. The data rows are generated with a factory fixture that returns a function that allows you to pass a global key. The ontologies are generated normalized and based on the MediaType given (i.e. only features supported by MediaType are created). This ontology is later used to obtain the correct annotations with the prediction_id_mapping and corresponding inferences. Each data row will have all possible annotations attached supported for the MediaType.
@@ -653,7 +654,7 @@ def _create_response_creation_project(
653654
) -> Tuple[Project, Ontology, Dataset]:
654655
"For response creation projects"
655656

656-
dataset = client.create_dataset(name=rand_gen(str))
657+
dataset = create_dataset_robust(client, name=rand_gen(str))
657658

658659
project = client.create_response_creation_project(
659660
name=f"{ontology_kind}-{rand_gen(str)}"
@@ -695,7 +696,7 @@ def _create_response_creation_project(
695696
def llm_prompt_response_creation_dataset_with_data_row(
696697
client: Client, rand_gen
697698
):
698-
dataset = client.create_dataset(name=rand_gen(str))
699+
dataset = create_dataset_robust(client, name=rand_gen(str))
699700
global_key = str(uuid.uuid4())
700701

701702
convo_data = {
@@ -752,7 +753,7 @@ def _create_prompt_response_project(
752753
def _create_offline_mmc_project(
753754
client: Client, rand_gen, data_row_json, normalized_ontology
754755
) -> Tuple[Project, Ontology, Dataset]:
755-
dataset = client.create_dataset(name=rand_gen(str))
756+
dataset = create_dataset_robust(client, name=rand_gen(str))
756757

757758
project = client.create_offline_model_evaluation_project(
758759
name=f"offline-mmc-{rand_gen(str)}",
@@ -797,7 +798,7 @@ def _create_project(
797798
) -> Tuple[Project, Ontology, Dataset]:
798799
"""Shared function to configure project for integration tests"""
799800

800-
dataset = client.create_dataset(name=rand_gen(str))
801+
dataset = create_dataset_robust(client, name=rand_gen(str))
801802

802803
project = client.create_project(
803804
name=f"{media_type}-{rand_gen(str)}", media_type=media_type

libs/labelbox/tests/data/annotation_import/test_relationships.py

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -22,6 +22,7 @@
2222
)
2323
from labelbox.data.serialization.ndjson import NDJsonConverter
2424
import pytest
25+
from ...conftest import create_dataset_robust
2526

2627

2728
def validate_iso_format(date_string: str):
@@ -163,7 +164,7 @@ def configured_project(
163164

164165
dataset = None
165166

166-
dataset = client.create_dataset(name=rand_gen(str))
167+
dataset = create_dataset_robust(client, name=rand_gen(str))
167168

168169
project = client.create_project(
169170
name=f"{media_type}-{rand_gen(str)}", media_type=media_type

libs/labelbox/tests/integration/test_batch.py

Lines changed: 6 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -10,10 +10,14 @@
1010
)
1111

1212
from labelbox import Dataset, Project
13+
from ..conftest import create_dataset_robust
1314

1415

1516
def get_data_row_ids(ds: Dataset):
16-
return [dr.uid for dr in list(ds.data_rows())]
17+
export_task = ds.export()
18+
export_task.wait_till_done()
19+
stream = export_task.get_buffered_stream()
20+
return [dr.json["data_row"]["id"] for dr in stream]
1721

1822

1923
def test_create_batch(project: Project, big_dataset_data_row_ids: List[str]):
@@ -243,7 +247,7 @@ def test_list_all_batches(project: Project, client, image_url: str):
243247
datasets = []
244248

245249
for assets in data:
246-
dataset = client.create_dataset(name=str(uuid4()))
250+
dataset = create_dataset_robust(client, name=str(uuid4()))
247251
create_data_rows_task = dataset.create_data_rows(assets)
248252
create_data_rows_task.wait_till_done()
249253
datasets.append(dataset)

libs/labelbox/tests/integration/test_dataset.py

Lines changed: 7 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -8,12 +8,13 @@
88
from labelbox.schema.internal.descriptor_file_creator import (
99
DescriptorFileCreator,
1010
)
11+
from ..conftest import create_dataset_robust
1112

1213

1314
def test_dataset(client, rand_gen):
1415
# confirm dataset can be created
1516
name = rand_gen(str)
16-
dataset = client.create_dataset(name=name)
17+
dataset = create_dataset_robust(client, name=name)
1718
assert dataset.name == name
1819
assert dataset.created_by() == client.get_user()
1920
assert dataset.organization() == client.get_organization()
@@ -52,11 +53,14 @@ def test_dataset(client, rand_gen):
5253
def dataset_for_filtering(client, rand_gen):
5354
name_1 = rand_gen(str)
5455
name_2 = rand_gen(str)
55-
d1 = client.create_dataset(name=name_1)
56-
d2 = client.create_dataset(name=name_2)
56+
d1 = create_dataset_robust(client, name=name_1)
57+
d2 = create_dataset_robust(client, name=name_2)
5758

5859
yield name_1, d1, name_2, d2
5960

61+
d1.delete()
62+
d2.delete()
63+
6064

6165
def test_dataset_filtering(client, dataset_for_filtering):
6266
name_1, d1, name_2, d2 = dataset_for_filtering

libs/labelbox/tests/integration/test_delegated_access.py

Lines changed: 15 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -11,6 +11,7 @@
1111
GcpIamIntegrationSettings,
1212
AzureIamIntegrationSettings,
1313
)
14+
from ..conftest import create_dataset_robust
1415

1516

1617
def delete_iam_integration(client, iam_integration_id: str):
@@ -382,7 +383,7 @@ def test_default_integration():
382383
Org ID: cl269lvvj78b50zau34s4550z
383384
384385
client = Client(api_key=os.environ.get("DA_GCP_LABELBOX_API_KEY"))
385-
ds = client.create_dataset(name="new_ds")
386+
ds = create_dataset_robust(client, name="new_ds")
386387
dr = ds.create_data_row(
387388
row_data="gs://jtso-gcs-sdk-da-tests/nikita-samokhin-D6QS6iv_CTY-unsplash.jpg"
388389
)
@@ -414,7 +415,9 @@ def test_non_default_integration():
414415
inte for inte in integrations if "aws-da-test-bucket" in inte.name
415416
][0]
416417
assert integration.valid
417-
ds = client.create_dataset(iam_integration=integration, name="new_ds")
418+
ds = create_dataset_robust(
419+
client, iam_integration=integration, name="new_ds"
420+
)
418421
assert ds.iam_integration().name == "aws-da-test-bucket"
419422
dr = ds.create_data_row(
420423
row_data="https://jtso-aws-da-sdk-tests.s3.us-east-2.amazonaws.com/adrian-yu-qkN4D3Rf1gw-unsplash.jpg"
@@ -424,7 +427,7 @@ def test_non_default_integration():
424427

425428

426429
def test_no_integration(client, image_url):
427-
ds = client.create_dataset(iam_integration=None, name="new_ds")
430+
ds = create_dataset_robust(client, iam_integration=None, name="new_ds")
428431
assert ds.iam_integration() is None
429432
dr = ds.create_data_row(row_data=image_url)
430433
assert requests.get(dr.row_data).status_code == 200
@@ -433,7 +436,7 @@ def test_no_integration(client, image_url):
433436

434437
@pytest.mark.skip(reason="Assumes state of account doesn't have integration")
435438
def test_no_default_integration(client):
436-
ds = client.create_dataset(name="new_ds")
439+
ds = create_dataset_robust(client, name="new_ds")
437440
assert ds.iam_integration() is None
438441
ds.delete()
439442

@@ -466,8 +469,8 @@ def test_add_integration_from_object():
466469
if "aws-da-test-bucket" in integration.name
467470
][0]
468471

469-
ds = client.create_dataset(
470-
iam_integration=None, name=f"integration_add_obj-{uuid.uuid4()}"
472+
ds = create_dataset_robust(
473+
client, iam_integration=None, name=f"integration_add_obj-{uuid.uuid4()}"
471474
)
472475

473476
# Test set integration with object
@@ -506,8 +509,8 @@ def test_add_integration_from_uid():
506509
if "aws-da-test-bucket" in integration.name
507510
][0]
508511

509-
ds = client.create_dataset(
510-
iam_integration=None, name=f"integration_add_id-{uuid.uuid4()}"
512+
ds = create_dataset_robust(
513+
client, iam_integration=None, name=f"integration_add_id-{uuid.uuid4()}"
511514
)
512515

513516
# Test set integration with integration id
@@ -552,8 +555,10 @@ def test_integration_remove():
552555
if "aws-da-test-bucket" in integration.name
553556
][0]
554557

555-
ds = client.create_dataset(
556-
iam_integration=integration, name=f"integration_remove-{uuid.uuid4()}"
558+
ds = create_dataset_robust(
559+
client,
560+
iam_integration=integration,
561+
name=f"integration_remove-{uuid.uuid4()}",
557562
)
558563

559564
# Test unset integration

libs/labelbox/tests/integration/test_pagination.py

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -3,13 +3,14 @@
33
import pytest
44

55
from labelbox.schema.dataset import Dataset
6+
from ..conftest import create_dataset_robust
67

78

89
@pytest.fixture
910
def data_for_dataset_order_test(client, rand_gen):
1011
name = rand_gen(str)
11-
dataset1 = client.create_dataset(name=name)
12-
dataset2 = client.create_dataset(name=name)
12+
dataset1 = create_dataset_robust(client, name=name)
13+
dataset2 = create_dataset_robust(client, name=name)
1314

1415
yield name
1516

0 commit comments

Comments (0)