Skip to content

Commit f656d6b

Browse files
authored
Merge pull request #1001 from Labelbox/develop
Release v3.41.0
2 parents 7c7bf5a + 1fbe11d commit f656d6b

Some content is hidden

Large commits have some content hidden by default. Use the search box below for content that may be hidden.

56 files changed

+2282
-156
lines changed

CHANGELOG.md

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,15 @@
11
# Changelog
22

3+
# Version 3.41.0 (2023-03-15)
4+
5+
## Added
6+
* New data classes for creating labels: `AudioData`, `ConversationData`, `DicomData`, `DocumentData`, `HTMLData`
7+
* New `DocumentEntity` annotation type class
8+
* New parameter `last_activity_end` to `Project.export_labels()`
9+
10+
## Notebooks
11+
* Updated `annotation_import/pdf.ipynb` with example use of `DocumentEntity` class
12+
313
# Version 3.40.1 (2023-03-10)
414

515
## Fixed

docs/source/conf.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@
2121
copyright = '2021, Labelbox'
2222
author = 'Labelbox'
2323

24-
release = '3.40.1'
24+
release = '3.41.0'
2525

2626
# -- General configuration ---------------------------------------------------
2727

examples/annotation_import/pdf.ipynb

Lines changed: 52 additions & 62 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
{
22
"nbformat": 4,
3-
"nbformat_minor": 0,
3+
"nbformat_minor": 1,
44
"metadata": {},
55
"cells": [
66
{
@@ -77,13 +77,21 @@
7777
{
7878
"metadata": {},
7979
"source": [
80-
"import labelbox as lb\n",
81-
"import labelbox.types as lb_types\n",
80+
"! pip install -e ../.."
81+
],
82+
"cell_type": "code",
83+
"outputs": [],
84+
"execution_count": null
85+
},
86+
{
87+
"metadata": {},
88+
"source": [
89+
"from typing import cast\n",
8290
"import uuid\n",
8391
"from uuid import uuid4\n",
84-
"import json\n",
85-
"import uuid\n",
86-
"import numpy as np\n",
92+
"\n",
93+
"import labelbox as lb\n",
94+
"import labelbox.types as lb_types\n",
8795
"from labelbox.schema.queue_mode import QueueMode"
8896
],
8997
"cell_type": "code",
@@ -297,25 +305,7 @@
297305
"print(data_row)"
298306
],
299307
"cell_type": "code",
300-
"outputs": [
301-
{
302-
"name": "stdout",
303-
"output_type": "stream",
304-
"text": [
305-
"<DataRow {\n",
306-
" \"created_at\": \"2023-03-09 15:00:03+00:00\",\n",
307-
" \"external_id\": null,\n",
308-
" \"global_key\": \"ae81ff87-ea8e-46f2-aaf0-766e3e7de1c5\",\n",
309-
" \"media_attributes\": {},\n",
310-
" \"metadata\": [],\n",
311-
" \"metadata_fields\": [],\n",
312-
" \"row_data\": \"https://storage.googleapis.com/labelbox-datasets/arxiv-pdf/data/99-word-token-pdfs/0801.3483.pdf\",\n",
313-
" \"uid\": \"clf18jp0a0wzr07zn8t5457h8\",\n",
314-
" \"updated_at\": \"2023-03-09 15:00:03+00:00\"\n",
315-
"}>\n"
316-
]
317-
}
318-
],
308+
"outputs": [],
319309
"execution_count": null
320310
},
321311
{
@@ -438,23 +428,12 @@
438428
"source": [
439429
"project.create_batch(\n",
440430
" \"PDF_annotation_batch\", # Each batch in a project must have a unique name\n",
441-
" dataset.export_data_rows(), # A list of data rows or data row ids\n",
431+
" list(dataset.export_data_rows()), # A list of data rows or data row ids\n",
442432
"    5 # priority between 1 (highest) and 5 (lowest)\n",
443433
")"
444434
],
445435
"cell_type": "code",
446-
"outputs": [
447-
{
448-
"data": {
449-
"text/plain": [
450-
"<Batch ID: 1649cba0-be8b-11ed-add0-43b68483b422>"
451-
]
452-
},
453-
"execution_count": 115,
454-
"metadata": {},
455-
"output_type": "execute_result"
456-
}
457-
],
436+
"outputs": [],
458437
"execution_count": null
459438
},
460439
{
@@ -477,6 +456,7 @@
477456
"import requests\n",
478457
"import json\n",
479458
"\n",
459+
"\n",
480460
"## To learn how to generate a text layer for your documents, please refer to the following repositories/files: \n",
481461
"# https://github.com/Labelbox/PDF-OCR-Transform-CLI/blob/main/src/scripts/gcloud/gcp-vision-to-lb-text-layer.py\n",
482462
"# https://github.com/Labelbox/PDF-OCR-Transform-CLI/blob/main/src/scripts/adobe/adobe-ocr-to-lb-text-layer.py\n",
@@ -486,13 +466,18 @@
486466
"## Fetch the content of the text layer\n",
487467
"res = requests.get(text_layer) \n",
488468
"\n",
469+
"\n",
470+
"\n",
489471
"## Parse the text layer\n",
472+
"text_selections = []\n",
490473
"for obj in json.loads(res.text):\n",
491474
" for group in obj['groups']: \n",
492475
" ## Find the text group that we are interested in annotating\n",
493476
" if group['content'] == \"Metal-insulator (MI) transitions have been one of the\":\n",
494477
" ## We now need all the tokens associated with each word in this text group\n",
495478
" list_tokens = [x['id'] for x in group['tokens']]\n",
479+
" document_text_selection = lb_types.DocumentTextSelection(groupId=group['id'], tokenIds=list_tokens, page=1)\n",
480+
" text_selections.append(document_text_selection)\n",
496481
" entities_annotations_ndjson.update(\n",
497482
" {\n",
498483
" \"textSelections\": [\n",
@@ -504,6 +489,13 @@
504489
" ]\n",
505490
" }\n",
506491
" )\n",
492+
"entities_annotation_document_entity = lb_types.DocumentEntity(name=\"named_entity\", \n",
493+
" textSelections = text_selections)\n",
494+
"entities_annotation = lb_types.ObjectAnnotation(name=\"named_entity\",\n",
495+
" value=entities_annotation_document_entity)\n",
496+
" \n",
497+
"print(f\"entities_annotations_ndjson={entities_annotations_ndjson}\")\n",
498+
"print(f\"entities_annotation={entities_annotation}\")\n",
507499
" "
508500
],
509501
"cell_type": "code",
@@ -514,7 +506,7 @@
514506
"metadata": {},
515507
"source": [
516508
"#### Python annotation\n",
517-
"Here we create the complete label ndjson payload of annotations only using python annotation format. There is one annotation for each reference to an annotation that we created. Note that only a handful of python annotation types are supported for PDF documents."
509+
"Here we create the complete labels payload of annotations using only the Python annotation format. There is one annotation for each reference to an annotation that we created. Note that only a handful of Python annotation types are supported for PDF documents."
518510
],
519511
"cell_type": "markdown"
520512
},
@@ -523,12 +515,13 @@
523515
"source": [
524516
"# create a Label\n",
525517
"\n",
526-
"label = []\n",
518+
"labels = []\n",
527519
"for data_row in dataset.export_data_rows():\n",
528-
" label.append(lb_types.Label(\n",
520+
" labels.append(lb_types.Label(\n",
529521
" data=lb_types.TextData(\n",
530522
" uid=data_row.uid),\n",
531523
" annotations = [\n",
524+
" entities_annotation,\n",
532525
" checklist_annotation, \n",
533526
" text_annotation,\n",
534527
" radio_annotation\n",
@@ -544,7 +537,7 @@
544537
"metadata": {},
545538
"source": [
546539
"#### NDJson annotations\n",
547-
"Here we create the complete label ndjson payload of annotations only using NDJSON format. There is one annotation for each reference to an annotation that we created above."
540+
"Here we create the complete labels payload of annotations using only the NDJSON format. There is one annotation for each reference to an annotation that we created above."
548541
],
549542
"cell_type": "markdown"
550543
},
@@ -589,26 +582,24 @@
589582
{
590583
"metadata": {},
591584
"source": [
585+
"# upload_job = lb.MALPredictionImport.create_from_objects(\n",
586+
"# client = client,\n",
587+
"# project_id = project.uid,\n",
588+
"# name=\"pdf_annotation_upload\" + str(uuid.uuid4()),\n",
589+
"# predictions=ndjson_annotation)\n",
590+
"\n",
592591
"upload_job = lb.MALPredictionImport.create_from_objects(\n",
593592
" client = client,\n",
594593
" project_id = project.uid,\n",
595594
" name=\"pdf_annotation_upload\" + str(uuid.uuid4()),\n",
596-
" predictions=ndjson_annotation)\n",
595+
" predictions=labels)\n",
597596
"\n",
598597
"upload_job.wait_until_done()\n",
599598
"# Errors will appear for annotation uploads that failed.\n",
600599
"print(\"Errors:\", upload_job.errors)"
601600
],
602601
"cell_type": "code",
603-
"outputs": [
604-
{
605-
"name": "stdout",
606-
"output_type": "stream",
607-
"text": [
608-
"Errors: []\n"
609-
]
610-
}
611-
],
602+
"outputs": [],
612603
"execution_count": null
613604
},
614605
{
@@ -631,16 +622,15 @@
631622
"print(\"Errors:\", upload_job.errors)"
632623
],
633624
"cell_type": "code",
634-
"outputs": [
635-
{
636-
"name": "stdout",
637-
"output_type": "stream",
638-
"text": [
639-
"Errors: []\n"
640-
]
641-
}
642-
],
625+
"outputs": [],
626+
"execution_count": null
627+
},
628+
{
629+
"metadata": {},
630+
"source": [],
631+
"cell_type": "code",
632+
"outputs": [],
643633
"execution_count": null
644634
}
645635
]
646-
}
636+
}

labelbox/__init__.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,11 @@
11
name = "labelbox"
2-
__version__ = "3.40.1"
2+
__version__ = "3.41.0"
33

44
from labelbox.client import Client
55
from labelbox.schema.project import Project
66
from labelbox.schema.model import Model
77
from labelbox.schema.bulk_import_request import BulkImportRequest
8-
from labelbox.schema.annotation_import import MALPredictionImport, MEAPredictionImport, LabelImport
8+
from labelbox.schema.annotation_import import MALPredictionImport, MEAPredictionImport, LabelImport, MEAToMALPredictionImport
99
from labelbox.schema.dataset import Dataset
1010
from labelbox.schema.data_row import DataRow
1111
from labelbox.schema.label import Label

labelbox/data/annotation_types/__init__.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,9 @@
1010
from .annotation import ObjectAnnotation
1111
from .annotation import VideoObjectAnnotation
1212

13+
from .ner import ConversationEntity
14+
from .ner import DocumentEntity
15+
from .ner import DocumentTextSelection
1316
from .ner import TextEntity
1417

1518
from .classification import Checklist
@@ -18,6 +21,11 @@
1821
from .classification import Radio
1922
from .classification import Text
2023

24+
from .data import AudioData
25+
from .data import ConversationData
26+
from .data import DicomData
27+
from .data import DocumentData
28+
from .data import HTMLData
2129
from .data import ImageData
2230
from .data import MaskData
2331
from .data import TextData

labelbox/data/annotation_types/annotation.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
from .classification import Checklist, Dropdown, Radio, Text
77
from .feature import FeatureSchema
88
from .geometry import Geometry, Rectangle, Point
9-
from .ner import TextEntity
9+
from .ner import DocumentEntity, TextEntity, ConversationEntity
1010

1111

1212
class BaseAnnotation(FeatureSchema, abc.ABC):
@@ -51,7 +51,7 @@ class ObjectAnnotation(BaseAnnotation, ConfidenceMixin):
5151
classifications (Optional[List[ClassificationAnnotation]]): Optional sub classification of the annotation
5252
extra (Dict[str, Any])
5353
"""
54-
value: Union[TextEntity, Geometry]
54+
value: Union[TextEntity, ConversationEntity, DocumentEntity, Geometry]
5555
classifications: List[ClassificationAnnotation] = []
5656

5757

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,9 @@
1+
from .audio import AudioData
2+
from .conversation import ConversationData
3+
from .dicom import DicomData
4+
from .document import DocumentData
5+
from .html import HTMLData
16
from .raster import ImageData
27
from .raster import MaskData
38
from .text import TextData
4-
from .video import VideoData
9+
from .video import VideoData
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
from .base_data import BaseData
2+
3+
4+
class AudioData(BaseData):
5+
...

labelbox/data/annotation_types/data/base_data.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,5 +11,6 @@ class BaseData(BaseModel, ABC):
1111
"""
1212
external_id: Optional[str] = None
1313
uid: Optional[str] = None
14+
global_key: Optional[str] = None
1415
media_attributes: Optional[Dict[str, Any]] = None
1516
metadata: Optional[List[Dict[str, Any]]] = None
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
from .base_data import BaseData
2+
3+
4+
class ConversationData(BaseData):
5+
...

0 commit comments

Comments
 (0)