Labelbox
diff --git a/‎CHANGELOG.md
Lines changed: 10 additions & 0 deletions b/‎CHANGELOG.md
Lines changed: 10 additions & 0 deletions
diff --git a/‎docs/source/conf.py
Lines changed: 1 addition & 1 deletion b/‎docs/source/conf.py
Lines changed: 1 addition & 1 deletion
diff --git a/‎examples/annotation_import/pdf.ipynb
Lines changed: 52 additions & 62 deletions b/‎examples/annotation_import/pdf.ipynb
Lines changed: 52 additions & 62 deletions
diff --git a/‎labelbox/__init__.py
Lines changed: 2 additions & 2 deletions b/‎labelbox/__init__.py
Lines changed: 2 additions & 2 deletions
diff --git a/‎labelbox/data/annotation_types/__init__.py
Lines changed: 8 additions & 0 deletions b/‎labelbox/data/annotation_types/__init__.py
Lines changed: 8 additions & 0 deletions
diff --git a/‎labelbox/data/annotation_types/annotation.py
Lines changed: 2 additions & 2 deletions b/‎labelbox/data/annotation_types/annotation.py
Lines changed: 2 additions & 2 deletions
diff --git a/‎labelbox/data/annotation_types/data/__init__.py
Lines changed: 6 additions & 1 deletion b/‎labelbox/data/annotation_types/data/__init__.py
Lines changed: 6 additions & 1 deletion
diff --git a/‎labelbox/data/annotation_types/data/audio.py
Lines changed: 5 additions & 0 deletions b/‎labelbox/data/annotation_types/data/audio.py
Lines changed: 5 additions & 0 deletions
diff --git a/‎labelbox/data/annotation_types/data/base_data.py
Lines changed: 1 addition & 0 deletions b/‎labelbox/data/annotation_types/data/base_data.py
Lines changed: 1 addition & 0 deletions
diff --git a/‎labelbox/data/annotation_types/data/conversation.py
Lines changed: 5 additions & 0 deletions b/‎labelbox/data/annotation_types/data/conversation.py
Lines changed: 5 additions & 0 deletions
@@ -1,5 +1,15 @@
 # Changelog
 
+# Version 3.41.0 (2023-03-15)
+
+## Added
+* New data classes for creating labels: `AudioData`, `ConversationData`, `DicomData`, `DocumentData`, `HTMLData`
+* New `DocumentEntity` annotation type class
+* New `last_activity_end` parameter for `Project.export_labels()`
+
+## Notebooks
+* Updated `annotation_import/pdf.ipynb` with example use of `DocumentEntity` class
+
 # Version 3.40.1 (2023-03-10)
 
 ## Fixed
 
@@ -21,7 +21,7 @@
 copyright = '2021, Labelbox'
 author = 'Labelbox'
 
-release = '3.40.1'
+release = '3.41.0'
 
 # -- General configuration ---------------------------------------------------
 
 
@@ -1,6 +1,6 @@
 {
   "nbformat": 4,
-  "nbformat_minor": 0,
+  "nbformat_minor": 1,
   "metadata": {},
   "cells": [
     {
@@ -77,13 +77,21 @@
     {
       "metadata": {},
       "source": [
-        "import labelbox as lb\n",
-        "import labelbox.types as lb_types\n",
+        "! pip install -e ../.."
+      ],
+      "cell_type": "code",
+      "outputs": [],
+      "execution_count": null
+    },
+    {
+      "metadata": {},
+      "source": [
+        "from typing import cast\n",
         "import uuid\n",
         "from uuid import uuid4\n",
-        "import json\n",
-        "import uuid\n",
-        "import numpy as np\n",
+        "\n",
+        "import labelbox as lb\n",
+        "import labelbox.types as lb_types\n",
         "from labelbox.schema.queue_mode import QueueMode"
       ],
       "cell_type": "code",
@@ -297,25 +305,7 @@
         "print(data_row)"
       ],
       "cell_type": "code",
-      "outputs": [
-        {
-          "name": "stdout",
-          "output_type": "stream",
-          "text": [
-            "<DataRow {\n",
-            "    \"created_at\": \"2023-03-09 15:00:03+00:00\",\n",
-            "    \"external_id\": null,\n",
-            "    \"global_key\": \"ae81ff87-ea8e-46f2-aaf0-766e3e7de1c5\",\n",
-            "    \"media_attributes\": {},\n",
-            "    \"metadata\": [],\n",
-            "    \"metadata_fields\": [],\n",
-            "    \"row_data\": \"https://storage.googleapis.com/labelbox-datasets/arxiv-pdf/data/99-word-token-pdfs/0801.3483.pdf\",\n",
-            "    \"uid\": \"clf18jp0a0wzr07zn8t5457h8\",\n",
-            "    \"updated_at\": \"2023-03-09 15:00:03+00:00\"\n",
-            "}>\n"
-          ]
-        }
-      ],
+      "outputs": [],
       "execution_count": null
     },
     {
@@ -438,23 +428,12 @@
       "source": [
         "project.create_batch(\n",
         "  \"PDF_annotation_batch\", # Each batch in a project must have a unique name\n",
-        "  dataset.export_data_rows(), # A list of data rows or data row ids\n",
+        "  list(dataset.export_data_rows()), # A list of data rows or data row ids\n",
         "  5 # priority between 1(Highest) - 5(lowest)\n",
         ")"
       ],
       "cell_type": "code",
-      "outputs": [
-        {
-          "data": {
-            "text/plain": [
-              "<Batch ID: 1649cba0-be8b-11ed-add0-43b68483b422>"
-            ]
-          },
-          "execution_count": 115,
-          "metadata": {},
-          "output_type": "execute_result"
-        }
-      ],
+      "outputs": [],
       "execution_count": null
     },
     {
@@ -477,6 +456,7 @@
         "import requests\n",
         "import json\n",
         "\n",
+        "\n",
         "## To learn how to generate a text layer for your documents please refer to the following repositories/files: \n",
         "# https://github.com/Labelbox/PDF-OCR-Transform-CLI/blob/main/src/scripts/gcloud/gcp-vision-to-lb-text-layer.py\n",
         "# https://github.com/Labelbox/PDF-OCR-Transform-CLI/blob/main/src/scripts/adobe/adobe-ocr-to-lb-text-layer.py\n",
@@ -486,13 +466,18 @@
         "## Fetch the content of the text layer\n",
         "res = requests.get(text_layer) \n",
         "\n",
+        "\n",
+        "\n",
         "## Parse the text layer\n",
+        "text_selections = []\n",
         "for obj in json.loads(res.text):\n",
         "  for group in obj['groups']: \n",
         "    ## Find the text group that we are interested in annotating\n",
         "    if group['content'] == \"Metal-insulator (MI) transitions have been one of the\":\n",
         "      ## We now need all the tokens associated with each word in this text group\n",
         "      list_tokens = [x['id'] for x in group['tokens']]\n",
+        "      document_text_selection = lb_types.DocumentTextSelection(groupId=group['id'], tokenIds=list_tokens, page=1)\n",
+        "      text_selections.append(document_text_selection)\n",
         "      entities_annotations_ndjson.update(\n",
         "        {\n",
         "          \"textSelections\": [\n",
@@ -504,6 +489,13 @@
         "          ]\n",
         "        }\n",
         "      )\n",
+        "entities_annotation_document_entity = lb_types.DocumentEntity(name=\"named_entity\", \n",
+        "                                          textSelections = text_selections)\n",
+        "entities_annotation = lb_types.ObjectAnnotation(name=\"named_entity\",\n",
+        "                                                value=entities_annotation_document_entity)\n",
+        "        \n",
+        "print(f\"entities_annotations_ndjson={entities_annotations_ndjson}\")\n",
+        "print(f\"entities_annotation={entities_annotation}\")\n",
         "  "
       ],
       "cell_type": "code",
@@ -514,7 +506,7 @@
       "metadata": {},
       "source": [
         "#### Python annotation\n",
-        "Here we create the complete label ndjson payload of annotations only using python annotation format. There is one annotation for each reference to an annotation that we created. Note that only a handful of python annotation types are supported for PDF documents."
+        "Here we create the complete labels ndjson payload of annotations using only the Python annotation format. There is one annotation for each reference to an annotation that we created. Note that only a handful of Python annotation types are supported for PDF documents."
       ],
       "cell_type": "markdown"
     },
@@ -523,12 +515,13 @@
       "source": [
         "# create a Label\n",
         "\n",
-        "label = []\n",
+        "labels = []\n",
         "for data_row in dataset.export_data_rows():\n",
-        "  label.append(lb_types.Label(\n",
+        "  labels.append(lb_types.Label(\n",
         "      data=lb_types.TextData(\n",
         "          uid=data_row.uid),\n",
         "      annotations = [\n",
+        "          entities_annotation,\n",
         "          checklist_annotation, \n",
         "          text_annotation,\n",
         "          radio_annotation\n",
@@ -544,7 +537,7 @@
       "metadata": {},
       "source": [
         "#### NDJson annotations\n",
-        "Here we create the complete label ndjson payload of annotations only using NDJSON format. There is one annotation for each reference to an annotation that we created above."
+        "Here we create the complete labels ndjson payload of annotations using only the NDJSON format. There is one annotation for each reference to an annotation that we created above."
       ],
       "cell_type": "markdown"
     },
@@ -589,26 +582,24 @@
     {
       "metadata": {},
       "source": [
+        "# upload_job = lb.MALPredictionImport.create_from_objects(\n",
+        "#     client = client,\n",
+        "#     project_id = project.uid,\n",
+        "#     name=\"pdf_annotation_upload\" + str(uuid.uuid4()),\n",
+        "#     predictions=ndjson_annotation)\n",
+        "\n",
         "upload_job = lb.MALPredictionImport.create_from_objects(\n",
         "    client = client,\n",
         "    project_id = project.uid,\n",
         "    name=\"pdf_annotation_upload\" + str(uuid.uuid4()),\n",
-        "    predictions=ndjson_annotation)\n",
+        "    predictions=labels)\n",
         "\n",
         "upload_job.wait_until_done()\n",
         "# Errors will appear for annotation uploads that failed.\n",
         "print(\"Errors:\", upload_job.errors)"
       ],
       "cell_type": "code",
-      "outputs": [
-        {
-          "name": "stdout",
-          "output_type": "stream",
-          "text": [
-            "Errors: []\n"
-          ]
-        }
-      ],
+      "outputs": [],
       "execution_count": null
     },
     {
@@ -631,16 +622,15 @@
         "print(\"Errors:\", upload_job.errors)"
       ],
       "cell_type": "code",
-      "outputs": [
-        {
-          "name": "stdout",
-          "output_type": "stream",
-          "text": [
-            "Errors: []\n"
-          ]
-        }
-      ],
+      "outputs": [],
+      "execution_count": null
+    },
+    {
+      "metadata": {},
+      "source": [],
+      "cell_type": "code",
+      "outputs": [],
       "execution_count": null
     }
   ]
-}
+}
@@ -1,11 +1,11 @@
 name = "labelbox"
-__version__ = "3.40.1"
+__version__ = "3.41.0"
 
 from labelbox.client import Client
 from labelbox.schema.project import Project
 from labelbox.schema.model import Model
 from labelbox.schema.bulk_import_request import BulkImportRequest
-from labelbox.schema.annotation_import import MALPredictionImport, MEAPredictionImport, LabelImport
+from labelbox.schema.annotation_import import MALPredictionImport, MEAPredictionImport, LabelImport, MEAToMALPredictionImport
 from labelbox.schema.dataset import Dataset
 from labelbox.schema.data_row import DataRow
 from labelbox.schema.label import Label
 
@@ -10,6 +10,9 @@
 from .annotation import ObjectAnnotation
 from .annotation import VideoObjectAnnotation
 
+from .ner import ConversationEntity
+from .ner import DocumentEntity
+from .ner import DocumentTextSelection
 from .ner import TextEntity
 
 from .classification import Checklist
@@ -18,6 +21,11 @@
 from .classification import Radio
 from .classification import Text
 
+from .data import AudioData
+from .data import ConversationData
+from .data import DicomData
+from .data import DocumentData
+from .data import HTMLData
 from .data import ImageData
 from .data import MaskData
 from .data import TextData
 
@@ -6,7 +6,7 @@
 from .classification import Checklist, Dropdown, Radio, Text
 from .feature import FeatureSchema
 from .geometry import Geometry, Rectangle, Point
-from .ner import TextEntity
+from .ner import DocumentEntity, TextEntity, ConversationEntity
 
 
 class BaseAnnotation(FeatureSchema, abc.ABC):
@@ -51,7 +51,7 @@ class ObjectAnnotation(BaseAnnotation, ConfidenceMixin):
         classifications (Optional[List[ClassificationAnnotation]]): Optional sub classification of the annotation
         extra (Dict[str, Any])
     """
-    value: Union[TextEntity, Geometry]
+    value: Union[TextEntity, ConversationEntity, DocumentEntity, Geometry]
     classifications: List[ClassificationAnnotation] = []
 
 
 
@@ -1,4 +1,9 @@
+from .audio import AudioData
+from .conversation import ConversationData
+from .dicom import DicomData
+from .document import DocumentData
+from .html import HTMLData
 from .raster import ImageData
 from .raster import MaskData
 from .text import TextData
-from .video import VideoData
+from .video import VideoData
@@ -0,0 +1,5 @@
+from .base_data import BaseData
+
+
+class AudioData(BaseData):
+    ...
@@ -11,5 +11,6 @@ class BaseData(BaseModel, ABC):
     """
     external_id: Optional[str] = None
     uid: Optional[str] = None
+    global_key: Optional[str] = None
     media_attributes: Optional[Dict[str, Any]] = None
     metadata: Optional[List[Dict[str, Any]]] = None
@@ -0,0 +1,5 @@
+from .base_data import BaseData
+
+
+class ConversationData(BaseData):
+    ...
-Original file line number
+Diff line change
@@ @@ -0,0 +1,5 @@ @@
 +from .base_data import BaseData
++
++
 +class AudioData(BaseData):
 +    ...