Virtual Datasets #8708

Open: frcroth wants to merge 40 commits into master from explore-virtual-datasets

Commits (40)
9f89d38
Explore remote datasets as virtual datasets
frcroth Jun 23, 2025
42101a9
Do not have virtual remote datasets deleted
frcroth Jun 23, 2025
18dfe98
Put mag in db
frcroth Jun 25, 2025
9c3cf74
Add temporary front end for testing virtual datasets
frcroth Jun 25, 2025
391227a
Use mags for WKW datasets
frcroth Jun 25, 2025
916542d
Merge branch 'master' into explore-virtual-datasets
frcroth Jul 2, 2025
3b3b13c
Move zarr streaming stuff to service, todo: add controller with datas…
frcroth Jul 2, 2025
3f81a85
Move old zarr routes to LegacyController, update zarr routes to use id
frcroth Jul 7, 2025
ac0f66d
Use datasetId in BinaryDataController
frcroth Jul 7, 2025
d51dea9
Agglomerate files by dataset id
frcroth Jul 7, 2025
371f3fb
Merge branch 'master' into explore-virtual-datasets
frcroth Jul 7, 2025
611e552
Update more routes to use dataset id
frcroth Jul 7, 2025
a4aaff4
Disable deletion route on virtual datasets for now
frcroth Jul 9, 2025
677c8fe
Merge branch 'master' into explore-virtual-datasets
frcroth Jul 9, 2025
5b220ac
Use datasetId for connectome routes
frcroth Jul 9, 2025
0fc1834
Move compose to webknossos
frcroth Jul 9, 2025
6e27ba5
Merge branch 'master' into explore-virtual-datasets
frcroth Jul 9, 2025
b1797fc
Fix WKW dataset mags being lost in parsing
frcroth Jul 14, 2025
72de557
Merge branch 'master' into explore-virtual-datasets
frcroth Jul 14, 2025
ffdb99f
Adapt RemoteFallbackLayer to use datasetIds
frcroth Jul 14, 2025
f4c2c0c
Add 'isVirtual' column to datasets
frcroth Jul 14, 2025
f4ec53f
Remove usages of datasource id in rest api
frcroth Jul 14, 2025
3e84232
Handle dataset deletion for virtual dataset
frcroth Jul 16, 2025
82d4796
Update view in migration
frcroth Jul 16, 2025
a5edc1c
Merge branch 'master' into explore-virtual-datasets
frcroth Jul 16, 2025
38c85fd
Fix frontend types
frcroth Jul 16, 2025
6c84acd
Try to fix frontend tests
frcroth Jul 16, 2025
50c8839
Lint frontend
frcroth Jul 16, 2025
e639ca2
Remove toAbstractLayer
frcroth Jul 21, 2025
85745a2
Make WKW layers datalayerwithmaglocators
frcroth Jul 21, 2025
f2a22ac
Use mags in frontend WKW layer type validation
frcroth Jul 21, 2025
8872a46
Fix update data source route for virtual datasets
frcroth Jul 21, 2025
00b336b
Update changelog
frcroth Jul 21, 2025
61bf9d7
Merge branch 'master' into explore-virtual-datasets
frcroth Jul 21, 2025
3f73688
Fix types
frcroth Jul 21, 2025
59b566c
Update docs to use mags for WKW datasets
frcroth Jul 21, 2025
66b035a
Format backend
frcroth Jul 21, 2025
0e64a46
Fix backend lint
frcroth Jul 21, 2025
4bef4a8
Merge branch 'master' into explore-virtual-datasets
frcroth Jul 21, 2025
f4cc156
Apply suggestions from the rabbit
frcroth Jul 21, 2025
8 changes: 3 additions & 5 deletions app/controllers/AnnotationIOController.scala
@@ -337,8 +337,7 @@ class AnnotationIOController @Inject()(
else volumeTracing.boundingBox

for {
- tracingCanHaveSegmentIndex <- canHaveSegmentIndex(organizationId,
- dataset.name,
+ tracingCanHaveSegmentIndex <- canHaveSegmentIndex(dataset._id.toString,
fallbackLayerOpt.map(_.name),
remoteDataStoreClient)
elementClassProto <- fallbackLayerOpt
@@ -358,13 +357,12 @@
}

private def canHaveSegmentIndex(
- organizationId: String,
- datasetName: String,
+ datasetId: String,
fallbackLayerName: Option[String],
remoteDataStoreClient: WKRemoteDataStoreClient)(implicit ec: ExecutionContext): Fox[Boolean] =
fallbackLayerName match {
case Some(layerName) =>
- remoteDataStoreClient.hasSegmentIndexFile(organizationId, datasetName, layerName)
+ remoteDataStoreClient.hasSegmentIndexFile(datasetId, layerName)
case None =>
Fox.successful(true)
}
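The practical upshot for callers is that a segment-index lookup now needs only the dataset id. A minimal hypothetical call site, with names as in the surrounding diff and a placeholder layer name:

```scala
// Hypothetical call site under the new id-based scheme; no organization or
// dataset-name resolution is needed before asking the datastore.
val hasIndex: Fox[Boolean] =
  remoteDataStoreClient.hasSegmentIndexFile(dataset._id.toString, "segmentation")
```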
16 changes: 12 additions & 4 deletions app/controllers/DatasetController.scala
@@ -89,6 +89,7 @@ class DatasetController @Inject()(userService: UserService,
analyticsService: AnalyticsService,
mailchimpClient: MailchimpClient,
wkExploreRemoteLayerService: WKExploreRemoteLayerService,
composeService: ComposeService,
sil: Silhouette[WkEnv])(implicit ec: ExecutionContext, bodyParsers: PlayBodyParsers)
extends Controller
with MetadataAssertions {
@@ -145,10 +146,10 @@ class DatasetController @Inject()(userService: UserService,
_ <- Fox.fromBool(dataSource.dataLayers.nonEmpty) ?~> "dataset.explore.zeroLayers"
folderIdOpt <- Fox.runOptional(request.body.folderPath)(folderPath =>
folderService.getOrCreateFromPathLiteral(folderPath, request.identity._organization)) ?~> "dataset.explore.autoAdd.getFolder.failed"
- _ <- wkExploreRemoteLayerService.addRemoteDatasource(dataSource,
- request.body.datasetName,
- request.identity,
- folderIdOpt) ?~> "dataset.explore.autoAdd.failed"
+ _ <- wkExploreRemoteLayerService.addRemoteDatasourceToDatabase(dataSource,
+ request.body.datasetName,
+ request.identity,
+ folderIdOpt) ?~> "dataset.explore.autoAdd.failed"
} yield Ok
}

@@ -490,4 +491,11 @@ class DatasetController @Inject()(userService: UserService,
}
}

def compose(): Action[ComposeRequest] =
sil.SecuredAction.async(validateJson[ComposeRequest]) { implicit request =>
for {
(dataSource, newDatasetId) <- composeService.composeDataset(request.body, request.identity) ?~> "dataset.compose.failed"
} yield Ok(Json.obj("newDatasetId" -> newDatasetId))
}

}
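For orientation, a hedged sketch of building the request body for the new compose() action; ComposeRequest and ComposeRequestLayer are defined in the new ComposeService.scala further down, while all concrete values (including the voxel size) are placeholders:

```scala
import com.scalableminds.webknossos.datastore.models.VoxelSize
import models.dataset.{ComposeRequest, ComposeRequestLayer}

val voxelSize: VoxelSize = ??? // e.g. the voxel size shared by the source datasets

// Hypothetical payload; ids are illustrative ObjectId strings.
val composeRequest = ComposeRequest(
  newDatasetName = "composed_dataset",
  targetFolderId = "66f2e0b7a1b2c3d4e5f60718",
  organizationId = "sample_organization",
  voxelSize = voxelSize,
  layers = Seq(
    ComposeRequestLayer(
      datasetId = "66f2e0b7a1b2c3d4e5f60001", // an existing dataset on the same data store
      sourceName = "color",                   // layer name in the source dataset
      newName = "color_1",                    // layer name in the composed dataset
      transformations = Seq.empty             // appended to the layer's existing transformations
    )
  )
)
// On success the action answers with Json.obj("newDatasetId" -> newDatasetId).
```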
15 changes: 12 additions & 3 deletions app/controllers/UserTokenController.scala
@@ -182,10 +182,19 @@ class UserTokenController @Inject()(datasetDAO: DatasetDAO,
isAllowed <- datasetService.isEditableBy(dataset, Some(user))
} yield UserAccessAnswer(isAllowed)

def tryDelete: Fox[UserAccessAnswer] =
for {
_ <- Fox.fromBool(conf.Features.allowDeleteDatasets) ?~> "dataset.delete.disabled"
datasetId <- ObjectId.fromString(id)
dataset <- datasetDAO.findOne(datasetId)(GlobalAccessContext) ?~> "dataset.notFound"
user <- userBox.toFox ?~> "auth.token.noUser"
} yield UserAccessAnswer(user._organization == dataset._organization && user.isAdmin)

mode match {
- case AccessMode.read => tryRead
- case AccessMode.write => tryWrite
- case _ => Fox.successful(UserAccessAnswer(granted = false, Some("invalid access token")))
+ case AccessMode.read => tryRead
+ case AccessMode.write => tryWrite
+ case AccessMode.delete => tryDelete
+ case _ => Fox.successful(UserAccessAnswer(granted = false, Some("invalid access token")))
}
}

65 changes: 59 additions & 6 deletions app/controllers/WKRemoteDataStoreController.scala
@@ -7,9 +7,9 @@ import com.scalableminds.util.tools.Fox
import com.scalableminds.webknossos.datastore.controllers.JobExportProperties
import com.scalableminds.webknossos.datastore.helpers.{LayerMagLinkInfo, MagLinkInfo}
import com.scalableminds.webknossos.datastore.models.UnfinishedUpload
- import com.scalableminds.webknossos.datastore.models.datasource.DataSourceId
+ import com.scalableminds.webknossos.datastore.models.datasource.{AbstractDataLayer, DataSource, DataSourceId}
import com.scalableminds.webknossos.datastore.models.datasource.inbox.{InboxDataSourceLike => InboxDataSource}
- import com.scalableminds.webknossos.datastore.services.{DataSourcePathInfo, DataStoreStatus}
+ import com.scalableminds.webknossos.datastore.services.{DataSourcePathInfo, DataSourceRegistrationInfo, DataStoreStatus}
import com.scalableminds.webknossos.datastore.services.uploading.{
LinkedLayerIdentifier,
ReserveAdditionalInformation,
@@ -242,13 +242,23 @@ class WKRemoteDataStoreController @Inject()(
}
}

- def getPaths(name: String, key: String, organizationId: String, directoryName: String): Action[AnyContent] =
def deleteVirtualDataset(name: String, key: String): Action[String] =
Action.async(validateJson[String]) { implicit request =>
dataStoreService.validateAccess(name, key) { _ =>
for {
datasetIdValidated <- ObjectId.fromString(request.body) ?~> "dataset.delete.invalidId" ~> BAD_REQUEST
dataset <- datasetDAO.findOne(datasetIdValidated)(GlobalAccessContext) ~> NOT_FOUND
_ <- Fox.fromBool(dataset.isVirtual) ?~> "dataset.delete.notVirtual" ~> FORBIDDEN
_ <- datasetDAO.deleteDataset(dataset._id, onlyMarkAsDeleted = true)
} yield Ok
}
}

+ def getPaths(name: String, key: String, datasetId: ObjectId): Action[AnyContent] =
Action.async { implicit request =>
dataStoreService.validateAccess(name, key) { _ =>
for {
- organization <- organizationDAO.findOne(organizationId)(GlobalAccessContext)
- dataset <- datasetDAO.findOneByDirectoryNameAndOrganization(directoryName, organization._id)(
- GlobalAccessContext)
+ dataset <- datasetDAO.findOne(datasetId)(GlobalAccessContext) ~> NOT_FOUND
layers <- datasetLayerDAO.findAllForDataset(dataset._id)
magsAndLinkedMags <- Fox.serialCombined(layers)(l => datasetService.getPathsForDataLayer(dataset._id, l.name))
magLinkInfos = magsAndLinkedMags.map(_.map { case (mag, linkedMags) => MagLinkInfo(mag, linkedMags) })
@@ -270,6 +280,49 @@

}

// Registers a data source from the datastore as a dataset in the database.
// Called by the data store after exploration, when adding remote virtual datasets
// (which should exist only in the database).
def registerDataSource(name: String,
key: String,
organizationId: String,
directoryName: String,
token: String): Action[DataSourceRegistrationInfo] =
Action.async(validateJson[DataSourceRegistrationInfo]) { implicit request =>
dataStoreService.validateAccess(name, key) { dataStore =>
for {
user <- bearerTokenService.userForToken(token)
organization <- organizationDAO.findOne(organizationId)(GlobalAccessContext) ?~> Messages(
"organization.notFound",
organizationId) ~> NOT_FOUND
_ <- Fox.fromBool(organization._id == user._organization) ?~> "notAllowed" ~> FORBIDDEN
dataset <- datasetService.createVirtualDataset(
directoryName,
organizationId,
dataStore,
request.body.dataSource,
request.body.folderId,
user
)
} yield Ok(dataset._id.toString)
}
}

def updateDataSource(name: String, key: String, datasetId: ObjectId): Action[DataSource] =
Action.async(validateJson[DataSource]) { implicit request =>
dataStoreService.validateAccess(name, key) { _ =>
for {
_ <- datasetDAO.findOne(datasetId)(GlobalAccessContext) ~> NOT_FOUND
abstractDataSource = request.body.copy(dataLayers = request.body.dataLayers.map(AbstractDataLayer.from))
_ <- datasetDAO.updateDataSourceByDatasetId(datasetId,
name,
abstractDataSource.hashCode(),
abstractDataSource,
isUsable = true)(GlobalAccessContext)
} yield Ok
}
}

def jobExportProperties(name: String, key: String, jobId: ObjectId): Action[AnyContent] = Action.async {
implicit request =>
dataStoreService.validateAccess(name, key) { _ =>
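Note that DataSourceRegistrationInfo is imported but not defined in this diff; from its two uses above (request.body.dataSource and request.body.folderId), its shape is presumably close to this sketch, with the field types being guesses:

```scala
import com.scalableminds.webknossos.datastore.models.datasource.DataSource

// Assumed shape, inferred from registerDataSource above; not the actual definition.
// A Play JSON OFormat would additionally be needed for validateJson to work.
case class DataSourceRegistrationInfo(dataSource: DataSource, folderId: Option[String])
```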
23 changes: 5 additions & 18 deletions app/controllers/WKRemoteTracingStoreController.scala
@@ -8,18 +8,15 @@ import com.scalableminds.webknossos.datastore.Annotation.AnnotationProto
import com.scalableminds.webknossos.datastore.SkeletonTracing.SkeletonTracing
import com.scalableminds.webknossos.datastore.VolumeTracing.VolumeTracing
import com.scalableminds.webknossos.datastore.models.annotation.AnnotationLayer
- import com.scalableminds.webknossos.datastore.models.datasource.DataSourceId
import com.scalableminds.webknossos.tracingstore.AnnotationUpdatesReport
import com.scalableminds.webknossos.tracingstore.annotation.AnnotationLayerParameters
import com.scalableminds.webknossos.tracingstore.tracings.TracingId
import models.analytics.{AnalyticsService, UpdateAnnotationEvent, UpdateAnnotationViewOnlyEvent}
import models.annotation.AnnotationState._
import models.annotation._
import models.dataset.{DatasetDAO, DatasetService}
- import models.organization.OrganizationDAO
import models.user.UserDAO
import models.user.time.TimeSpanService
- import play.api.i18n.Messages
import play.api.libs.json.Json
import play.api.mvc.{Action, AnyContent, PlayBodyParsers}
import scalapb.GeneratedMessage
@@ -33,7 +30,6 @@ class WKRemoteTracingStoreController @Inject()(tracingStoreService: TracingStore
wkSilhouetteEnvironment: WkSilhouetteEnvironment,
timeSpanService: TimeSpanService,
datasetService: DatasetService,
- organizationDAO: OrganizationDAO,
userDAO: UserDAO,
annotationInformationProvider: AnnotationInformationProvider,
analyticsService: AnalyticsService,
@@ -125,15 +121,14 @@
}
}

- def dataSourceIdForAnnotation(name: String, key: String, annotationId: ObjectId): Action[AnyContent] =
+ def datasetIdForAnnotation(name: String, key: String, annotationId: ObjectId): Action[AnyContent] =
Action.async { implicit request =>
tracingStoreService.validateAccess(name, key) { _ =>
implicit val ctx: DBAccessContext = GlobalAccessContext
for {
annotation <- annotationDAO.findOne(annotationId) ?~> "annotation.notFound"
- dataset <- datasetDAO.findOne(annotation._dataset)
- organization <- organizationDAO.findOne(dataset._organization)
- } yield Ok(Json.toJson(DataSourceId(dataset.directoryName, organization._id)))
+ dataset <- datasetDAO.findOne(annotation._dataset) ?~> "dataset.notFound"
+ } yield Ok(Json.toJson(dataset._id.toString))
}
}

@@ -151,20 +146,12 @@
}
}

- def dataStoreUriForDataset(name: String,
- key: String,
- organizationId: Option[String],
- datasetDirectory: String): Action[AnyContent] =
+ def dataStoreUriForDataset(name: String, key: String, datasetId: ObjectId): Action[AnyContent] =
Action.async { implicit request =>
tracingStoreService.validateAccess(name, key) { _ =>
implicit val ctx: DBAccessContext = GlobalAccessContext
for {
- organizationIdWithFallback <- Fox.fillOption(organizationId) {
- datasetDAO.getOrganizationIdForDataset(datasetDirectory)(GlobalAccessContext)
- } ?~> Messages("dataset.noAccess", datasetDirectory) ~> FORBIDDEN
- dataset <- datasetDAO.findOneByDirectoryNameAndOrganization(datasetDirectory, organizationIdWithFallback) ?~> Messages(
- "dataset.noAccess",
- datasetDirectory) ~> FORBIDDEN
+ dataset <- datasetDAO.findOne(datasetId) ?~> "dataset.notFound" ~> NOT_FOUND
dataStore <- datasetService.dataStoreFor(dataset)
} yield Ok(Json.toJson(dataStore.url))
}
5 changes: 4 additions & 1 deletion app/models/annotation/AnnotationService.scala
@@ -121,6 +121,7 @@
private def createVolumeTracing(
dataSource: DataSource,
datasetOrganizationId: String,
datasetId: ObjectId,
datasetDataStore: DataStore,
fallbackLayer: Option[SegmentationLayer],
boundingBox: Option[BoundingBox] = None,
@@ -138,7 +139,7 @@
remoteDatastoreClient = new WKRemoteDataStoreClient(datasetDataStore, rpc)
fallbackLayerHasSegmentIndex <- fallbackLayer match {
case Some(layer) =>
- remoteDatastoreClient.hasSegmentIndexFile(datasetOrganizationId, dataSource.id.directoryName, layer.name)
+ remoteDatastoreClient.hasSegmentIndexFile(datasetId.toString, layer.name)
case None => Fox.successful(false)
}
elementClassProto <- ElementClass
@@ -237,6 +238,7 @@
volumeTracing <- createVolumeTracing(
dataSource,
dataset._organization,
dataset._id,
dataStore,
fallbackLayer,
magRestrictions = params.magRestrictions.getOrElse(MagRestrictions.empty),
@@ -430,6 +432,7 @@
volumeTracing <- createVolumeTracing(
dataSource,
dataset._organization,
datasetId,
dataStore,
fallbackLayer = fallbackLayer,
boundingBox = boundingBox.flatMap { box =>
104 changes: 104 additions & 0 deletions app/models/dataset/ComposeService.scala
@@ -0,0 +1,104 @@
package models.dataset

import com.scalableminds.util.accesscontext.DBAccessContext
import com.scalableminds.util.objectid.ObjectId
import com.scalableminds.util.tools.{Fox, FoxImplicits}
import com.scalableminds.webknossos.datastore.models.VoxelSize
import com.scalableminds.webknossos.datastore.models.datasource._
import models.user.User
import play.api.libs.json.{Json, OFormat}

import javax.inject.Inject
import scala.concurrent.ExecutionContext

case class ComposeRequest(
newDatasetName: String,
targetFolderId: String,
organizationId: String,
voxelSize: VoxelSize,
layers: Seq[ComposeRequestLayer]
)

object ComposeRequest {
implicit val composeRequestFormat: OFormat[ComposeRequest] = Json.format[ComposeRequest]
}
case class ComposeRequestLayer(
datasetId: String,
sourceName: String,
newName: String,
transformations: Seq[CoordinateTransformation]
)

object ComposeRequestLayer {
implicit val composeLayerFormat: OFormat[ComposeRequestLayer] = Json.format[ComposeRequestLayer]
}

class ComposeService @Inject()(datasetDAO: DatasetDAO, dataStoreDAO: DataStoreDAO, datasetService: DatasetService)(
implicit ec: ExecutionContext)
extends FoxImplicits {

def composeDataset(composeRequest: ComposeRequest, user: User)(
implicit ctx: DBAccessContext): Fox[(DataSource, ObjectId)] =
for {
_ <- isComposable(composeRequest) ?~> "Datasets are not composable, they are not on the same data store"
dataSource <- createDatasource(composeRequest, composeRequest.newDatasetName, composeRequest.organizationId)
dataStore <- dataStoreDAO.findOneWithUploadsAllowed
dataset <- datasetService.createVirtualDataset(composeRequest.newDatasetName,
composeRequest.organizationId,
dataStore,
dataSource,
Some(composeRequest.targetFolderId),
user)

} yield (dataSource, dataset._id)

private def getLayerFromComposeLayer(composeLayer: ComposeRequestLayer)(
implicit ctx: DBAccessContext): Fox[DataLayer] =
for {
datasetIdValidated <- ObjectId.fromString(composeLayer.datasetId) ?~> "Invalid dataset ID"
dataset <- datasetDAO.findOne(datasetIdValidated) ?~> "Dataset not found"
ds <- datasetService.fullDataSourceFor(dataset)
ds <- ds.toUsable.toFox ?~> "Dataset not usable"
layer <- ds.dataLayers.find(_.name == composeLayer.sourceName).toFox
applyCoordinateTransformations = (cOpt: Option[List[CoordinateTransformation]]) =>
cOpt match {
case Some(c) => Some(c ++ composeLayer.transformations.toList)
case None => Some(composeLayer.transformations.toList)
}
editedLayer: DataLayer <- layer match {
case l: DataLayerWithMagLocators =>
Fox.successful(
l.mapped(name = composeLayer.newName,
coordinateTransformations = applyCoordinateTransformations(l.coordinateTransformations)))
case _ => Fox.failure("Unsupported layer type for composition: " + layer.getClass.getSimpleName)
}
} yield editedLayer

private def isComposable(composeRequest: ComposeRequest)(implicit ctx: DBAccessContext): Fox[Boolean] =
// Check that all source datasets are on the same data store.
// With virtual datasets it should eventually be possible to compose datasets that use
// non-file paths from different data stores; however, the data store is currently
// tracked per dataset, not per mag.
for {
_ <- Fox.fromBool(composeRequest.layers.nonEmpty) ?~> "Cannot compose dataset with no layers"
datasetIds = composeRequest.layers.map(_.datasetId).distinct
datasetIdsValidated <- Fox.serialCombined(datasetIds.toList)(ObjectId.fromString(_)) ?~> "Invalid dataset ID"
datasets <- Fox.serialCombined(datasetIdsValidated)(datasetDAO.findOne(_))
dataStores = datasets.map(_._dataStore)
} yield {
dataStores.distinct.size == 1
}

private def createDatasource(composeRequest: ComposeRequest, datasetDirectoryName: String, organizationId: String)(
implicit ctx: DBAccessContext): Fox[DataSource] =
for {
layers <- Fox.serialCombined(composeRequest.layers.toList)(getLayerFromComposeLayer(_))
dataSource = GenericDataSource(
DataSourceId(datasetDirectoryName, organizationId),
layers,
composeRequest.voxelSize,
None
)

} yield dataSource

}
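Taken together, a compose request for two layers yields a purely database-backed data source. This sketch mirrors createDatasource above; every concrete value is illustrative, and layer1/layer2 stand in for the edited layers returned by getLayerFromComposeLayer:

```scala
import com.scalableminds.webknossos.datastore.models.VoxelSize
import com.scalableminds.webknossos.datastore.models.datasource.{DataLayer, DataSourceId, GenericDataSource}

val layer1, layer2: DataLayer = ??? // edited layers from getLayerFromComposeLayer
val voxelSize: VoxelSize = ???      // taken from the ComposeRequest

// The composed, virtual data source: directoryName is the new dataset name, and
// the final argument is None, as in createDatasource above.
val composed = GenericDataSource(
  DataSourceId("composed_dataset", "sample_organization"),
  List(layer1, layer2),
  voxelSize,
  None
)
```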