Calculate s3 storage #8789

Draft · wants to merge 29 commits into base: master

Commits (29)
9f89d38  Explore remote datasets as virtual datasets (frcroth, Jun 23, 2025)
42101a9  Do not have virtual remote datasets deleted (frcroth, Jun 23, 2025)
18dfe98  Put mag in db (frcroth, Jun 25, 2025)
9c3cf74  Add temporary front end for testing virtual datasets (frcroth, Jun 25, 2025)
391227a  Use mags for WKW datasets (frcroth, Jun 25, 2025)
916542d  Merge branch 'master' into explore-virtual-datasets (frcroth, Jul 2, 2025)
3b3b13c  Move zarr streaming stuff to service, todo: add controller with datas… (frcroth, Jul 2, 2025)
3f81a85  Move old zarr routes to LegacyController, update zarr routes to use id (frcroth, Jul 7, 2025)
ac0f66d  Use datasetId in BinaryDataController (frcroth, Jul 7, 2025)
d51dea9  Agglomerate files by dataset id (frcroth, Jul 7, 2025)
371f3fb  Merge branch 'master' into explore-virtual-datasets (frcroth, Jul 7, 2025)
611e552  Update more routes to use dataset id (frcroth, Jul 7, 2025)
a4aaff4  Disable deletion route on virtual datasets for now (frcroth, Jul 9, 2025)
677c8fe  Merge branch 'master' into explore-virtual-datasets (frcroth, Jul 9, 2025)
5b220ac  Use datasetId for connectome routes (frcroth, Jul 9, 2025)
0fc1834  Move compose to webknossos (frcroth, Jul 9, 2025)
6e27ba5  Merge branch 'master' into explore-virtual-datasets (frcroth, Jul 9, 2025)
b1797fc  Fix WKW dataset mags being lost in parsing (frcroth, Jul 14, 2025)
72de557  Merge branch 'master' into explore-virtual-datasets (frcroth, Jul 14, 2025)
ffdb99f  Adapt RemoteFallbackLayer to use datasetIds (frcroth, Jul 14, 2025)
f4c2c0c  Add 'isVirtual' column to datasets (frcroth, Jul 14, 2025)
f4ec53f  Remove usages of datasource id in rest api (frcroth, Jul 14, 2025)
6654d1e  WIP add getting used storage bytes for remote datasets (MichaelBuessemeyer, Jul 15, 2025)
d9fb2f3  WIP more adding getting used storage bytes for remote datasets (MichaelBuessemeyer, Jul 16, 2025)
bed0122  WIP measure storage (MichaelBuessemeyer, Jul 17, 2025)
092da3a  fix usedstorage schema and path mapping (MichaelBuessemeyer, Jul 18, 2025)
96e1451  Add filtering for remote mag which are part of registered datavaults (MichaelBuessemeyer, Jul 21, 2025)
72805db  update used storage dataset wise (MichaelBuessemeyer, Jul 21, 2025)
e5c5c84  Merge branch 'master' of github.com:scalableminds/webknossos into cal… (MichaelBuessemeyer, Jul 21, 2025)

8 changes: 3 additions & 5 deletions app/controllers/AnnotationIOController.scala
@@ -337,8 +337,7 @@ class AnnotationIOController @Inject()(
else volumeTracing.boundingBox

for {
tracingCanHaveSegmentIndex <- canHaveSegmentIndex(organizationId,
dataset.name,
tracingCanHaveSegmentIndex <- canHaveSegmentIndex(dataset._id.toString,
fallbackLayerOpt.map(_.name),
remoteDataStoreClient)
elementClassProto <- fallbackLayerOpt
@@ -358,13 +357,12 @@
}

private def canHaveSegmentIndex(
organizationId: String,
datasetName: String,
datasetId: String,
fallbackLayerName: Option[String],
remoteDataStoreClient: WKRemoteDataStoreClient)(implicit ec: ExecutionContext): Fox[Boolean] =
fallbackLayerName match {
case Some(layerName) =>
remoteDataStoreClient.hasSegmentIndexFile(organizationId, datasetName, layerName)
remoteDataStoreClient.hasSegmentIndexFile(datasetId, layerName)
case None =>
Fox.successful(true)
}
16 changes: 12 additions & 4 deletions app/controllers/DatasetController.scala
@@ -89,6 +89,7 @@ class DatasetController @Inject()(userService: UserService,
analyticsService: AnalyticsService,
mailchimpClient: MailchimpClient,
wkExploreRemoteLayerService: WKExploreRemoteLayerService,
composeService: ComposeService,
sil: Silhouette[WkEnv])(implicit ec: ExecutionContext, bodyParsers: PlayBodyParsers)
extends Controller
with MetadataAssertions {
@@ -145,10 +146,10 @@ class DatasetController @Inject()(userService: UserService,
_ <- Fox.fromBool(dataSource.dataLayers.nonEmpty) ?~> "dataset.explore.zeroLayers"
folderIdOpt <- Fox.runOptional(request.body.folderPath)(folderPath =>
folderService.getOrCreateFromPathLiteral(folderPath, request.identity._organization)) ?~> "dataset.explore.autoAdd.getFolder.failed"
_ <- wkExploreRemoteLayerService.addRemoteDatasource(dataSource,
request.body.datasetName,
request.identity,
folderIdOpt) ?~> "dataset.explore.autoAdd.failed"
_ <- wkExploreRemoteLayerService.addRemoteDatasourceToDatabase(dataSource,
request.body.datasetName,
request.identity,
folderIdOpt) ?~> "dataset.explore.autoAdd.failed"
} yield Ok
}

@@ -490,4 +491,11 @@ class DatasetController @Inject()(userService: UserService,
}
}

def compose(): Action[ComposeRequest] =
sil.SecuredAction.async(validateJson[ComposeRequest]) { implicit request =>
for {
(dataSource, newDatasetId) <- composeService.composeDataset(request.body, request.identity) ?~> "dataset.compose.failed"
} yield Ok(Json.obj("newDatasetId" -> newDatasetId))
}

}
6 changes: 5 additions & 1 deletion app/controllers/InitialDataController.scala
@@ -143,7 +143,11 @@ Samplecountry
"This is a wonderful dummy publication, it has authors, it has a link, it has a doi number, those could go here.\nLorem [ipsum](https://github.com/scalableminds/webknossos) dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua.")
)
private val defaultDataStore =
DataStore(conf.Datastore.name, conf.Http.uri, conf.Datastore.publicUri.getOrElse(conf.Http.uri), conf.Datastore.key)
DataStore(conf.Datastore.name,
conf.Http.uri,
conf.Datastore.publicUri.getOrElse(conf.Http.uri),
conf.Datastore.key,
reportUsedStorageEnabled = true)
private val defaultAiModel = AiModel(
ObjectId("66544a56d20000af0e42ba0f"),
defaultOrganization._id,
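The `reportUsedStorageEnabled = true` default ties into this PR's storage-calculation goal: only data stores with the flag set should be picked up by the storage scan. A minimal, hypothetical sketch of such a guard; the scan job itself is not part of this diff, and `reportUsedStorage` is an invented name for illustration:

```scala
// Hypothetical guard, for illustration only: how a periodic storage-report job might
// skip data stores that have not opted in. Only the flag itself appears in this hunk;
// reportUsedStorage is an assumed helper, not a real method from this PR.
def scanStoreIfEnabled(dataStore: DataStore)(implicit ec: ExecutionContext): Fox[Unit] =
  if (dataStore.reportUsedStorageEnabled)
    reportUsedStorage(dataStore) // assumed helper that measures and persists usage
  else
    Fox.successful(())
```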
30 changes: 29 additions & 1 deletion app/controllers/WKRemoteDataStoreController.scala
@@ -9,7 +9,7 @@ import com.scalableminds.webknossos.datastore.helpers.{LayerMagLinkInfo, MagLink
import com.scalableminds.webknossos.datastore.models.UnfinishedUpload
import com.scalableminds.webknossos.datastore.models.datasource.DataSourceId
import com.scalableminds.webknossos.datastore.models.datasource.inbox.{InboxDataSourceLike => InboxDataSource}
import com.scalableminds.webknossos.datastore.services.{DataSourcePathInfo, DataStoreStatus}
import com.scalableminds.webknossos.datastore.services.{DataSourcePathInfo, DataSourceRegistrationInfo, DataStoreStatus}
import com.scalableminds.webknossos.datastore.services.uploading.{
LinkedLayerIdentifier,
ReserveAdditionalInformation,
@@ -270,6 +270,34 @@ class WKRemoteDataStoreController @Inject()(

}

// Register a data source from the datastore as a dataset in the database.
// This is called by the data store after exploration, when adding remote virtual datasets
// (which exist only in the database).
def registerDataSource(name: String,
key: String,
organizationId: String,
directoryName: String,
token: String): Action[DataSourceRegistrationInfo] =
Action.async(validateJson[DataSourceRegistrationInfo]) { implicit request =>
dataStoreService.validateAccess(name, key) { dataStore =>
for {
user <- bearerTokenService.userForToken(token)
organization <- organizationDAO.findOne(organizationId)(GlobalAccessContext) ?~> Messages(
"organization.notFound",
organizationId) ~> NOT_FOUND
_ <- Fox.fromBool(organization._id == user._organization) ?~> "notAllowed" ~> FORBIDDEN
dataset <- datasetService.createVirtualDataset(
directoryName,
organizationId,
dataStore,
request.body.dataSource,
request.body.folderId,
user
)
} yield Ok(dataset._id.toString)
}
}

def jobExportProperties(name: String, key: String, jobId: ObjectId): Action[AnyContent] = Action.async {
implicit request =>
dataStoreService.validateAccess(name, key) { _ =>
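The request body type `DataSourceRegistrationInfo` is imported from the datastore module, so its definition is not part of this diff. From the usages above (`request.body.dataSource` and `request.body.folderId`), it presumably looks roughly like the following sketch; the field types are inferred, not confirmed:

```scala
import com.scalableminds.webknossos.datastore.models.datasource.DataSource
import play.api.libs.json.{Json, OFormat}

// Assumed shape, inferred from the request.body usages in registerDataSource above;
// the real definition lives in com.scalableminds.webknossos.datastore.services and may differ.
case class DataSourceRegistrationInfo(
    dataSource: DataSource,  // the explored data source to persist as a virtual dataset
    folderId: Option[String] // optional target folder for the new dataset
)

object DataSourceRegistrationInfo {
  // a JSON format like this is needed for validateJson[DataSourceRegistrationInfo]
  implicit val jsonFormat: OFormat[DataSourceRegistrationInfo] = Json.format[DataSourceRegistrationInfo]
}
```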
23 changes: 5 additions & 18 deletions app/controllers/WKRemoteTracingStoreController.scala
@@ -8,18 +8,15 @@ import com.scalableminds.webknossos.datastore.Annotation.AnnotationProto
import com.scalableminds.webknossos.datastore.SkeletonTracing.SkeletonTracing
import com.scalableminds.webknossos.datastore.VolumeTracing.VolumeTracing
import com.scalableminds.webknossos.datastore.models.annotation.AnnotationLayer
import com.scalableminds.webknossos.datastore.models.datasource.DataSourceId
import com.scalableminds.webknossos.tracingstore.AnnotationUpdatesReport
import com.scalableminds.webknossos.tracingstore.annotation.AnnotationLayerParameters
import com.scalableminds.webknossos.tracingstore.tracings.TracingId
import models.analytics.{AnalyticsService, UpdateAnnotationEvent, UpdateAnnotationViewOnlyEvent}
import models.annotation.AnnotationState._
import models.annotation._
import models.dataset.{DatasetDAO, DatasetService}
import models.organization.OrganizationDAO
import models.user.UserDAO
import models.user.time.TimeSpanService
import play.api.i18n.Messages
import play.api.libs.json.Json
import play.api.mvc.{Action, AnyContent, PlayBodyParsers}
import scalapb.GeneratedMessage
@@ -33,7 +30,6 @@ class WKRemoteTracingStoreController @Inject()(tracingStoreService: TracingStore
wkSilhouetteEnvironment: WkSilhouetteEnvironment,
timeSpanService: TimeSpanService,
datasetService: DatasetService,
organizationDAO: OrganizationDAO,
userDAO: UserDAO,
annotationInformationProvider: AnnotationInformationProvider,
analyticsService: AnalyticsService,
@@ -125,15 +121,14 @@ class WKRemoteTracingStoreController @Inject()(tracingStoreService: TracingStore
}
}

def dataSourceIdForAnnotation(name: String, key: String, annotationId: ObjectId): Action[AnyContent] =
def datasetIdForAnnotation(name: String, key: String, annotationId: ObjectId): Action[AnyContent] =
Action.async { implicit request =>
tracingStoreService.validateAccess(name, key) { _ =>
implicit val ctx: DBAccessContext = GlobalAccessContext
for {
annotation <- annotationDAO.findOne(annotationId) ?~> "annotation.notFound"
dataset <- datasetDAO.findOne(annotation._dataset)
organization <- organizationDAO.findOne(dataset._organization)
} yield Ok(Json.toJson(DataSourceId(dataset.directoryName, organization._id)))
dataset <- datasetDAO.findOne(annotation._dataset) ?~> "dataset.notFound"
} yield Ok(dataset._id.toString)
}
}

@@ -151,20 +146,12 @@ class WKRemoteTracingStoreController @Inject()(tracingStoreService: TracingStore
}
}

def dataStoreUriForDataset(name: String,
key: String,
organizationId: Option[String],
datasetDirectory: String): Action[AnyContent] =
def dataStoreUriForDataset(name: String, key: String, datasetId: ObjectId): Action[AnyContent] =
Action.async { implicit request =>
tracingStoreService.validateAccess(name, key) { _ =>
implicit val ctx: DBAccessContext = GlobalAccessContext
for {
organizationIdWithFallback <- Fox.fillOption(organizationId) {
datasetDAO.getOrganizationIdForDataset(datasetDirectory)(GlobalAccessContext)
} ?~> Messages("dataset.noAccess", datasetDirectory) ~> FORBIDDEN
dataset <- datasetDAO.findOneByDirectoryNameAndOrganization(datasetDirectory, organizationIdWithFallback) ?~> Messages(
"dataset.noAccess",
datasetDirectory) ~> FORBIDDEN
dataset <- datasetDAO.findOne(datasetId) ?~> "dataset.notFound" ~> NOT_FOUND
dataStore <- datasetService.dataStoreFor(dataset)
} yield Ok(Json.toJson(dataStore.url))
}
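Both tracingstore-facing endpoints now key on dataset id: `datasetIdForAnnotation` answers with a plain ObjectId string rather than a JSON-encoded `DataSourceId` (directory name plus organization id), and `dataStoreUriForDataset` takes that id directly instead of an organization/directory pair. A hypothetical sketch of the consuming side, purely for illustration; none of this client code is in the diff, and the client and method names are invented:

```scala
// Hypothetical tracingstore-side flow; wkClient and its methods are invented names.
def dataStoreUriForAnnotation(annotationId: ObjectId)(implicit ec: ExecutionContext): Fox[String] =
  for {
    // Previously: a JSON body decoding to a DataSourceId (directory name + organization id).
    // Now: the response body is the dataset id as a plain string.
    datasetId <- wkClient.requestDatasetIdForAnnotation(annotationId)
    // Previously keyed on (organizationId, datasetDirectory); now the dataset id suffices.
    uri <- wkClient.requestDataStoreUriForDataset(datasetId)
  } yield uri
```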
5 changes: 4 additions & 1 deletion app/models/annotation/AnnotationService.scala
@@ -121,6 +121,7 @@ class AnnotationService @Inject()(
private def createVolumeTracing(
dataSource: DataSource,
datasetOrganizationId: String,
datasetId: ObjectId,
datasetDataStore: DataStore,
fallbackLayer: Option[SegmentationLayer],
boundingBox: Option[BoundingBox] = None,
@@ -138,7 +139,7 @@
remoteDatastoreClient = new WKRemoteDataStoreClient(datasetDataStore, rpc)
fallbackLayerHasSegmentIndex <- fallbackLayer match {
case Some(layer) =>
remoteDatastoreClient.hasSegmentIndexFile(datasetOrganizationId, dataSource.id.directoryName, layer.name)
remoteDatastoreClient.hasSegmentIndexFile(datasetId.toString, layer.name)
case None => Fox.successful(false)
}
elementClassProto <- ElementClass
@@ -237,6 +238,7 @@
volumeTracing <- createVolumeTracing(
dataSource,
dataset._organization,
dataset._id,
dataStore,
fallbackLayer,
magRestrictions = params.magRestrictions.getOrElse(MagRestrictions.empty),
@@ -430,6 +432,7 @@
volumeTracing <- createVolumeTracing(
dataSource,
dataset._organization,
datasetId,
dataStore,
fallbackLayer = fallbackLayer,
boundingBox = boundingBox.flatMap { box =>
109 changes: 109 additions & 0 deletions app/models/dataset/ComposeService.scala
@@ -0,0 +1,109 @@
package models.dataset

import com.scalableminds.util.accesscontext.DBAccessContext
import com.scalableminds.util.objectid.ObjectId
import com.scalableminds.util.tools.{Fox, FoxImplicits}
import com.scalableminds.webknossos.datastore.dataformats.layers.{WKWDataLayer, WKWSegmentationLayer}
import com.scalableminds.webknossos.datastore.models.VoxelSize
import com.scalableminds.webknossos.datastore.models.datasource._
import models.user.User
import play.api.libs.json.{Json, OFormat}

import javax.inject.Inject
import scala.concurrent.ExecutionContext

case class ComposeRequest(
newDatasetName: String,
targetFolderId: String,
organizationId: String,
voxelSize: VoxelSize,
layers: Seq[ComposeRequestLayer]
)

object ComposeRequest {
implicit val composeRequestFormat: OFormat[ComposeRequest] = Json.format[ComposeRequest]
}
case class ComposeRequestLayer(
datasetId: String,
sourceName: String,
newName: String,
transformations: Seq[CoordinateTransformation]
)

object ComposeRequestLayer {
implicit val composeLayerFormat: OFormat[ComposeRequestLayer] = Json.format[ComposeRequestLayer]
}

class ComposeService @Inject()(datasetDAO: DatasetDAO, dataStoreDAO: DataStoreDAO, datasetService: DatasetService)(
implicit ec: ExecutionContext)
extends FoxImplicits {

def composeDataset(composeRequest: ComposeRequest, user: User)(
implicit ctx: DBAccessContext): Fox[(DataSource, ObjectId)] =
for {
composable <- isComposable(composeRequest)
_ <- Fox.fromBool(composable) ?~> "Datasets are not composable, they are not on the same data store"
dataSource <- createDatasource(composeRequest, composeRequest.newDatasetName, composeRequest.organizationId)
dataStore <- dataStoreDAO.findOneWithUploadsAllowed
dataset <- datasetService.createVirtualDataset(composeRequest.newDatasetName,
composeRequest.organizationId,
dataStore,
dataSource,
Some(composeRequest.targetFolderId),
user)

} yield (dataSource, dataset._id)

private def getLayerFromComposeLayer(composeLayer: ComposeRequestLayer)(
implicit ctx: DBAccessContext): Fox[DataLayer] =
for {
datasetIdValidated <- ObjectId.fromString(composeLayer.datasetId) ?~> "Invalid dataset ID"
dataset <- datasetDAO.findOne(datasetIdValidated) ?~> "Dataset not found"
ds <- datasetService.fullDataSourceFor(dataset)
ds <- ds.toUsable.toFox ?~> "Dataset not usable"
layer <- ds.dataLayers.find(_.name == composeLayer.sourceName).toFox
applyCoordinateTransformations = (cOpt: Option[List[CoordinateTransformation]]) =>
cOpt match {
case Some(c) => Some(c ++ composeLayer.transformations.toList)
case None => Some(composeLayer.transformations.toList)
}
editedLayer: DataLayer = layer match {
case l: DataLayerWithMagLocators =>
l.mapped(name = composeLayer.newName,
coordinateTransformations = applyCoordinateTransformations(l.coordinateTransformations))
case l: WKWDataLayer =>
l.copy(name = composeLayer.newName,
coordinateTransformations = applyCoordinateTransformations(l.coordinateTransformations))
case l: WKWSegmentationLayer =>
l.copy(name = composeLayer.newName,
coordinateTransformations = applyCoordinateTransformations(l.coordinateTransformations))
}
} yield editedLayer

private def isComposable(composeRequest: ComposeRequest)(implicit ctx: DBAccessContext): Fox[Boolean] =
// Check that all datasets are on the same data store.
// With virtual datasets it should eventually be possible to compose datasets that use non-file
// paths across different data stores; for now, however, the data store is tracked per dataset
// rather than per mag, so all source datasets must share one data store.
for {
_ <- Fox.successful(())
datasetIds = composeRequest.layers.map(_.datasetId).distinct
datasetIdsValidated <- Fox.serialCombined(datasetIds.toList)(ObjectId.fromString(_)) ?~> "Invalid dataset ID"
datasets <- Fox.serialCombined(datasetIdsValidated)(datasetDAO.findOne(_))
dataStores = datasets.map(_._dataStore)
} yield {
dataStores.distinct.size == 1
}

private def createDatasource(composeRequest: ComposeRequest, datasetDirectoryName: String, organizationId: String)(
implicit ctx: DBAccessContext): Fox[DataSource] =
for {
layers <- Fox.serialCombined(composeRequest.layers.toList)(getLayerFromComposeLayer(_))
dataSource = GenericDataSource(
DataSourceId(datasetDirectoryName, organizationId),
layers,
composeRequest.voxelSize,
None
)

} yield dataSource

}
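For orientation, a minimal sketch of how a caller might build a compose request against these case classes. All ids and names below are placeholders, and the route wiring for `DatasetController.compose` is outside this diff:

```scala
// Minimal client-side sketch, assuming a VoxelSize is at hand (e.g. read from one of the
// source datasets). All ids and names are placeholders, not real values.
def buildExampleComposeRequest(voxelSize: VoxelSize): ComposeRequest =
  ComposeRequest(
    newDatasetName = "composed_dataset",
    targetFolderId = "570b9f4e4bb848d0885ea917", // placeholder folder id
    organizationId = "sample_organization",
    voxelSize = voxelSize,
    layers = Seq(
      ComposeRequestLayer(
        datasetId = "66544a56d20000af0e42ba0f", // placeholder id of a source dataset
        sourceName = "color",       // layer name as it exists in the source dataset
        newName = "color_remapped", // name the layer gets in the composed dataset
        transformations = Seq.empty // appended to the source layer's existing transformations
      )
    )
  )
```

Serialized via `composeRequestFormat` and POSTed to `DatasetController.compose`, this answers with `{"newDatasetId": "<id>"}` on success; `isComposable` rejects requests whose source datasets live on different data stores.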