|
14 | 14 | # See the License for the specific language governing permissions and
|
15 | 15 | # limitations under the License.
|
16 | 16 |
|
| 17 | +import logging |
17 | 18 | import os
|
| 19 | +import posixpath |
18 | 20 | import re
|
19 | 21 | import time
|
20 |
| -import logging |
21 |
| -import itertools |
22 |
| -import posixpath |
23 |
| -import urllib.error |
24 |
| -import urllib.request |
25 | 22 | from builtins import object, str
|
26 | 23 | from urllib.parse import urlparse as lib_urlparse
|
27 | 24 |
|
|
33 | 30 | from django.utils.translation import gettext as _
|
34 | 31 |
|
35 | 32 | from aws import s3
|
36 |
| -from aws.conf import AWS_ACCOUNTS, PERMISSION_ACTION_S3, get_default_region, get_locations, is_raz_s3 |
37 |
| -from aws.s3 import S3A_ROOT, normpath, s3file, translate_s3_error |
| 33 | +from aws.conf import AWS_ACCOUNTS, get_default_region, get_locations, is_raz_s3, PERMISSION_ACTION_S3 |
| 34 | +from aws.s3 import normpath, S3A_ROOT, s3file, translate_s3_error |
38 | 35 | from aws.s3.s3stat import S3Stat
|
39 | 36 | from filebrowser.conf import REMOTE_STORAGE_HOME
|
40 | 37 |
|
41 | 38 | DEFAULT_READ_SIZE = 1024 * 1024 # 1MB
|
42 | 39 | BUCKET_NAME_PATTERN = re.compile(
|
43 | 40 | r"^((?:(?:[a-zA-Z0-9]|[a-zA-Z0-9][a-zA-Z0-9_\-]*[a-zA-Z0-9])\.)*(?:[A-Za-z0-9]|[A-Za-z0-9][A-Za-z0-9_\-]*[A-Za-z0-9]))$")
|
| 41 | +S3A_DELETE_CHUNK_SIZE = 1000 # S3 API limit for bulk delete operations |
44 | 42 |
|
45 | 43 | LOG = logging.getLogger()
|
46 | 44 |
|
@@ -353,38 +351,95 @@ def listdir(self, path, glob=None):
|
353 | 351 | @translate_s3_error
|
354 | 352 | @auth_error_handler
|
355 | 353 | def rmtree(self, path, skipTrash=True):
|
| 354 | + """ |
| 355 | + Recursively deletes objects from an S3 path. |
| 356 | +
|
| 357 | + This method can delete a single object, all objects under a given prefix (a "directory"), |
| 358 | + or an entire bucket. It handles paginating through keys for deletion to respect |
| 359 | + S3's 1000-key limit per bulk delete request. |
| 360 | +
|
| 361 | + Args: |
| 362 | + path (str): The S3 URI to delete (e.g., 's3a://test-bucket/test-folder/'). |
| 363 | + skipTrash (bool): If False, this operation is not supported and will fail. |
| 364 | +
|
| 365 | + Raises: |
| 366 | + NotImplementedError: If `skipTrash` is set to False. |
| 367 | + S3FileSystemException: If any errors occur during the deletion process. |
| 368 | + ValueError: If the provided path is not a valid S3 URI. |
| 369 | + """ |
356 | 370 | if not skipTrash:
|
357 |
| - raise NotImplementedError('Moving to trash is not implemented for S3') |
| 371 | + raise NotImplementedError("Moving to trash is not implemented for S3.") |
| 372 | + |
| 373 | + try: |
| 374 | + bucket_name, key_name = s3.parse_uri(path)[:2] |
| 375 | + except Exception: |
| 376 | + raise ValueError(f"Invalid S3 URI provided: {path}") |
| 377 | + |
| 378 | + LOG.info(f"Attempting to recursively delete path: {path}") |
| 379 | + LOG.debug(f"Parsed bucket: '{bucket_name}', key: '{key_name}'") |
358 | 380 |
|
359 |
| - bucket_name, key_name = s3.parse_uri(path)[:2] |
360 | 381 | if bucket_name and not key_name:
|
361 |
| - self._delete_bucket(bucket_name) |
362 |
| - else: |
363 |
| - if self.isdir(path): |
364 |
| - path = self._append_separator(path) # Really need to make sure we end with a '/' |
| 382 | + return self._delete_bucket(bucket_name) |
| 383 | + |
| 384 | + # Ensure directory-like paths end with a '/' to be used as a prefix |
| 385 | + if self.isdir(path): |
| 386 | + path = self._append_separator(path) |
| 387 | + key_name = self._append_separator(key_name) |
365 | 388 |
|
| 389 | + is_directory_key = key_name and key_name.endswith("/") |
| 390 | + |
| 391 | + try: |
366 | 392 | key = self._get_key(path, validate=False)
|
| 393 | + bucket = key.bucket |
| 394 | + except Exception as e: |
| 395 | + # Handle cases where the bucket might not exist or connection fails |
| 396 | + LOG.error(f"Failed to connect to bucket '{bucket_name}'. Error: {e}") |
| 397 | + raise S3FileSystemException(f"Could not access bucket '{bucket_name}'.") from e |
367 | 398 |
|
368 |
| - if key.exists(): |
369 |
| - dir_keys = [] |
| 399 | + if key.exists() or is_directory_key: # Check both key.exists() and isdir to handle virtual dirs |
| 400 | + keys_to_delete = [] |
370 | 401 |
|
371 |
| - if self.isdir(path): |
372 |
| - _, dir_key_name = s3.parse_uri(path)[:2] |
373 |
| - dir_keys = key.bucket.list(prefix=dir_key_name) |
| 402 | + if is_directory_key: |
| 403 | + for k in bucket.list(prefix=key_name): |
| 404 | + keys_to_delete.append(k) |
374 | 405 |
|
375 |
| - if not dir_keys: |
376 |
| - # Avoid Raz bulk delete issue |
377 |
| - deleted_key = key.delete() |
378 |
| - if deleted_key.exists(): |
379 |
| - raise S3FileSystemException('Could not delete key %s' % deleted_key) |
380 |
| - else: |
381 |
| - result = key.bucket.delete_keys(list(dir_keys)) |
| 406 | + # Explicitly add the current directory marker (empty object) if it exists but wasn't included |
| 407 | + dir_marker = bucket.get_key(key_name) |
| 408 | + if dir_marker is not None and dir_marker not in keys_to_delete: |
| 409 | + keys_to_delete.append(dir_marker) |
| 410 | + else: |
| 411 | + # Add the single key object |
| 412 | + keys_to_delete.append(key) |
| 413 | + |
| 414 | + LOG.info(f"Found {len(keys_to_delete)} S3 object(s) to delete under prefix '{key_name}'.") |
| 415 | + |
| 416 | + # Calculate total chunks using integer ceiling division. |
| 417 | + total_chunks = (len(keys_to_delete) + S3A_DELETE_CHUNK_SIZE - 1) // S3A_DELETE_CHUNK_SIZE |
| 418 | + all_errors = [] |
| 419 | + |
| 420 | + # Process keys in chunks of 1000 (S3 API limit) |
| 421 | + for i in range(0, len(keys_to_delete), S3A_DELETE_CHUNK_SIZE): |
| 422 | + chunk = keys_to_delete[i : i + S3A_DELETE_CHUNK_SIZE] |
| 423 | + |
| 424 | + LOG.debug(f"Deleting chunk {i // S3A_DELETE_CHUNK_SIZE + 1} of {total_chunks} (size: {len(chunk)} keys).") |
| 425 | + try: |
| 426 | + result = bucket.delete_keys(chunk) |
382 | 427 | if result.errors:
|
383 |
| - msg = "%d errors occurred while attempting to delete the following S3 paths:\n%s" % ( |
384 |
| - len(result.errors), '\n'.join(['%s: %s' % (error.key, error.message) for error in result.errors]) |
385 |
| - ) |
386 |
| - LOG.error(msg) |
387 |
| - raise S3FileSystemException(msg) |
| 428 | + LOG.warning(f"Encountered {len(result.errors)} errors in this deletion chunk.") |
| 429 | + all_errors.extend(result.errors) |
| 430 | + except S3ResponseError as e: |
| 431 | + # Catch potential connection errors or access denied on the delete call itself |
| 432 | + LOG.error(f"An S3 API error occurred during key deletion: {e}") |
| 433 | + raise S3FileSystemException(f"Failed to delete objects: {e.message}") from e |
| 434 | + |
| 435 | + # After deleting all keys, handle any accumulated errors |
| 436 | + if all_errors: |
| 437 | + error_details = "\n".join([f"- {err.key}: {err.message}" for err in all_errors]) |
| 438 | + msg = f"{len(all_errors)} errors occurred while deleting objects from '{path}':\n{error_details}" |
| 439 | + LOG.error(msg) |
| 440 | + raise S3FileSystemException(msg) |
| 441 | + |
| 442 | + LOG.info(f"Successfully deleted {len(keys_to_delete)} object(s) from path: {path}") |
388 | 443 |
|
389 | 444 | @translate_s3_error
|
390 | 445 | @auth_error_handler
|
|
0 commit comments