Skip to content

Commit d2892ee

Browse files
authored
[s3] Improve S3 recursive delete to handle large directories and virtual directory scenarios
- Refactors the recursive deletion logic to efficiently delete large numbers of S3 objects by chunking delete requests according to the S3 API's 1000-key limit. - Fixes deletion of virtual directories: a path such as s3a://bucket/child/inner.txt implicitly creates a "child" virtual directory, and deleting that directory previously failed. - Adds detailed error handling, logging, and validation for invalid URIs and bucket access failures. Enhances robustness when deleting directories and single objects, and clarifies the method's behavior and limitations.
1 parent ff7cb77 commit d2892ee

File tree

2 files changed

+311
-186
lines changed

2 files changed

+311
-186
lines changed

desktop/libs/aws/src/aws/s3/s3fs.py

Lines changed: 85 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -14,14 +14,11 @@
1414
# See the License for the specific language governing permissions and
1515
# limitations under the License.
1616

17+
import logging
1718
import os
19+
import posixpath
1820
import re
1921
import time
20-
import logging
21-
import itertools
22-
import posixpath
23-
import urllib.error
24-
import urllib.request
2522
from builtins import object, str
2623
from urllib.parse import urlparse as lib_urlparse
2724

@@ -33,14 +30,15 @@
3330
from django.utils.translation import gettext as _
3431

3532
from aws import s3
36-
from aws.conf import AWS_ACCOUNTS, PERMISSION_ACTION_S3, get_default_region, get_locations, is_raz_s3
37-
from aws.s3 import S3A_ROOT, normpath, s3file, translate_s3_error
33+
from aws.conf import AWS_ACCOUNTS, get_default_region, get_locations, is_raz_s3, PERMISSION_ACTION_S3
34+
from aws.s3 import normpath, S3A_ROOT, s3file, translate_s3_error
3835
from aws.s3.s3stat import S3Stat
3936
from filebrowser.conf import REMOTE_STORAGE_HOME
4037

4138
DEFAULT_READ_SIZE = 1024 * 1024 # 1MB
4239
BUCKET_NAME_PATTERN = re.compile(
4340
r"^((?:(?:[a-zA-Z0-9]|[a-zA-Z0-9][a-zA-Z0-9_\-]*[a-zA-Z0-9])\.)*(?:[A-Za-z0-9]|[A-Za-z0-9][A-Za-z0-9_\-]*[A-Za-z0-9]))$")
41+
S3A_DELETE_CHUNK_SIZE = 1000 # S3 API limit for bulk delete operations
4442

4543
LOG = logging.getLogger()
4644

@@ -353,38 +351,95 @@ def listdir(self, path, glob=None):
353351
@translate_s3_error
354352
@auth_error_handler
355353
def rmtree(self, path, skipTrash=True):
354+
"""
355+
Recursively deletes objects from an S3 path.
356+
357+
This method can delete a single object, all objects under a given prefix (a "directory"),
358+
or an entire bucket. It handles paginating through keys for deletion to respect
359+
S3's 1000-key limit per bulk delete request.
360+
361+
Args:
362+
path (str): The S3 URI to delete (e.g., 's3a://test-bucket/test-folder/').
363+
skipTrash (bool): If False, this operation is not supported and will fail.
364+
365+
Raises:
366+
NotImplementedError: If `skipTrash` is set to False.
367+
S3FileSystemException: If any errors occur during the deletion process.
368+
ValueError: If the provided path is not a valid S3 URI.
369+
"""
356370
if not skipTrash:
357-
raise NotImplementedError('Moving to trash is not implemented for S3')
371+
raise NotImplementedError("Moving to trash is not implemented for S3.")
372+
373+
try:
374+
bucket_name, key_name = s3.parse_uri(path)[:2]
375+
except Exception:
376+
raise ValueError(f"Invalid S3 URI provided: {path}")
377+
378+
LOG.info(f"Attempting to recursively delete path: {path}")
379+
LOG.debug(f"Parsed bucket: '{bucket_name}', key: '{key_name}'")
358380

359-
bucket_name, key_name = s3.parse_uri(path)[:2]
360381
if bucket_name and not key_name:
361-
self._delete_bucket(bucket_name)
362-
else:
363-
if self.isdir(path):
364-
path = self._append_separator(path) # Really need to make sure we end with a '/'
382+
return self._delete_bucket(bucket_name)
383+
384+
# Ensure directory-like paths end with a '/' to be used as a prefix
385+
if self.isdir(path):
386+
path = self._append_separator(path)
387+
key_name = self._append_separator(key_name)
365388

389+
is_directory_key = key_name and key_name.endswith("/")
390+
391+
try:
366392
key = self._get_key(path, validate=False)
393+
bucket = key.bucket
394+
except Exception as e:
395+
# Handle cases where the bucket might not exist or connection fails
396+
LOG.error(f"Failed to connect to bucket '{bucket_name}'. Error: {e}")
397+
raise S3FileSystemException(f"Could not access bucket '{bucket_name}'.") from e
367398

368-
if key.exists():
369-
dir_keys = []
399+
if key.exists() or is_directory_key: # Check both key.exists() and isdir to handle virtual dirs
400+
keys_to_delete = []
370401

371-
if self.isdir(path):
372-
_, dir_key_name = s3.parse_uri(path)[:2]
373-
dir_keys = key.bucket.list(prefix=dir_key_name)
402+
if is_directory_key:
403+
for k in bucket.list(prefix=key_name):
404+
keys_to_delete.append(k)
374405

375-
if not dir_keys:
376-
# Avoid Raz bulk delete issue
377-
deleted_key = key.delete()
378-
if deleted_key.exists():
379-
raise S3FileSystemException('Could not delete key %s' % deleted_key)
380-
else:
381-
result = key.bucket.delete_keys(list(dir_keys))
406+
# Explicitly add the current directory marker (empty object) if it exists but wasn't included
407+
dir_marker = bucket.get_key(key_name)
408+
if dir_marker is not None and dir_marker not in keys_to_delete:
409+
keys_to_delete.append(dir_marker)
410+
else:
411+
# Add the single key object
412+
keys_to_delete.append(key)
413+
414+
LOG.info(f"Found {len(keys_to_delete)} S3 object(s) to delete under prefix '{key_name}'.")
415+
416+
# Calculate total chunks using integer ceiling division.
417+
total_chunks = (len(keys_to_delete) + S3A_DELETE_CHUNK_SIZE - 1) // S3A_DELETE_CHUNK_SIZE
418+
all_errors = []
419+
420+
# Process keys in chunks of 1000 (S3 API limit)
421+
for i in range(0, len(keys_to_delete), S3A_DELETE_CHUNK_SIZE):
422+
chunk = keys_to_delete[i : i + S3A_DELETE_CHUNK_SIZE]
423+
424+
LOG.debug(f"Deleting chunk {i // S3A_DELETE_CHUNK_SIZE + 1} of {total_chunks} (size: {len(chunk)} keys).")
425+
try:
426+
result = bucket.delete_keys(chunk)
382427
if result.errors:
383-
msg = "%d errors occurred while attempting to delete the following S3 paths:\n%s" % (
384-
len(result.errors), '\n'.join(['%s: %s' % (error.key, error.message) for error in result.errors])
385-
)
386-
LOG.error(msg)
387-
raise S3FileSystemException(msg)
428+
LOG.warning(f"Encountered {len(result.errors)} errors in this deletion chunk.")
429+
all_errors.extend(result.errors)
430+
except S3ResponseError as e:
431+
# Catch potential connection errors or access denied on the delete call itself
432+
LOG.error(f"An S3 API error occurred during key deletion: {e}")
433+
raise S3FileSystemException(f"Failed to delete objects: {e.message}") from e
434+
435+
# After deleting all keys, handle any accumulated errors
436+
if all_errors:
437+
error_details = "\n".join([f"- {err.key}: {err.message}" for err in all_errors])
438+
msg = f"{len(all_errors)} errors occurred while deleting objects from '{path}':\n{error_details}"
439+
LOG.error(msg)
440+
raise S3FileSystemException(msg)
441+
442+
LOG.info(f"Successfully deleted {len(keys_to_delete)} object(s) from path: {path}")
388443

389444
@translate_s3_error
390445
@auth_error_handler

0 commit comments

Comments
 (0)