Skip to content

Use a context manager for all write actions to prevent indefinite database locks #77

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Apr 13, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions .github/workflows/test-python.yml
Original file line number Diff line number Diff line change
Expand Up @@ -84,8 +84,9 @@ jobs:
uses: jupyterlab/maintainer-tools/.github/actions/base-setup@v1
with:
python_version: "3.7"
- name: Install miniumum versions
uses: jupyterlab/maintainer-tools/.github/actions/install-minimums@v1
dependency_type: minimum
- name: Install
run: pip install -e ".[test]"
- name: Run the unit tests
run: |
pytest -vv -W default || pytest -vv -W default --lf
Expand Down
180 changes: 89 additions & 91 deletions jupyter_server_fileid/manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -317,17 +317,17 @@ def _create(self, path: str) -> str:
return id

def index(self, path: str) -> str:
path = self._normalize_path(path)
row = self.con.execute("SELECT id FROM Files WHERE path = ?", (path,)).fetchone()
existing_id = row and row[0]
with self.con:
path = self._normalize_path(path)
row = self.con.execute("SELECT id FROM Files WHERE path = ?", (path,)).fetchone()
existing_id = row and row[0]

if existing_id:
return existing_id
if existing_id:
return existing_id

# create new record
id = self._create(path)
self.con.commit()
return id
# create new record
id = self._create(path)
return id

def get_id(self, path: str) -> Optional[str]:
path = self._normalize_path(path)
Expand All @@ -340,37 +340,36 @@ def get_path(self, id: str) -> Optional[str]:
return self._from_normalized_path(path)

def move(self, old_path: str, new_path: str) -> None:
old_path = self._normalize_path(old_path)
new_path = self._normalize_path(new_path)
row = self.con.execute("SELECT id FROM Files WHERE path = ?", (old_path,)).fetchone()
id = row and row[0]

if id:
self.con.execute("UPDATE Files SET path = ? WHERE path = ?", (new_path, old_path))
self._move_recursive(old_path, new_path, posixpath)
else:
id = self._create(new_path)
with self.con:
old_path = self._normalize_path(old_path)
new_path = self._normalize_path(new_path)
row = self.con.execute("SELECT id FROM Files WHERE path = ?", (old_path,)).fetchone()
id = row and row[0]

if id:
self.con.execute("UPDATE Files SET path = ? WHERE path = ?", (new_path, old_path))
self._move_recursive(old_path, new_path, posixpath)
else:
id = self._create(new_path)

self.con.commit()
return id
return id

def copy(self, from_path: str, to_path: str) -> Optional[str]:
from_path = self._normalize_path(from_path)
to_path = self._normalize_path(to_path)
with self.con:
from_path = self._normalize_path(from_path)
to_path = self._normalize_path(to_path)

id = self._create(to_path)
self._copy_recursive(from_path, to_path, posixpath)
id = self._create(to_path)
self._copy_recursive(from_path, to_path, posixpath)

self.con.commit()
return id
return id

def delete(self, path: str) -> None:
path = self._normalize_path(path)

self.con.execute("DELETE FROM Files WHERE path = ?", (path,))
self._delete_recursive(path, posixpath)
with self.con:
path = self._normalize_path(path)

self.con.commit()
self.con.execute("DELETE FROM Files WHERE path = ?", (path,))
self._delete_recursive(path, posixpath)

def save(self, path: str) -> None:
return
Expand Down Expand Up @@ -718,39 +717,38 @@ def _update(self, id, stat_info=None, path=None):
def index(self, path, stat_info=None, commit=True):
"""Returns the file ID for the file at `path`, creating a new file ID if
one does not exist. Returns None only if file does not exist at path."""
path = self._normalize_path(path)
stat_info = stat_info or self._stat(path)
if not stat_info:
return None
with self.con:
path = self._normalize_path(path)
stat_info = stat_info or self._stat(path)
if not stat_info:
return None

# if file is symlink, then index the path it refers to instead
if stat_info.is_symlink:
return self.index(os.path.realpath(path))
# if file is symlink, then index the path it refers to instead
if stat_info.is_symlink:
return self.index(os.path.realpath(path))

# sync file at path and return file ID if it exists
id = self._sync_file(path, stat_info)
if id is not None:
return id
# sync file at path and return file ID if it exists
id = self._sync_file(path, stat_info)
if id is not None:
return id

# otherwise, create a new record and return the file ID
id = self._create(path, stat_info)
if commit:
self.con.commit()
return id
# otherwise, create a new record and return the file ID
id = self._create(path, stat_info)
return id

def get_id(self, path):
"""Retrieves the file ID associated with a file path. Returns None if
the file has not yet been indexed or does not exist at the given
path."""
path = self._normalize_path(path)
stat_info = self._stat(path)
if not stat_info:
return None
with self.con:
path = self._normalize_path(path)
stat_info = self._stat(path)
if not stat_info:
return None

# then sync file at path and retrieve id, if any
id = self._sync_file(path, stat_info)
self.con.commit()
return id
# then sync file at path and retrieve id, if any
id = self._sync_file(path, stat_info)
return id

def get_path(self, id):
"""Retrieves the file path associated with a file ID. The file path is
Expand Down Expand Up @@ -795,26 +793,26 @@ def get_path(self, id):
def move(self, old_path, new_path):
"""Handles file moves by updating the file path of the associated file
ID. Returns the file ID. Returns None if file does not exist at new_path."""
old_path = self._normalize_path(old_path)
new_path = self._normalize_path(new_path)
with self.con:
old_path = self._normalize_path(old_path)
new_path = self._normalize_path(new_path)

# verify file exists at new_path
stat_info = self._stat(new_path)
if stat_info is None:
return None
# verify file exists at new_path
stat_info = self._stat(new_path)
if stat_info is None:
return None

# sync the file and see if it was already indexed
#
# originally this method did not call _sync_file() for performance
# reasons, but this is needed to handle an edge case:
# https://github.com/jupyter-server/jupyter_server_fileid/issues/62
id = self._sync_file(new_path, stat_info)
if id is None:
# if no existing record, create a new one
id = self._create(new_path, stat_info)
# sync the file and see if it was already indexed
#
# originally this method did not call _sync_file() for performance
# reasons, but this is needed to handle an edge case:
# https://github.com/jupyter-server/jupyter_server_fileid/issues/62
id = self._sync_file(new_path, stat_info)
if id is None:
# if no existing record, create a new one
id = self._create(new_path, stat_info)

self.con.commit()
return id
return id

def _copy_recursive(self, from_path: str, to_path: str, _: str = "") -> None:
"""Copy all children of a given directory at `from_path` to a new
Expand Down Expand Up @@ -859,13 +857,13 @@ def copy(self, from_path, to_path):
def delete(self, path):
"""Handles file deletions by deleting the associated record in the File
table. Returns None."""
path = self._normalize_path(path)
with self.con:
path = self._normalize_path(path)

if os.path.isdir(path):
self._delete_recursive(path)
if os.path.isdir(path):
self._delete_recursive(path)

self.con.execute("DELETE FROM Files WHERE path = ?", (path,))
self.con.commit()
self.con.execute("DELETE FROM Files WHERE path = ?", (path,))

def save(self, path):
"""Handles file saves (edits) by updating recorded stat info.
Expand All @@ -878,21 +876,21 @@ def save(self, path):
JupyterLab. This would (wrongly) preserve the association b/w the old
file ID and the current path rather than create a new file ID.
"""
path = self._normalize_path(path)
with self.con:
path = self._normalize_path(path)

# look up record by ino and path
stat_info = self._stat(path)
row = self.con.execute(
"SELECT id FROM Files WHERE ino = ? AND path = ?", (stat_info.ino, path)
).fetchone()
# if no record exists, return early
if row is None:
return
# look up record by ino and path
stat_info = self._stat(path)
row = self.con.execute(
"SELECT id FROM Files WHERE ino = ? AND path = ?", (stat_info.ino, path)
).fetchone()
# if no record exists, return early
if row is None:
return

# otherwise, update the stat info
(id,) = row
self._update(id, stat_info)
self.con.commit()
# otherwise, update the stat info
(id,) = row
self._update(id, stat_info)

def get_handlers_by_action(self) -> Dict[str, Optional[Callable[[Dict[str, Any]], Any]]]:
return {
Expand Down
40 changes: 40 additions & 0 deletions tests/test_manager.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import ntpath
import os
import posixpath
import sqlite3
import sys
from unittest.mock import patch

Expand Down Expand Up @@ -617,3 +618,42 @@ def test_db_journal_mode(any_fid_manager_class, fid_db_path, jp_root_dir, db_jou
cursor = fid_manager.con.execute("PRAGMA journal_mode")
actual_journal_mode = cursor.fetchone()
assert actual_journal_mode[0].upper() == expected_journal_mode


# This test demonstrates an issue raised in
# https://github.com/jupyter-server/jupyter_server_fileid/pull/76
# which was later fixed in
# https://github.com/jupyter-server/jupyter_server_fileid/pull/77
#
# We use this unit test to catch this edge case and ensure
# its covered going forward.
def test_multiple_fileIdManager_connections_after_exception(fid_db_path):
original_file_path = "/path/to/file"
copy_location = "/path/to/copy"
another_copy_location = "/path/to/other"

# Setup an initial file ID manager connected to a sqlite database.
manager_1 = ArbitraryFileIdManager(db_path=fid_db_path)

# Create an initial ID for this file
manager_1.index(original_file_path)
# Copy the file
manager_1.copy(original_file_path, copy_location)
# Try copying the file again.
excepted = False
try:
manager_1.copy(original_file_path, copy_location)
# We expect this to fail because the file is already in the database.
except sqlite3.IntegrityError:
excepted = True
pass

assert excepted, "Copying to the same location should raise an exception, but it did not here."

# Previously, these actions locked the database for future connections.
# This was fixed in: https://github.com/jupyter-server/jupyter_server_fileid/pull/77

# Try making a second connection that writes to the DB and
# make sure no exceptions were raised.
manager_2 = ArbitraryFileIdManager(db_path=fid_db_path)
manager_2.copy(original_file_path, another_copy_location)