Skip to content

Check for ID before creating a new record #78

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Apr 16, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 19 additions & 8 deletions jupyter_server_fileid/manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -312,20 +312,19 @@ def _from_normalized_path(self, path: Optional[str]) -> Optional[str]:

def _create(self, path: str) -> str:
path = self._normalize_path(path)
row = self.con.execute("SELECT id FROM Files WHERE path = ?", (path,)).fetchone()
existing_id = row and row[0]

if existing_id:
return existing_id

id = self._uuid()
self.con.execute("INSERT INTO Files (id, path) VALUES (?, ?)", (id, path))
return id

def index(self, path: str) -> str:
# create new record
with self.con:
path = self._normalize_path(path)
row = self.con.execute("SELECT id FROM Files WHERE path = ?", (path,)).fetchone()
existing_id = row and row[0]

if existing_id:
return existing_id

# create new record
id = self._create(path)
return id

Expand Down Expand Up @@ -625,6 +624,7 @@ def _sync_file(self, path, stat_info):
# otherwise update existing record with new path, moving any indexed
# children if necessary. then return its id
self._update(id, path=path)

if stat_info.is_dir and old_path != path:
self._move_recursive(old_path, path)
self._update_cursor = True
Expand Down Expand Up @@ -671,6 +671,17 @@ def _create(self, path, stat_info):
dangerous and may throw a runtime error if the file is not guaranteed to
have a unique `ino`.
"""
# If the path exists
existing_id, ino = None, None
row = self.con.execute("SELECT id, ino FROM Files WHERE path = ?", (path,)).fetchone()
if row:
existing_id, ino = row

# If the file ID already exists and the current file matches our records
# return the file ID instead of creating a new one.
if existing_id and stat_info.ino == ino:
return existing_id

id = self._uuid()
self.con.execute(
"INSERT INTO Files (id, path, ino, crtime, mtime, is_dir) VALUES (?, ?, ?, ?, ?, ?)",
Expand Down
40 changes: 0 additions & 40 deletions tests/test_manager.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
import ntpath
import os
import posixpath
import sqlite3
import sys
from unittest.mock import patch

Expand Down Expand Up @@ -618,42 +617,3 @@ def test_db_journal_mode(any_fid_manager_class, fid_db_path, jp_root_dir, db_jou
cursor = fid_manager.con.execute("PRAGMA journal_mode")
actual_journal_mode = cursor.fetchone()
assert actual_journal_mode[0].upper() == expected_journal_mode


# This test demonstrates an issue raised in
# https://github.com/jupyter-server/jupyter_server_fileid/pull/76
# which was later fixed in
# https://github.com/jupyter-server/jupyter_server_fileid/pull/77
#
# We use this unit test to catch this edge case and ensure
# its covered going forward.
def test_multiple_fileIdManager_connections_after_exception(fid_db_path):
original_file_path = "/path/to/file"
copy_location = "/path/to/copy"
another_copy_location = "/path/to/other"

# Setup an initial file ID manager connected to a sqlite database.
manager_1 = ArbitraryFileIdManager(db_path=fid_db_path)

# Create an initial ID for this file
manager_1.index(original_file_path)
# Copy the file
manager_1.copy(original_file_path, copy_location)
# Try copying the file again.
excepted = False
try:
manager_1.copy(original_file_path, copy_location)
# We expect this to fail because the file is already in the database.
except sqlite3.IntegrityError:
excepted = True
pass

assert excepted, "Copying to the same location should raise an exception, but it did not here."

# Previously, these actions locked the database for future connections.
# This was fixed in: https://github.com/jupyter-server/jupyter_server_fileid/pull/77

# Try making a second connection that writes to the DB and
# make sure no exceptions were raised.
manager_2 = ArbitraryFileIdManager(db_path=fid_db_path)
manager_2.copy(original_file_path, another_copy_location)
Loading