jupyter-server · Zsailer · Apr 16, 2024 · Apr 12, 2024 · Apr 13, 2024 · Apr 16, 2024
diff --git a/jupyter_server_fileid/manager.py b/jupyter_server_fileid/manager.py
@@ -312,20 +312,19 @@ def _from_normalized_path(self, path: Optional[str]) -> Optional[str]:
 
     def _create(self, path: str) -> str:
         path = self._normalize_path(path)
+        row = self.con.execute("SELECT id FROM Files WHERE path = ?", (path,)).fetchone()
+        existing_id = row and row[0]
+
+        if existing_id:
+            return existing_id
+
         id = self._uuid()
         self.con.execute("INSERT INTO Files (id, path) VALUES (?, ?)", (id, path))
         return id
 
     def index(self, path: str) -> str:
+        # create new record
         with self.con:
-            path = self._normalize_path(path)
-            row = self.con.execute("SELECT id FROM Files WHERE path = ?", (path,)).fetchone()
-            existing_id = row and row[0]
-
-            if existing_id:
-                return existing_id
-
-            # create new record
             id = self._create(path)
             return id
 
@@ -625,6 +624,7 @@ def _sync_file(self, path, stat_info):
         # otherwise update existing record with new path, moving any indexed
         # children if necessary. then return its id
         self._update(id, path=path)
+
         if stat_info.is_dir and old_path != path:
             self._move_recursive(old_path, path)
             self._update_cursor = True
@@ -671,6 +671,17 @@ def _create(self, path, stat_info):
         dangerous and may throw a runtime error if the file is not guaranteed to
         have a unique `ino`.
         """
+        # If the path exists
+        existing_id, ino = None, None
+        row = self.con.execute("SELECT id, ino FROM Files WHERE path = ?", (path,)).fetchone()
+        if row:
+            existing_id, ino = row
+
+        # If the file ID already exists and the current file matches our records
+        # return the file ID instead of creating a new one.
+        if existing_id and stat_info.ino == ino:
+            return existing_id
+
         id = self._uuid()
         self.con.execute(
             "INSERT INTO Files (id, path, ino, crtime, mtime, is_dir) VALUES (?, ?, ?, ?, ?, ?)",

diff --git a/tests/test_manager.py b/tests/test_manager.py
@@ -1,7 +1,6 @@
 import ntpath
 import os
 import posixpath
-import sqlite3
 import sys
 from unittest.mock import patch
 
@@ -618,42 +617,3 @@ def test_db_journal_mode(any_fid_manager_class, fid_db_path, jp_root_dir, db_jou
         cursor = fid_manager.con.execute("PRAGMA journal_mode")
         actual_journal_mode = cursor.fetchone()
         assert actual_journal_mode[0].upper() == expected_journal_mode
-
-
-# This test demonstrates an issue raised in
-# https://github.com/jupyter-server/jupyter_server_fileid/pull/76
-# which was later fixed in
-# https://github.com/jupyter-server/jupyter_server_fileid/pull/77
-#
-# We use this unit test to catch this edge case and ensure
-# its covered going forward.
-def test_multiple_fileIdManager_connections_after_exception(fid_db_path):
-    original_file_path = "/path/to/file"
-    copy_location = "/path/to/copy"
-    another_copy_location = "/path/to/other"
-
-    # Setup an initial file ID manager connected to a sqlite database.
-    manager_1 = ArbitraryFileIdManager(db_path=fid_db_path)
-
-    # Create an initial ID for this file
-    manager_1.index(original_file_path)
-    # Copy the file
-    manager_1.copy(original_file_path, copy_location)
-    # Try copying the file again.
-    excepted = False
-    try:
-        manager_1.copy(original_file_path, copy_location)
-    # We expect this to fail because the file is already in the database.
-    except sqlite3.IntegrityError:
-        excepted = True
-        pass
-
-    assert excepted, "Copying to the same location should raise an exception, but it did not here."
-
-    # Previously, these actions locked the database for future connections.
-    # This was fixed in: https://github.com/jupyter-server/jupyter_server_fileid/pull/77
-
-    # Try making a second connection that writes to the DB and
-    # make sure no exceptions were raised.
-    manager_2 = ArbitraryFileIdManager(db_path=fid_db_path)
-    manager_2.copy(original_file_path, another_copy_location)