
Commit 0f04e73

SRIKKANTH authored and LiliDeng committed
Fix for serial disk tests failures when disk controller is NVMe
1 parent 191a68b commit 0f04e73

File tree

2 files changed: 137 additions & 53 deletions

lisa/features/nvme.py

Lines changed: 48 additions & 2 deletions
@@ -4,7 +4,7 @@
 import re
 from dataclasses import dataclass, field
 from functools import partial
-from typing import Any, List, Type
+from typing import Any, List, Pattern, Type
 
 from dataclasses_json import dataclass_json
 
@@ -15,11 +15,13 @@
 from lisa.schema import FeatureSettings
 from lisa.tools import Ls, Lspci, Nvmecli
 from lisa.tools.lspci import PciDevice
-from lisa.util import field_metadata, get_matched_str
+from lisa.util import LisaException, field_metadata, get_matched_str
 from lisa.util.constants import DEVICE_TYPE_NVME
 
 
 class Nvme(Feature):
+    _os_disk_namespace = ""
+    _os_disk_controller = ""
     # crw------- 1 root root 251, 0 Jun 21 03:08 /dev/nvme0
     # crw------- 1 root root 251, 0 Jun 21 03:08 /dev/nvme10
     _device_pattern = re.compile(r".*(?P<device_name>/dev/nvme[0-9]+$)", re.MULTILINE)
@@ -43,6 +45,9 @@ class Nvme(Feature):
     # /dev/nvme0n1p15 -> /dev/nvme0n1
     NVME_NAMESPACE_PATTERN = re.compile(r"/dev/nvme[0-9]+n[0-9]+", re.M)
 
+    # /dev/nvme0n1p15 -> /dev/nvme0
+    NVME_CONTROLLER_PATTERN = re.compile(r"/dev/nvme[0-9]+", re.M)
+
     # /dev/nvme0n1p15 -> /dev/nvme0n1
     NVME_DEVICE_PATTERN = re.compile(r"/dev/nvme[0-9]+", re.M)
 
@@ -79,8 +84,49 @@ def get_namespaces(self) -> List[str]:
             matched_result = self._namespace_pattern.match(row)
             if matched_result:
                 namespaces.append(matched_result.group("namespace"))
+        # When the disk controller type is NVMe, the OS disk also shows up as an
+        # NVMe device even though it is not a local NVMe disk, so remove it here.
         return self._remove_nvme_os_disk(namespaces)
 
+    def get_nvme_os_disk_info(self, pattern: Pattern[str]) -> str:
+        """
+        Generic function to get OS disk NVMe information based on the provided pattern.
+        """
+        os_disk_info = ""
+        node_disk = self._node.features[Disk]
+        if node_disk.get_os_disk_controller_type() == schema.DiskControllerType.NVME:
+            os_boot_partition = node_disk.get_os_boot_partition()
+            if os_boot_partition:
+                os_disk_info = get_matched_str(
+                    os_boot_partition.name,
+                    pattern,
+                )
+        else:
+            raise LisaException("OS disk is not of type NVMe.")
+        return os_disk_info
+
+    def get_nvme_os_disk_controller(self) -> str:
+        # Get the NVMe controller used by the OS disk.
+        # Sample os_boot_partition:
+        # name: /dev/nvme0n1p15, disk: nvme, mount_point: /boot/efi, type: vfat
+        # In the above example, '/dev/nvme0' is the controller.
+        if self._os_disk_controller == "":
+            self._os_disk_controller = self.get_nvme_os_disk_info(
                self.NVME_CONTROLLER_PATTERN
+            )
+        return self._os_disk_controller
+
+    def get_nvme_os_disk_namespace(self) -> str:
+        # Get the NVMe namespace used by the OS disk.
+        # Sample os_boot_partition:
+        # name: /dev/nvme0n1p15, disk: nvme, mount_point: /boot/efi, type: vfat
+        # In the above example, '/dev/nvme0n1' is the namespace.
+        if self._os_disk_namespace == "":
+            self._os_disk_namespace = self.get_nvme_os_disk_info(
+                self.NVME_NAMESPACE_PATTERN
+            )
+        return self._os_disk_namespace
+
     # With disk controller type NVMe (ASAP), OS disk along with all remote iSCSI devices
     # appears as NVMe.
     # Removing OS disk from the list of NVMe devices will remove all the
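
For reference, a minimal standalone sketch (not part of the commit) of what the new NVME_CONTROLLER_PATTERN and NVME_NAMESPACE_PATTERN pull out of a boot partition name such as /dev/nvme0n1p15. Plain re.search stands in for LISA's get_matched_str helper here, and the sample name is taken from the comments in the diff above.

import re

# Patterns as added in lisa/features/nvme.py above.
NVME_NAMESPACE_PATTERN = re.compile(r"/dev/nvme[0-9]+n[0-9]+", re.M)
NVME_CONTROLLER_PATTERN = re.compile(r"/dev/nvme[0-9]+", re.M)

# Sample OS boot partition name from the diff comments.
boot_partition_name = "/dev/nvme0n1p15"

# /dev/nvme0n1p15 -> /dev/nvme0n1 (namespace backing the OS disk)
os_disk_namespace = NVME_NAMESPACE_PATTERN.search(boot_partition_name).group()
# /dev/nvme0n1p15 -> /dev/nvme0 (controller shared with the remote data disks)
os_disk_controller = NVME_CONTROLLER_PATTERN.search(boot_partition_name).group()

print(os_disk_namespace)   # /dev/nvme0n1
print(os_disk_controller)  # /dev/nvme0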

lisa/sut_orchestrator/azure/features.py

Lines changed: 89 additions & 51 deletions
@@ -64,6 +64,7 @@
     Lsblk,
     Lspci,
     Modprobe,
+    Nvmecli,
     Rm,
     Sed,
 )
@@ -1632,12 +1633,6 @@ class Disk(AzureFeatureMixin, features.Disk):
         r"^(?!\s*#)\s*mounts:\s+-\s*\[\s*ephemeral[0-9]+,\s*([^,\s]+)\s*\]", re.M
     )
 
-    # /dev/nvme0n1p15 -> /dev/nvme0n1
-    NVME_NAMESPACE_PATTERN = re.compile(r"/dev/nvme[0-9]+n[0-9]+", re.M)
-
-    # /dev/nvme0n1p15 -> /dev/nvme0
-    NVME_CONTROLLER_PATTERN = re.compile(r"/dev/nvme[0-9]+", re.M)
-
     # <Msft Virtual Disk 1.0> at scbus0 target 0 lun 0 (pass0,da0)
     # <Msft Virtual Disk 1.0> at scbus0 target 0 lun 1 (pass1,da1)
     # <Msft Virtual DVD-ROM 1.0> at scbus0 target 0 lun 2 (pass2,cd0)
@@ -1719,34 +1714,78 @@ def _get_scsi_data_disks(self) -> List[str]:
         return azure_scsi_disks
 
     def get_luns(self) -> Dict[str, int]:
-        # disk_controller_type == SCSI
         # get azure scsi attached disks
         device_luns = {}
-        if isinstance(self._node.os, BSD):
-            cmd_result = self._node.execute(
-                "camcontrol devlist",
-                shell=True,
-                sudo=True,
+        # If disk_controller_type == NVME
+        #
+        # LUN numbers are a SCSI concept and not applicable to NVMe.
+        # But in Azure, remote data disks attached with the NVMe disk controller show
+        # LUN numbers in the "Namespace" field of nvme-cli output with an offset of +2.
+        # Subtracting 2 from the Namespace id therefore gives the LUN id of any
+        # attached Azure data disk.
+        # Example:
+        # root@lisa--170-e0-n0:/home/lisa# nvme -list
+        # Node          SN                   Model                          Namespace Usage                  Format       FW Rev  # noqa: E501
+        # ------------- -------------------- ------------------------------ --------- ---------------------- ------------ -------  # noqa: E501
+        # /dev/nvme0n1  SN: 000001           MSFT NVMe Accelerator v1.0     1         29.87 GB / 29.87 GB    512 B + 0 B  v1.0000  # noqa: E501
+        # /dev/nvme0n2  SN: 000001           MSFT NVMe Accelerator v1.0     2         4.29 GB / 4.29 GB      512 B + 0 B  v1.0000  # noqa: E501
+        # /dev/nvme0n3  SN: 000001           MSFT NVMe Accelerator v1.0     15        44.02 GB / 44.02 GB    512 B + 0 B  v1.0000  # noqa: E501
+        # /dev/nvme0n4  SN: 000001           MSFT NVMe Accelerator v1.0     14        6.44 GB / 6.44 GB      512 B + 0 B  v1.0000  # noqa: E501
+        # /dev/nvme1n1  68e8d42a7ed4e5f90002 Microsoft NVMe Direct Disk v2  1         472.45 GB / 472.45 GB  512 B + 0 B  NVMDV00  # noqa: E501
+        # /dev/nvme2n1  68e8d42a7ed4e5f90001 Microsoft NVMe Direct Disk v2  1         472.45 GB / 472.45 GB  512 B + 0 B  NVMDV00  # noqa: E501
+        #
+        # In the above output all devices starting with /dev/nvme0 are
+        # Azure remote disks.
+        # /dev/nvme0n1 is the OS disk and
+        # /dev/nvme0n2, /dev/nvme0n3, /dev/nvme0n4 are Azure remote data disks.
+        # They are connected at LUN 0, 13 and 12 respectively and their Namespace ids
+        # are 2, 15 and 14.
+        node_disk = self._node.features[Disk]
+        if node_disk.get_os_disk_controller_type() == schema.DiskControllerType.NVME:
+            data_disks = self.get_raw_data_disks()
+            nvme_device_ids = self._node.tools[Nvmecli].get_namespace_ids(
+                force_run=True
             )
-            for line in cmd_result.stdout.splitlines():
-                match = self.LUN_PATTERN_BSD.search(line)
-                if match:
-                    lun_number = int(match.group(1))
-                    device_name = match.group(2)
-                    device_luns.update({device_name: lun_number})
-        else:
-            azure_scsi_disks = self._get_scsi_data_disks()
-            device_luns = {}
-            lun_number_pattern = re.compile(r"[0-9]+$", re.M)
-            for disk in azure_scsi_disks:
-                # /dev/disk/azure/scsi1/lun20 -> 20
-                device_lun = int(get_matched_str(disk, lun_number_pattern))
-                # readlink -f /dev/disk/azure/scsi1/lun0
-                # /dev/sdc
+
+            for nvme_device_id in nvme_device_ids:
+                nvme_device_file = list(nvme_device_id.keys())[0]
+                if self._is_remote_data_disk(nvme_device_file):
+                    # Subtract 2 from the Namespace id to get the actual LUN number.
+                    device_lun = int(list(nvme_device_id.values())[0]) - 2
+                    if nvme_device_file in data_disks:
+                        device_luns.update(
+                            {
+                                nvme_device_file: device_lun,
+                            }
+                        )
+            return device_luns
+        # If disk_controller_type == SCSI
+        elif node_disk.get_os_disk_controller_type() == schema.DiskControllerType.SCSI:
+            if isinstance(self._node.os, BSD):
                 cmd_result = self._node.execute(
-                    f"readlink -f {disk}", shell=True, sudo=True
+                    "camcontrol devlist",
+                    shell=True,
+                    sudo=True,
                 )
-                device_luns.update({cmd_result.stdout: device_lun})
+                for line in cmd_result.stdout.splitlines():
+                    match = self.LUN_PATTERN_BSD.search(line)
+                    if match:
+                        lun_number = int(match.group(1))
+                        device_name = match.group(2)
+                        device_luns.update({device_name: lun_number})
+            else:
+                azure_scsi_disks = self._get_scsi_data_disks()
+                device_luns = {}
+                lun_number_pattern = re.compile(r"[0-9]+$", re.M)
+                for disk in azure_scsi_disks:
+                    # /dev/disk/azure/scsi1/lun20 -> 20
+                    device_lun = int(get_matched_str(disk, lun_number_pattern))
+                    # readlink -f /dev/disk/azure/scsi1/lun0
+                    # /dev/sdc
+                    cmd_result = self._node.execute(
+                        f"readlink -f {disk}", shell=True, sudo=True
+                    )
+                    device_luns.update({cmd_result.stdout: device_lun})
         return device_luns
 
     def get_raw_data_disks(self) -> List[str]:
@@ -1757,32 +1796,15 @@ def get_raw_data_disks(self) -> List[str]:
         # disk_controller_type == NVME
         node_disk = self._node.features[Disk]
         if node_disk.get_os_disk_controller_type() == schema.DiskControllerType.NVME:
-            # Getting OS disk nvme namespace and disk controller used by OS disk.
-            # Sample os_boot_partition:
-            # name: /dev/nvme0n1p15, disk: nvme, mount_point: /boot/efi, type: vfat
-            os_boot_partition = node_disk.get_os_boot_partition()
-            if os_boot_partition:
-                os_disk_namespace = get_matched_str(
-                    os_boot_partition.name,
-                    self.NVME_NAMESPACE_PATTERN,
-                )
-                os_disk_controller = get_matched_str(
-                    os_boot_partition.name,
-                    self.NVME_CONTROLLER_PATTERN,
-                )
-
             # With NVMe disk controller type, all remote SCSI disks are connected to
             # same NVMe controller. The same controller is used by OS disk.
             # This loop collects all the SCSI remote disks except OS disk.
-            nvme = self._node.features[Nvme]
-            nvme_namespaces = nvme.get_namespaces()
+            nvme_cli = self._node.tools[Nvmecli]
+            nvme_disks = nvme_cli.get_disks(force_run=True)
             disk_array = []
-            for name_space in nvme_namespaces:
-                if (
-                    name_space.startswith(os_disk_controller)
-                    and name_space != os_disk_namespace
-                ):
-                    disk_array.append(name_space)
+            for nvme_disk in nvme_disks:
+                if self._is_remote_data_disk(nvme_disk):
+                    disk_array.append(nvme_disk)
             return disk_array
 
         # disk_controller_type == SCSI
@@ -2007,6 +2029,22 @@ def _is_resource_disk(self, disk: DiskInfo) -> bool:
             partition.mountpoint == "/mnt/resource" for partition in disk.partitions
         )
 
+    def _is_remote_data_disk(self, disk: str) -> bool:
+        # If disk_controller_type == NVME
+        nvme = self._node.features[Nvme]
+        if self.get_os_disk_controller_type() == schema.DiskControllerType.NVME:
+            os_disk_namespace = nvme.get_nvme_os_disk_namespace()
+            os_disk_controller = nvme.get_nvme_os_disk_controller()
+            # When disk_controller_type is NVMe, all remote disks are connected to the
+            # same NVMe controller, which is also used by the OS disk.
+            if disk.startswith(os_disk_controller) and disk != os_disk_namespace:
+                return True
+            return False
+
+        # If disk_controller_type == SCSI
+        azure_scsi_disks = self._get_scsi_data_disks()
+        return disk in azure_scsi_disks
+
     def _get_raw_data_disks_bsd(self) -> List[str]:
         disks = self._node.tools[Lsblk].get_disks()
 
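
For reference, a minimal standalone sketch (not part of the commit) of the Namespace-to-LUN mapping that the new get_luns() path relies on: subtracting 2 from the Namespace id of an Azure remote data disk yields its LUN. The namespace ids below are the ones from the nvme -list output quoted in the diff; the list-of-single-entry-dict shape only mirrors how get_namespace_ids() is consumed in the commit and is an assumption for illustration.

# Sample namespace ids from the `nvme -list` output in the diff; the OS disk
# (/dev/nvme0n1) and the local NVMe disks (/dev/nvme1n1, /dev/nvme2n1) are
# excluded here, mirroring the _is_remote_data_disk() filter.
sample_namespace_ids = [
    {"/dev/nvme0n2": 2},   # remote data disk attached at LUN 0
    {"/dev/nvme0n3": 15},  # remote data disk attached at LUN 13
    {"/dev/nvme0n4": 14},  # remote data disk attached at LUN 12
]

device_luns = {}
for entry in sample_namespace_ids:
    device, namespace_id = next(iter(entry.items()))
    # Azure reports the LUN in the Namespace field with an offset of +2,
    # so subtract 2 to recover the LUN number.
    device_luns[device] = namespace_id - 2

print(device_luns)  # {'/dev/nvme0n2': 0, '/dev/nvme0n3': 13, '/dev/nvme0n4': 12}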