Skip to content

Commit 0a96dbf

Browse files
committed
Add tests
1 parent 9680c5c commit 0a96dbf

File tree

2 files changed

+246
-11
lines changed

2 files changed

+246
-11
lines changed

python/tests/test_highlevel.py

Lines changed: 230 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5668,3 +5668,233 @@ def test_different_node_flags(self):
56685668
result = ts.sample_nodes_by_ploidy(2)
56695669
assert result.shape == (1, 2)
56705670
assert_array_equal(result, np.array([[0, 2]]))
5671+
5672+
5673+
class TestMapToVcfModel:
    """
    Tests for ``map_to_vcf_model`` as called on tree sequences: the
    individual-to-node mapping and the individual names it returns, plus
    the warnings and errors it emits for inconsistent input.
    """

    def test_no_individuals_default_ploidy(self):
        """Without individuals, each sample becomes its own haploid entry."""
        ts = tskit.Tree.generate_balanced(4).tree_sequence
        assert ts.num_individuals == 0

        # Default ploidy should be 1
        result = ts.map_to_vcf_model()
        assert isinstance(result, tskit.VcfModelMapping)
        assert result.individuals_nodes.shape == (4, 1)
        assert_array_equal(result.individuals_nodes, [[0], [1], [2], [3]])
        assert result.individuals_name.shape == (4,)
        assert list(result.individuals_name) == [f"tsk_{i}" for i in range(4)]

    def test_no_individuals_custom_ploidy(self):
        """Without individuals, samples are grouped in order by ``ploidy``."""
        ts = tskit.Tree.generate_balanced(6).tree_sequence
        assert ts.num_individuals == 0

        # Use ploidy = 2
        result = ts.map_to_vcf_model(ploidy=2)
        assert isinstance(result, tskit.VcfModelMapping)
        assert result.individuals_nodes.shape == (3, 2)
        assert_array_equal(result.individuals_nodes, [[0, 1], [2, 3], [4, 5]])
        assert result.individuals_name.shape == (3,)
        assert list(result.individuals_name) == [f"tsk_{i}" for i in range(3)]

    def test_no_individuals_uneven_ploidy(self):
        """A sample count that is not a multiple of ``ploidy`` is rejected."""
        ts = tskit.Tree.generate_balanced(5).tree_sequence
        # This tree sequence has no individuals
        assert ts.num_individuals == 0

        # 5 samples cannot be evenly divided into ploidy=2
        with pytest.raises(ValueError, match="not a multiple"):
            ts.map_to_vcf_model(ploidy=2)

    def test_with_individuals(self):
        """Individuals in the tree sequence define the node grouping."""
        ts = msprime.sim_ancestry(
            5,
            random_seed=42,
        )
        result = ts.map_to_vcf_model()
        assert isinstance(result, tskit.VcfModelMapping)
        assert result.individuals_nodes.shape == (5, 2)
        # assert_array_equal reports the mismatching entries on failure,
        # unlike a bare ``assert np.array_equal(...)``.
        assert_array_equal(
            result.individuals_nodes,
            np.array([[0, 1], [2, 3], [4, 5], [6, 7], [8, 9]]),
        )
        assert result.individuals_name.shape == (5,)
        assert list(result.individuals_name) == [f"tsk_{i}" for i in range(5)]

    def test_with_individuals_and_ploidy_error(self):
        """Passing ``ploidy`` when individuals exist raises ValueError."""
        tables = tskit.TableCollection(1.0)
        tables.individuals.add_row()
        tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=0, individual=0)
        ts = tables.tree_sequence()

        with pytest.raises(ValueError, match="Cannot specify ploidy when individuals"):
            ts.map_to_vcf_model(ploidy=2)

    def test_specific_individuals(self):
        """Selecting a subset of individuals pads shorter rows with -1."""
        tables = tskit.TableCollection(1.0)
        # Create 5 individuals with varying ploidy
        for i in range(5):
            tables.individuals.add_row()
            # Individuals have ploidy i+1
            for _ in range(i + 1):
                tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=0, individual=i)
        ts = tables.tree_sequence()

        result = ts.map_to_vcf_model(individuals=[1, 3])
        assert isinstance(result, tskit.VcfModelMapping)
        # Individual 1 has ploidy 2, individual 3 has ploidy 4
        assert result.individuals_nodes.shape == (2, 4)
        # Row 0 (individual 1) owns nodes 1-2 and is padded with -1;
        # row 1 (individual 3) owns nodes 6-9 and fills the row.
        assert_array_equal(
            result.individuals_nodes,
            [[1, 2, -1, -1], [6, 7, 8, 9]],
        )

        assert result.individuals_name.shape == (2,)
        assert list(result.individuals_name) == ["tsk_1", "tsk_3"]

    def test_individual_with_no_nodes_warning(self):
        """An individual without nodes is skipped with a single warning."""
        tables = tskit.TableCollection(1.0)
        # Individual with no nodes
        tables.individuals.add_row()
        # Individual with nodes
        tables.individuals.add_row()
        tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=0, individual=1)
        ts = tables.tree_sequence()

        with warnings.catch_warnings(record=True) as w:
            # Record the warning regardless of the ambient filters; without
            # this an "error" or "ignore" filter would change the outcome.
            warnings.simplefilter("always")
            result = ts.map_to_vcf_model()
        assert len(w) == 1
        assert "Individual 0 has no nodes" in str(w[0].message)

        # Should only include individual 1
        assert result.individuals_nodes.shape == (1, 1)
        assert result.individuals_nodes[0, 0] == 0
        assert result.individuals_name.shape == (1,)
        assert result.individuals_name[0] == "tsk_1"

    def test_individual_with_no_nodes_error(self):
        """Explicitly requesting an individual without nodes is an error."""
        tables = tskit.TableCollection(1.0)
        # Individual with no nodes
        tables.individuals.add_row()
        # Individual with nodes
        tables.individuals.add_row()
        tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=0, individual=1)
        ts = tables.tree_sequence()

        with pytest.raises(ValueError, match="Individual 0 has no nodes"):
            ts.map_to_vcf_model(individuals=[0])

    def test_invalid_individual_id(self):
        """Negative and out-of-range individual IDs are both rejected."""
        tables = tskit.TableCollection(1.0)
        tables.individuals.add_row()
        tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=0, individual=0)
        ts = tables.tree_sequence()

        with pytest.raises(ValueError, match="Invalid individual ID"):
            ts.map_to_vcf_model(individuals=[-1])

        with pytest.raises(ValueError, match="Invalid individual ID"):
            ts.map_to_vcf_model(individuals=[1])

    def test_mixed_sample_non_sample_warning(self):
        """An individual mixing sample and non-sample nodes warns once."""
        tables = tskit.TableCollection(1.0)
        tables.individuals.add_row()
        tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=0, individual=0)
        tables.nodes.add_row(flags=0, time=0, individual=0)  # Non-sample node
        ts = tables.tree_sequence()

        with warnings.catch_warnings(record=True) as w:
            # Record the warning regardless of the ambient filters.
            warnings.simplefilter("always")
            ts.map_to_vcf_model()
        assert len(w) == 1
        assert "Individual 0 has both sample and non-sample nodes" in str(
            w[0].message
        )

    def test_samples_without_individuals_warning(self):
        """A sample node lacking an individual triggers a single warning."""
        tables = tskit.TableCollection(1.0)
        tables.individuals.add_row()
        # Node with individual
        tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=0, individual=0)
        # Node without individual
        tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=0, individual=tskit.NULL)
        ts = tables.tree_sequence()

        with warnings.catch_warnings(record=True) as w:
            # Record the warning regardless of the ambient filters.
            warnings.simplefilter("always")
            ts.map_to_vcf_model()
        assert len(w) == 1
        assert "At least one sample node does not have an individual ID" in str(
            w[0].message
        )

    def test_metadata_key_for_names(self):
        """``name_metadata_key`` pulls names from individual metadata."""
        tables = tskit.TableCollection(1.0)

        # Add individuals with metadata
        tables.individuals.metadata_schema = tskit.MetadataSchema(
            {
                "codec": "json",
                "type": "object",
                "properties": {"name": {"type": "string"}},
            }
        )
        tables.individuals.add_row(metadata={"name": "ind1"})
        tables.individuals.add_row(metadata={"name": "ind2"})

        # Add nodes
        tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=0, individual=0)
        tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=0, individual=1)
        ts = tables.tree_sequence()

        result = ts.map_to_vcf_model(name_metadata_key="name")
        assert result.individuals_name.shape == (2,)
        assert list(result.individuals_name) == ["ind1", "ind2"]

    def test_custom_individual_names(self):
        """``individual_names`` overrides the generated ``tsk_<id>`` names."""
        tables = tskit.TableCollection(1.0)
        tables.individuals.add_row()
        tables.individuals.add_row()
        tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=0, individual=0)
        tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=0, individual=1)
        ts = tables.tree_sequence()

        custom_names = ["individual_A", "individual_B"]
        result = ts.map_to_vcf_model(individual_names=custom_names)
        assert result.individuals_name.shape == (2,)
        assert list(result.individuals_name) == custom_names

    def test_name_conflict_error(self):
        """Giving both naming options at once raises ValueError."""
        tables = tskit.TableCollection(1.0)
        ts = tables.tree_sequence()
        with pytest.raises(
            ValueError,
            match="Cannot specify both name_metadata_key and individual_names",
        ):
            ts.map_to_vcf_model(
                name_metadata_key="name", individual_names=["custom_name"]
            )

    def test_name_count_mismatch_error(self):
        """The number of custom names must equal the number of individuals."""
        tables = tskit.TableCollection(1.0)
        tables.individuals.add_row()
        tables.individuals.add_row()
        tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=0, individual=0)
        tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=0, individual=1)
        ts = tables.tree_sequence()

        with pytest.raises(
            ValueError, match="number of individuals does not match the number of names"
        ):
            ts.map_to_vcf_model(individual_names=["only_one_name"])

python/tskit/trees.py

Lines changed: 16 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -10551,9 +10551,8 @@ def map_to_vcf_model(
1055110551
"Cannot specify ploidy when individuals are present in the tree sequence"
1055210552
)
1055310553

10554-
if any(
10555-
self.num_individuals > 0
10556-
and np.logical_and(
10554+
if self.num_individuals > 0 and np.any(
10555+
np.logical_and(
1055710556
self.nodes_individual == tskit.NULL,
1055810557
self.nodes_flags & tskit.NODE_IS_SAMPLE,
1055910558
)
@@ -10566,6 +10565,7 @@ def map_to_vcf_model(
1056610565
if ploidy is None:
1056710566
ploidy = 1
1056810567
individuals_nodes = self.sample_nodes_by_ploidy(ploidy)
10568+
ts_individual_names = [f"tsk_{i}" for i in range(len(individuals_nodes))]
1056910569
else:
1057010570
individuals_nodes = []
1057110571
ts_individual_names = []
@@ -10578,15 +10578,15 @@ def map_to_vcf_model(
1057810578
)
1057910579
continue
1058010580
is_sample = np.array(
10581-
[self.node_flags(u) & tskit.NODE_IS_SAMPLE for u in ind.nodes]
10581+
[self.nodes_flags[u] & tskit.NODE_IS_SAMPLE for u in ind.nodes]
1058210582
)
1058310583
if all(is_sample):
1058410584
individuals_nodes.append(ind.nodes)
1058510585
if name_metadata_key is not None:
1058610586
ts_individual_names.append(ind.metadata[name_metadata_key])
1058710587
else:
1058810588
ts_individual_names.append(f"tsk_{ind.id}")
10589-
elif all(~is_sample):
10589+
elif all(np.logical_not(is_sample)):
1059010590
continue
1059110591
else:
1059210592
warnings.warn(
@@ -10609,11 +10609,16 @@ def map_to_vcf_model(
1060910609
else:
1061010610
ts_individual_names.append(f"tsk_{ind.id}")
1061110611

10612-
max_nodes = max(len(nodes) for nodes in individuals_nodes)
10613-
result = np.full((len(individuals_nodes), max_nodes), -1, dtype=np.int32)
10614-
for i, nodes in enumerate(individuals_nodes):
10615-
result[i, : len(nodes)] = nodes
10616-
individuals_nodes = result
10612+
if len(individuals_nodes) > 0:
10613+
max_nodes = max(len(nodes) for nodes in individuals_nodes)
10614+
result = np.full(
10615+
(len(individuals_nodes), max_nodes), -1, dtype=np.int32
10616+
)
10617+
for i, nodes in enumerate(individuals_nodes):
10618+
result[i, : len(nodes)] = nodes
10619+
individuals_nodes = result
10620+
else:
10621+
individuals_nodes = np.empty((0, 0), dtype=np.int32)
1061710622

1061810623
if individual_names is None:
1061910624
individual_names = ts_individual_names
@@ -10624,7 +10629,7 @@ def map_to_vcf_model(
1062410629
"The number of individuals does not match the number of names"
1062510630
)
1062610631

10627-
return VcfModelMapping(result, individual_names)
10632+
return VcfModelMapping(individuals_nodes, individual_names)
1062810633

1062910634
############################################
1063010635
#

0 commit comments

Comments
 (0)