@@ -5668,3 +5668,233 @@ def test_different_node_flags(self):
5668
5668
result = ts .sample_nodes_by_ploidy (2 )
5669
5669
assert result .shape == (1 , 2 )
5670
5670
assert_array_equal (result , np .array ([[0 , 2 ]]))
5671
+
5672
+
5673
class TestMapToVcfModel:
    """Tests for ``TreeSequence.map_to_vcf_model``.

    Each test inspects the returned ``tskit.VcfModelMapping``: its
    ``individuals_nodes`` array (one row per output individual, one column
    per node slot) and its ``individuals_name`` array (one name per row),
    or checks the error/warning raised for invalid input.
    """

    def test_no_individuals_default_ploidy(self):
        """With no individuals and no ploidy, each sample gets its own row."""
        ts = tskit.Tree.generate_balanced(4).tree_sequence
        assert ts.num_individuals == 0

        # Default ploidy should be 1
        result = ts.map_to_vcf_model()
        assert isinstance(result, tskit.VcfModelMapping)
        assert result.individuals_nodes.shape == (4, 1)
        assert_array_equal(result.individuals_nodes, np.arange(4).reshape(4, 1))
        assert result.individuals_name.shape == (4,)
        assert list(result.individuals_name) == [f"tsk_{i}" for i in range(4)]

    def test_no_individuals_custom_ploidy(self):
        """With ploidy=2, consecutive sample pairs are grouped into one row."""
        ts = tskit.Tree.generate_balanced(6).tree_sequence
        assert ts.num_individuals == 0

        # Use ploidy = 2
        result = ts.map_to_vcf_model(ploidy=2)
        assert isinstance(result, tskit.VcfModelMapping)
        assert result.individuals_nodes.shape == (3, 2)
        # Row i is expected to hold nodes (2 * i, 2 * i + 1).
        assert_array_equal(result.individuals_nodes, np.arange(6).reshape(3, 2))
        assert result.individuals_name.shape == (3,)
        assert list(result.individuals_name) == [f"tsk_{i}" for i in range(3)]

    def test_no_individuals_uneven_ploidy(self):
        """A sample count that is not a multiple of ploidy is rejected."""
        ts = tskit.Tree.generate_balanced(5).tree_sequence
        # This tree sequence has no individuals
        assert ts.num_individuals == 0

        # 5 samples cannot be evenly divided into ploidy=2
        with pytest.raises(ValueError, match="not a multiple"):
            ts.map_to_vcf_model(ploidy=2)

    def test_with_individuals(self):
        """Individuals present in the tree sequence define the rows."""
        ts = msprime.sim_ancestry(
            5,
            random_seed=42,
        )
        result = ts.map_to_vcf_model()
        assert isinstance(result, tskit.VcfModelMapping)
        assert result.individuals_nodes.shape == (5, 2)
        assert_array_equal(
            result.individuals_nodes,
            np.array([[0, 1], [2, 3], [4, 5], [6, 7], [8, 9]]),
        )
        assert result.individuals_name.shape == (5,)
        assert list(result.individuals_name) == [f"tsk_{i}" for i in range(5)]

    def test_with_individuals_and_ploidy_error(self):
        """Passing ploidy when individuals exist raises ValueError."""
        tables = tskit.TableCollection(1.0)
        tables.individuals.add_row()
        tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=0, individual=0)
        ts = tables.tree_sequence()

        with pytest.raises(ValueError, match="Cannot specify ploidy when individuals"):
            ts.map_to_vcf_model(ploidy=2)

    def test_specific_individuals(self):
        """Selecting a subset of individuals pads shorter rows with -1."""
        tables = tskit.TableCollection(1.0)
        # Create 5 individuals with varying ploidy: individual i owns i + 1 nodes.
        for i in range(5):
            tables.individuals.add_row()
            for _ in range(i + 1):
                tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=0, individual=i)
        ts = tables.tree_sequence()

        result = ts.map_to_vcf_model(individuals=[1, 3])
        assert isinstance(result, tskit.VcfModelMapping)
        # Individual 1 has ploidy 2, individual 3 has ploidy 4, so the array
        # is as wide as the larger of the two.
        assert result.individuals_nodes.shape == (2, 4)
        assert_array_equal(
            result.individuals_nodes,
            np.array([[1, 2, -1, -1], [6, 7, 8, 9]]),
        )

        assert result.individuals_name.shape == (2,)
        assert list(result.individuals_name) == ["tsk_1", "tsk_3"]

    def test_individual_with_no_nodes_warning(self):
        """An individual without nodes is skipped with a single warning."""
        tables = tskit.TableCollection(1.0)
        # Individual with no nodes
        tables.individuals.add_row()
        # Individual with nodes
        tables.individuals.add_row()
        tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=0, individual=1)
        ts = tables.tree_sequence()

        with warnings.catch_warnings(record=True) as w:
            # Record every warning regardless of the ambient filter settings.
            warnings.simplefilter("always")
            result = ts.map_to_vcf_model()
        assert len(w) == 1
        assert "Individual 0 has no nodes" in str(w[0].message)

        # Should only include individual 1
        assert result.individuals_nodes.shape == (1, 1)
        assert result.individuals_nodes[0, 0] == 0
        assert result.individuals_name.shape == (1,)
        assert result.individuals_name[0] == "tsk_1"

    def test_individual_with_no_nodes_error(self):
        """Explicitly requesting an individual without nodes raises ValueError."""
        tables = tskit.TableCollection(1.0)
        # Individual with no nodes
        tables.individuals.add_row()
        # Individual with nodes
        tables.individuals.add_row()
        tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=0, individual=1)
        ts = tables.tree_sequence()

        with pytest.raises(ValueError, match="Individual 0 has no nodes"):
            ts.map_to_vcf_model(individuals=[0])

    def test_invalid_individual_id(self):
        """Negative and out-of-range individual IDs are both rejected."""
        tables = tskit.TableCollection(1.0)
        tables.individuals.add_row()
        tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=0, individual=0)
        ts = tables.tree_sequence()

        # -1 is below the valid range; 1 is past the single individual (ID 0).
        for bad_id in (-1, 1):
            with pytest.raises(ValueError, match="Invalid individual ID"):
                ts.map_to_vcf_model(individuals=[bad_id])

    def test_mixed_sample_non_sample_warning(self):
        """An individual mixing sample and non-sample nodes triggers a warning."""
        tables = tskit.TableCollection(1.0)
        tables.individuals.add_row()
        tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=0, individual=0)
        tables.nodes.add_row(flags=0, time=0, individual=0)  # Non-sample node
        ts = tables.tree_sequence()

        with warnings.catch_warnings(record=True) as w:
            # Record every warning regardless of the ambient filter settings.
            warnings.simplefilter("always")
            ts.map_to_vcf_model()
        assert len(w) == 1
        assert "Individual 0 has both sample and non-sample nodes" in str(
            w[0].message
        )

    def test_samples_without_individuals_warning(self):
        """A sample node with no individual triggers a warning."""
        tables = tskit.TableCollection(1.0)
        tables.individuals.add_row()
        # Node with individual
        tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=0, individual=0)
        # Node without individual
        tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=0, individual=tskit.NULL)
        ts = tables.tree_sequence()

        with warnings.catch_warnings(record=True) as w:
            # Record every warning regardless of the ambient filter settings.
            warnings.simplefilter("always")
            ts.map_to_vcf_model()
        assert len(w) == 1
        assert "At least one sample node does not have an individual ID" in str(
            w[0].message
        )

    def test_metadata_key_for_names(self):
        """``name_metadata_key`` pulls names from individual metadata."""
        tables = tskit.TableCollection(1.0)

        # Add individuals with metadata
        tables.individuals.metadata_schema = tskit.MetadataSchema(
            {
                "codec": "json",
                "type": "object",
                "properties": {"name": {"type": "string"}},
            }
        )
        tables.individuals.add_row(metadata={"name": "ind1"})
        tables.individuals.add_row(metadata={"name": "ind2"})

        # Add nodes
        tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=0, individual=0)
        tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=0, individual=1)
        ts = tables.tree_sequence()

        result = ts.map_to_vcf_model(name_metadata_key="name")
        assert result.individuals_name.shape == (2,)
        assert list(result.individuals_name) == ["ind1", "ind2"]

    def test_custom_individual_names(self):
        """``individual_names`` overrides the generated names."""
        tables = tskit.TableCollection(1.0)
        tables.individuals.add_row()
        tables.individuals.add_row()
        tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=0, individual=0)
        tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=0, individual=1)
        ts = tables.tree_sequence()

        custom_names = ["individual_A", "individual_B"]
        result = ts.map_to_vcf_model(individual_names=custom_names)
        assert result.individuals_name.shape == (2,)
        assert list(result.individuals_name) == custom_names

    def test_name_conflict_error(self):
        """Giving both naming options at once raises ValueError."""
        tables = tskit.TableCollection(1.0)
        ts = tables.tree_sequence()
        with pytest.raises(
            ValueError,
            match="Cannot specify both name_metadata_key and individual_names",
        ):
            ts.map_to_vcf_model(
                name_metadata_key="name", individual_names=["custom_name"]
            )

    def test_name_count_mismatch_error(self):
        """The number of custom names must equal the number of individuals."""
        tables = tskit.TableCollection(1.0)
        tables.individuals.add_row()
        tables.individuals.add_row()
        tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=0, individual=0)
        tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=0, individual=1)
        ts = tables.tree_sequence()

        # Two individuals but only one name supplied.
        with pytest.raises(
            ValueError, match="number of individuals does not match the number of names"
        ):
            ts.map_to_vcf_model(individual_names=["only_one_name"])
0 commit comments