1
+ import numpy as np
1
2
import numpy .testing as npt
2
3
import pytest
3
4
from sgkit_bgen import read_bgen
5
+ from sgkit_bgen .bgen_reader import BgenReader
4
6
7
# Chunk specifications handed to ``read_bgen(path, chunks=...)`` by the
# parametrized tests in this module.
CHUNKS = [
    (100, 200, 3),
    (100, 200, 1),
    (100, 500, 3),
    (199, 500, 3),
    ((100, 99), 500, 2),
    "auto",
]
# Positions along the first axis used by the indexing tests; -1 selects the
# last entry.
INDEXES = [0, 10, 20, 100, -1]
5
16
6
- @pytest .mark .parametrize ("chunks" , [(100 , 200 ), (100 , 500 ), (199 , 500 ), "auto" ])
17
+ # Expectations below generated using bgen-reader directly, ex:
18
+ # > from bgen_reader import open_bgen
19
+ # > bgen = open_bgen('sgkit_bgen/tests/data/example.bgen', verbose=False)
20
+ # > bgen.read(-1)[0] # Probabilities for last variant, first sample
21
+ # array([[0.0133972 , 0.98135378, 0.00524902]])
22
+ # > bgen.allele_expectation(-1)[0, 0, -1] # Dosage for last variant, first sample
23
+ # 0.9918518217727197
24
# Expected genotype probabilities, one row per position in INDEXES; the
# indexing tests compare these against ``[INDEXES, 0]`` selections.
EXPECTED_PROBABILITIES = np.array(
    [  # Generated using bgen-reader directly
        [np.nan, np.nan, np.nan],
        [0.007, 0.966, 0.0259],
        [0.993, 0.002, 0.003],
        [0.916, 0.007, 0.0765],
        [0.013, 0.981, 0.0052],
    ]
)
# Expected dosages, one value per position in INDEXES.
EXPECTED_DOSAGES = np.array(
    [np.nan, 1.018, 0.010, 0.160, 0.991]  # Generated using bgen-reader directly
)
36
+
37
+
38
+ @pytest .mark .parametrize ("chunks" , CHUNKS )
7
39
def test_read_bgen (shared_datadir , chunks ):
8
40
path = shared_datadir / "example.bgen"
9
41
ds = read_bgen (path , chunks = chunks )
@@ -12,6 +44,21 @@ def test_read_bgen(shared_datadir, chunks):
12
44
assert ds ["call_dosage" ].shape == (199 , 500 )
13
45
npt .assert_almost_equal (ds ["call_dosage" ].values [1 ][0 ], 1.987 , decimal = 3 )
14
46
npt .assert_almost_equal (ds ["call_dosage" ].values [100 ][0 ], 0.160 , decimal = 3 )
47
+ npt .assert_array_equal (ds ["call_dosage_mask" ].values [0 , 0 ], [True ])
48
+ npt .assert_array_equal (ds ["call_dosage_mask" ].values [0 , 1 ], [False ])
49
+ assert ds ["call_genotype_probability" ].shape == (199 , 500 , 3 )
50
+ npt .assert_almost_equal (
51
+ ds ["call_genotype_probability" ].values [1 ][0 ], [0.005 , 0.002 , 0.992 ], decimal = 3
52
+ )
53
+ npt .assert_almost_equal (
54
+ ds ["call_genotype_probability" ].values [100 ][0 ], [0.916 , 0.007 , 0.076 ], decimal = 3
55
+ )
56
+ npt .assert_array_equal (
57
+ ds ["call_genotype_probability_mask" ].values [0 , 0 ], [True ] * 3
58
+ )
59
+ npt .assert_array_equal (
60
+ ds ["call_genotype_probability_mask" ].values [0 , 1 ], [False ] * 3
61
+ )
15
62
16
63
17
64
def test_read_bgen_with_sample_file (shared_datadir ):
@@ -38,3 +85,43 @@ def test_read_bgen_with_no_samples(shared_datadir):
38
85
"sample_3" ,
39
86
"sample_4" ,
40
87
]
88
+
89
+
90
@pytest.mark.parametrize("chunks", CHUNKS)
def test_read_bgen_fancy_index(shared_datadir, chunks):
    """Check list-based (fancy) indexing of the dataset for each chunking.

    Selects the INDEXES positions on the first axis together with position 0
    on the second axis, and compares probabilities and dosages against the
    expectations generated with bgen-reader, to 3 decimal places.
    """
    path = shared_datadir / "example.bgen"
    ds = read_bgen(path, chunks=chunks)
    npt.assert_almost_equal(
        ds["call_genotype_probability"][INDEXES, 0], EXPECTED_PROBABILITIES, decimal=3
    )
    npt.assert_almost_equal(ds["call_dosage"][INDEXES, 0], EXPECTED_DOSAGES, decimal=3)
98
+
99
+
100
@pytest.mark.parametrize("chunks", CHUNKS)
def test_read_bgen_scalar_index(shared_datadir, chunks):
    """Check integer (scalar) indexing of the dataset for each chunking.

    For every position in INDEXES, the probabilities and dosage at second-axis
    position 0 are compared with the matching expected row/value, and each of
    the three probability components is additionally read one scalar at a time.
    """
    path = shared_datadir / "example.bgen"
    ds = read_bgen(path, chunks=chunks)
    for i, ix in enumerate(INDEXES):
        npt.assert_almost_equal(
            ds["call_genotype_probability"][ix, 0], EXPECTED_PROBABILITIES[i], decimal=3
        )
        npt.assert_almost_equal(
            ds["call_dosage"][ix, 0], EXPECTED_DOSAGES[i], decimal=3
        )
        # Also index the last axis with an int so that all three indexers
        # are scalars.
        for j in range(3):
            npt.assert_almost_equal(
                ds["call_genotype_probability"][ix, 0, j],
                EXPECTED_PROBABILITIES[i, j],
                decimal=3,
            )
117
+
118
+
119
def test_read_bgen_raise_on_invalid_indexers(shared_datadir):
    """Check that BgenReader raises IndexError for unsupported indexers.

    Each case asserts both the exception type and that the message matches
    the given pattern.
    """
    path = shared_datadir / "example.bgen"
    reader = BgenReader(path)
    # A list instead of a tuple.
    with pytest.raises(IndexError, match="Indexer must be tuple"):
        reader[[0]]
    # A tuple with a single item.
    with pytest.raises(IndexError, match="Indexer must have 3 items"):
        reader[(slice(None),)]
    # A 3-tuple whose items are lists rather than slices or ints.
    with pytest.raises(IndexError, match="Indexer must contain only slices or ints"):
        reader[([0], [0], [0])]
0 commit comments