
Commit 0153804

Merge pull request #19 from eric-czech/mypy
Add Mypy
2 parents 3b7c30b + 4d2d4d2

File tree: 3 files changed, +38 −12 lines

.pre-commit-config.yaml (+15 −1)

@@ -21,7 +21,21 @@ repos:
       - id: black
         language_version: python3
   - repo: https://gitlab.com/pycqa/flake8
-    rev: 3.7.9
+    rev: 3.8.3
     hooks:
       - id: flake8
         language_version: python3
+  - repo: https://github.com/pre-commit/mirrors-mypy
+    rev: v0.782
+    hooks:
+      - id: mypy
+        args: ["--strict", "--show-error-codes"]
+        additional_dependencies:
+          - dask[dataframe,array]
+          - fsspec
+          - numpy
+          - scipy
+          - xarray
+          - zarr
+          - bgen_reader>=4.0.5
+          - git+https://github.com/pystatgen/sgkit
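The new mirrors-mypy hook runs mypy in strict mode on every commit, with the listed packages installed into the hook's isolated environment so their types are visible to the checker. As a rough illustration (hypothetical code, not part of this commit), strict mode rejects functions that are missing annotations, and --show-error-codes attaches the bracketed code to each report:

from typing import Sequence

# Hypothetical example: under mypy --strict this definition fails with
#   error: Function is missing a type annotation  [no-untyped-def]
def dosage_mean(dosages):
    return sum(dosages) / len(dosages)

# Fully annotated equivalent that passes strict checking.
def dosage_mean_typed(dosages: Sequence[float]) -> float:
    return sum(dosages) / len(dosages)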

setup.cfg (+8 −1)

@@ -63,6 +63,13 @@ line_length = 88
 
 [mypy-numpy.*]
 ignore_missing_imports = True
-
+[mypy-dask.*]
+ignore_missing_imports = True
+[mypy-setuptools.*]
+ignore_missing_imports = True
+[mypy-bgen_reader.*]
+ignore_missing_imports = True
+[mypy-sgkit.*]
+ignore_missing_imports = True
 [mypy-sgkit_bgen.tests.*]
 disallow_untyped_defs = False
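Each new [mypy-<package>.*] section tells mypy to treat imports from that package as Any rather than erroring because the package ships without type stubs; without these overrides a strict run would stop at the import lines. A minimal sketch of the effect (package names from this repo, the rest illustrative):

import dask.array as da  # no type stubs at this time: needs ignore_missing_imports

# Without the [mypy-dask.*] override, mypy --strict reports roughly:
#   error: Skipping analyzing 'dask.array': found module but no type
#   hints or library stubs  [import]
# With the override, da and everything imported from it is typed as Any.
arr = da.zeros((10, 3))
print(arr.shape)  # (10, 3)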

sgkit_bgen/bgen_reader.py (+15 −10)

@@ -1,8 +1,9 @@
 """BGEN reader implementation (using bgen_reader)"""
 from pathlib import Path
-from typing import Any, Union
+from typing import Any, Dict, Tuple, Union
 
 import dask.array as da
+import dask.dataframe as dd
 import numpy as np
 from bgen_reader._bgen_file import bgen_file
 from bgen_reader._bgen_metafile import bgen_metafile
@@ -18,7 +19,7 @@
 PathType = Union[str, Path]
 
 
-def _to_dict(df, dtype=None):
+def _to_dict(df: dd.DataFrame, dtype: Any = None) -> Dict[str, da.Array]:
     return {
         c: df[c].to_dask_array(lengths=True).astype(dtype[c] if dtype else df[c].dtype)
         for c in df
@@ -42,7 +43,9 @@ class BgenReader:
 
     name = "bgen_reader"
 
-    def __init__(self, path, persist=True, dtype=np.float32):
+    def __init__(
+        self, path: PathType, persist: bool = True, dtype: Any = np.float32
+    ) -> None:
         self.path = Path(path)
 
         self.metafile_filepath = infer_metafile_filepath(Path(self.path))
@@ -63,11 +66,13 @@ def __init__(self, path, persist=True, dtype=np.float32):
         self.contig = variant_arrs["chrom"]
         self.pos = variant_arrs["pos"]
 
-        def split_alleles(alleles, block_info=None):
+        def split_alleles(
+            alleles: np.ndarray, block_info: Any = None
+        ) -> np.ndarray:
             if block_info is None or len(block_info) == 0:
                 return alleles
 
-            def split(allele_row):
+            def split(allele_row: np.ndarray) -> np.ndarray:
                 alleles_list = allele_row[0].split(",")
                 assert len(alleles_list) == 2  # bi-allelic
                 return np.array(alleles_list)
@@ -98,7 +103,7 @@ def max_str_len(arr: ArrayLike) -> Any:
         self.dtype = dtype
         self.ndim = 3
 
-    def __getitem__(self, idx):
+    def __getitem__(self, idx: Any) -> np.ndarray:
         if not isinstance(idx, tuple):
             raise IndexError(f"Indexer must be tuple (received {type(idx)})")
         if len(idx) != self.ndim:
@@ -150,11 +155,11 @@ def __getitem__(self, idx):
                 if res is None:
                     res = np.zeros((len(all_vaddr), len(probs), 3), dtype=self.dtype)
                 res[i] = probs
-        res = res[..., idx[2]]
+        res = res[..., idx[2]]  # type: ignore[index]
         return np.squeeze(res, axis=squeeze_dims)
 
 
-def _to_dosage(probs: ArrayLike):
+def _to_dosage(probs: ArrayLike) -> ArrayLike:
     """Calculate the dosage from genotype likelihoods (probabilities)"""
     assert (
         probs.shape[-1] == 3
@@ -164,7 +169,7 @@ def _to_dosage(probs: ArrayLike):
 
 def read_bgen(
     path: PathType,
-    chunks: Union[str, int, tuple] = "auto",
+    chunks: Union[str, int, Tuple[int, ...]] = "auto",
     lock: bool = False,
     persist: bool = True,
 ) -> Dataset:
@@ -217,7 +222,7 @@ def read_bgen(
     )
     call_dosage = _to_dosage(call_genotype_probability)
 
-    ds = create_genotype_dosage_dataset(
+    ds: Dataset = create_genotype_dosage_dataset(
         variant_contig_names=variant_contig_names,
         variant_contig=variant_contig,
         variant_position=variant_position,
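For context on the newly annotated _to_dosage(probs: ArrayLike) -> ArrayLike: it collapses the trailing axis of genotype probabilities (hom-ref, het, hom-alt) into an expected alternate-allele count. A minimal sketch of that calculation, assuming the conventional dosage formula (the function body itself is outside this diff):

import numpy as np

# probs has shape (variants, samples, 3): P(hom ref), P(het), P(hom alt)
probs = np.array([[[0.9, 0.1, 0.0], [0.0, 0.2, 0.8]]])

# Expected ALT allele count: 0*P(hom ref) + 1*P(het) + 2*P(hom alt)
dosage = probs[..., 1] + 2 * probs[..., 2]
print(dosage)  # [[0.1 1.8]]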
