Skip to content

Commit 820f4e6

Browse files
Merge pull request #71 from MatthewSZhang/narx-cython
ENH speed up grad computation via Cython
2 parents 993a9ed + b96534e commit 820f4e6

File tree

8 files changed

+426
-268
lines changed

8 files changed

+426
-268
lines changed

fastcan/_cancorr_fast.pyx

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ from cython.parallel import prange
1111
from scipy.linalg.cython_blas cimport isamax, idamax
1212
from sklearn.utils._cython_blas cimport ColMajor, NoTrans
1313
from sklearn.utils._cython_blas cimport _dot, _scal, _nrm2, _gemm, _axpy
14-
from sklearn.utils._typedefs cimport int32_t, uint8_t
14+
from sklearn.utils._typedefs cimport uint8_t
1515

1616

1717
@final
@@ -106,7 +106,7 @@ cdef floating _sscvm(
106106
cdef void _mgsvv(
107107
const floating* w, # IN
108108
const floating* x, # IN/OUT
109-
int n_samples, # IN
109+
int n_samples, # IN
110110
) noexcept nogil:
111111
"""
112112
Modified Gram-Schmidt process. x = x - w*w.T*x
@@ -133,7 +133,7 @@ cpdef int _forward_search(
133133
int num_threads, # IN
134134
int verbose, # IN
135135
uint8_t[::1] mask, # IN/TEMP
136-
int32_t[::1] indices, # OUT
136+
int[::1] indices, # OUT
137137
floating[::1] scores, # OUT
138138
) except -1 nogil:
139139
"""

fastcan/_fastcan.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -293,7 +293,7 @@ def _get_support_mask(self):
293293

294294
def _prepare_search(n_features, n_features_to_select, indices_include, indices_exclude):
295295
# initiated with -1
296-
indices = np.full(n_features_to_select, -1, dtype=np.intc, order="F")
296+
indices = np.full(n_features_to_select, -1, dtype=np.int32, order="F")
297297
indices[: indices_include.size] = indices_include
298298
scores = np.zeros(n_features_to_select, dtype=float, order="F")
299299
mask = np.zeros(n_features, dtype=np.ubyte, order="F")

fastcan/_narx_fast.pyx

Lines changed: 113 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,113 @@
1+
"""
2+
Fast gradient computation for narx
3+
"""
4+
# Authors: The fastcan developers
5+
# SPDX-License-Identifier: MIT
6+
7+
from cython cimport floating, final
8+
9+
10+
@final
cpdef void _update_terms(
    const floating[:, ::1] X,           # IN
    const floating[:, ::1] y_hat,       # IN
    floating[::1] terms,                # OUT
    const int[:, ::1] feat_ids,         # IN
    const int[:, ::1] delay_ids,        # IN
    const int k,                        # IN
) noexcept nogil:
    """
    Evaluate all terms for the given features and delays at timestep k.

    Row i of feat_ids / delay_ids describes one term. Its value at timestep
    k is computed by _evaluate_term and stored in terms[i], overwriting
    whatever was there before. One entry is written per row of feat_ids.
    """
    cdef:
        # Memoryview extents are Py_ssize_t; keep the counter the same width
        # so the extent is not implicitly narrowed to a C int.
        Py_ssize_t n_coefs = feat_ids.shape[0]
        Py_ssize_t i

    for i in range(n_coefs):
        terms[i] = _evaluate_term(
            X, y_hat, feat_ids[i], delay_ids[i], k
        )
30+
31+
32+
@final
cpdef void _predict_step(
    const floating[:, ::1] X,           # IN
    const floating[:, ::1] y_hat,       # IN
    floating[::1] y_pred,               # IN/OUT
    const floating[::1] coef,           # IN
    const int[:, ::1] feat_ids,         # IN
    const int[:, ::1] delay_ids,        # IN
    const int[::1] output_ids,          # IN
    const int k,                        # IN
) noexcept nogil:
    """
    Evaluate the expression for all outputs at timestep k.

    For every term i, coef[i] times the term value at timestep k is ADDED to
    y_pred[output_ids[i]]. y_pred is accumulated into, never reset here, so
    the caller must initialise it (e.g. with the intercept or zeros) before
    the call.
    """
    cdef:
        # Memoryview extents are Py_ssize_t; keep the counter the same width
        # so the extent is not implicitly narrowed to a C int.
        Py_ssize_t n_terms = feat_ids.shape[0]
        Py_ssize_t i
        int output_i

    # Add all terms
    for i in range(n_terms):
        output_i = output_ids[i]
        y_pred[output_i] += coef[i] * _evaluate_term(
            X, y_hat, feat_ids[i], delay_ids[i], k
        )
57+
58+
@final
cdef floating _evaluate_term(
    const floating[:, ::1] X,           # IN
    const floating[:, ::1] y_hat,       # IN
    const int[::1] feat_ids,            # IN
    const int[::1] delay_ids,           # IN
    const int k,                        # IN
) noexcept nogil:
    """
    Evaluate a term based on feature and delay IDs.

    The term is the product of one factor per entry of feat_ids:

    * an ID of -1 contributes no factor (it is skipped);
    * an ID smaller than X.shape[1] reads X[k - delay, ID];
    * any other ID reads y_hat[k - delay, ID - X.shape[1]].

    A term whose IDs are all -1 therefore evaluates to 1.

    NOTE(review): the row index k - delay is not range-checked here; confirm
    that callers only pass k no smaller than the largest delay.
    """
    cdef:
        # Memoryview extents are Py_ssize_t; keep them at full width instead
        # of implicitly narrowing to a C int.
        Py_ssize_t n_feats = X.shape[1]
        Py_ssize_t n_vars = feat_ids.shape[0]
        Py_ssize_t i
        floating term = 1.0
        int feat_id

    for i in range(n_vars):
        feat_id = feat_ids[i]
        if feat_id == -1:
            # Sentinel entry: no factor for this slot.
            continue
        if feat_id < n_feats:
            term *= X[k - delay_ids[i], feat_id]
        else:
            # IDs past the columns of X address columns of y_hat.
            term *= y_hat[k - delay_ids[i], feat_id - n_feats]

    return term
84+
85+
86+
@final
cpdef void _update_cfd(
    const floating[:, ::1] X,               # IN
    const floating[:, ::1] y_hat,           # IN
    floating[:, :, ::1] cfd,                # IN/OUT
    const floating[::1] coef,               # IN
    const int[:, ::1] grad_yyd_ids,         # IN
    const int[::1] grad_coef_ids,           # IN
    const int[:, ::1] grad_feat_ids,        # IN
    const int[:, ::1] grad_delay_ids,       # IN
    const int k,                            # IN
) noexcept nogil:
    """
    Updates CFD matrix based on the current state.

    Each row i of grad_yyd_ids holds three indices into cfd. For that row,
    coef[grad_coef_ids[i]] times the term described by grad_feat_ids[i] /
    grad_delay_ids[i] at timestep k is ADDED to the addressed cfd entry.
    cfd is accumulated into, never reset here, so the caller must initialise
    it before the call.
    """
    cdef:
        # Memoryview extents are Py_ssize_t; keep the counter the same width
        # so the extent is not implicitly narrowed to a C int.
        Py_ssize_t n_grad_terms = grad_yyd_ids.shape[0]
        Py_ssize_t i
        int row_y_id, col_y_id, delay_id_1

    for i in range(n_grad_terms):
        row_y_id = grad_yyd_ids[i, 0]
        col_y_id = grad_yyd_ids[i, 1]
        delay_id_1 = grad_yyd_ids[i, 2]

        cfd[row_y_id, col_y_id, delay_id_1] += (
            coef[grad_coef_ids[i]]
            * _evaluate_term(
                X, y_hat, grad_feat_ids[i], grad_delay_ids[i], k
            )
        )

fastcan/meson.build

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,4 +13,12 @@ py.extension_module(
1313
subdir: 'fastcan',
1414
cython_args: cython_args,
1515
install: true
16+
)
17+
18+
py.extension_module(
19+
'_narx_fast',
20+
'_narx_fast.pyx',
21+
subdir: 'fastcan',
22+
cython_args: cython_args,
23+
install: true
1624
)

0 commit comments

Comments
 (0)