Skip to content

FEAT add param fit_intercept for NARX #78

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Apr 17, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/lint.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ jobs:

steps:
- uses: actions/checkout@v4
- uses: prefix-dev/[email protected].4
- uses: prefix-dev/[email protected].8
with:
environments: default
cache: true
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ jobs:

steps:
- uses: actions/checkout@v4
- uses: prefix-dev/[email protected].3
- uses: prefix-dev/[email protected].8
with:
environments: default
cache: true
Expand Down
66 changes: 54 additions & 12 deletions fastcan/narx.py
Original file line number Diff line number Diff line change
Expand Up @@ -570,6 +570,8 @@ class NARX(MultiOutputMixin, RegressorMixin, BaseEstimator):
The id numbers indicate which output the polynomial term belongs to.
It is useful in multi-output case.

fit_intercept : bool, default=True
Whether to fit the intercept. If set to False, intercept will be zeros.

Attributes
----------
Expand Down Expand Up @@ -641,6 +643,7 @@ class NARX(MultiOutputMixin, RegressorMixin, BaseEstimator):
"feat_ids": [None, "array-like"],
"delay_ids": [None, "array-like"],
"output_ids": [None, "array-like"],
"fit_intercept": ["boolean"],
}

def __init__(
Expand All @@ -649,10 +652,12 @@ def __init__(
feat_ids=None,
delay_ids=None,
output_ids=None,
fit_intercept=True,
):
self.feat_ids = feat_ids
self.delay_ids = delay_ids
self.output_ids = output_ids
self.fit_intercept = fit_intercept

@validate_params(
{
Expand Down Expand Up @@ -769,13 +774,12 @@ def fit(self, X, y, sample_weight=None, coef_init=None, **params):
)

self.max_delay_ = self.delay_ids_.max()
n_coef_intercept = n_terms + self.n_outputs_

if isinstance(coef_init, (type(None), str)):
# fit a one-step-ahead NARX model
time_shift_ids, poly_ids = fd2tp(self.feat_ids_, self.delay_ids_)
xy_hstack = np.c_[X, y]
osa_narx = LinearRegression()
osa_narx = LinearRegression(fit_intercept=self.fit_intercept)
time_shift_vars = make_time_shift_features(xy_hstack, time_shift_ids)
poly_terms = make_poly_features(time_shift_vars, poly_ids)
# Remove missing values
Expand All @@ -787,7 +791,10 @@ def fit(self, X, y, sample_weight=None, coef_init=None, **params):
for i in range(self.n_outputs_):
output_i_mask = self.output_ids_ == i
if np.sum(output_i_mask) == 0:
intercept[i] = np.mean(y_masked[:, i])
if self.fit_intercept:
intercept[i] = np.mean(y_masked[:, i])
else:
intercept[i] = 0.0
continue
osa_narx.fit(
poly_terms_masked[:, output_i_mask],
Expand All @@ -801,13 +808,20 @@ def fit(self, X, y, sample_weight=None, coef_init=None, **params):
self.intercept_ = intercept
return self

coef_init = np.r_[coef, intercept]
if self.fit_intercept:
coef_init = np.r_[coef, intercept]
else:
coef_init = coef
else:
coef_init = check_array(
coef_init,
ensure_2d=False,
dtype=float,
)
if self.fit_intercept:
n_coef_intercept = n_terms + self.n_outputs_
else:
n_coef_intercept = n_terms
if coef_init.shape[0] != n_coef_intercept:
raise ValueError(
"`coef_init` should have the shape of "
Expand All @@ -829,6 +843,7 @@ def fit(self, X, y, sample_weight=None, coef_init=None, **params):
self.feat_ids_,
self.delay_ids_,
self.output_ids_,
self.fit_intercept,
sample_weight_sqrt,
grad_yyd_ids,
grad_coef_ids,
Expand All @@ -837,8 +852,12 @@ def fit(self, X, y, sample_weight=None, coef_init=None, **params):
),
**params,
)
self.coef_ = res.x[: -self.n_outputs_]
self.intercept_ = res.x[-self.n_outputs_ :]
if self.fit_intercept:
self.coef_ = res.x[: -self.n_outputs_]
self.intercept_ = res.x[-self.n_outputs_ :]
else:
self.coef_ = res.x
self.intercept_ = np.zeros(self.n_outputs_, dtype=float)
return self

@staticmethod
Expand Down Expand Up @@ -931,6 +950,7 @@ def _update_dydx(
feat_ids,
delay_ids,
output_ids,
fit_intercept,
grad_yyd_ids,
grad_coef_ids,
grad_feat_ids,
Expand All @@ -947,8 +967,13 @@ def _update_dydx(
n_samples, n_y = y_hat.shape
max_delay = np.max(delay_ids)
n_coefs = feat_ids.shape[0]
n_x = n_coefs + n_y # Total number of coefficients and intercepts
y_ids = np.r_[output_ids, np.arange(n_y)]
if fit_intercept:
n_x = n_coefs + n_y # Total number of coefficients and intercepts
y_ids = np.r_[output_ids, np.arange(n_y)]
else:
n_x = n_coefs
y_ids = output_ids

x_ids = np.arange(n_x)

dydx = np.zeros((n_samples, n_y, n_x), dtype=float)
Expand Down Expand Up @@ -1011,13 +1036,18 @@ def _loss(
feat_ids,
delay_ids,
output_ids,
fit_intercept,
sample_weight_sqrt,
*args,
):
# Sum of squared errors
n_outputs = y.shape[1]
coef = coef_intercept[:-n_outputs]
intercept = coef_intercept[-n_outputs:]
if fit_intercept:
coef = coef_intercept[:-n_outputs]
intercept = coef_intercept[-n_outputs:]
else:
coef = coef_intercept
intercept = np.zeros(n_outputs, dtype=float)

y_hat = NARX._predict(
expression,
Expand Down Expand Up @@ -1045,6 +1075,7 @@ def _grad(
feat_ids,
delay_ids,
output_ids,
fit_intercept,
sample_weight_sqrt,
grad_yyd_ids,
grad_coef_ids,
Expand All @@ -1053,8 +1084,12 @@ def _grad(
):
# Sum of squared errors
n_outputs = y.shape[1]
coef = coef_intercept[:-n_outputs]
intercept = coef_intercept[-n_outputs:]
if fit_intercept:
coef = coef_intercept[:-n_outputs]
intercept = coef_intercept[-n_outputs:]
else:
coef = coef_intercept
intercept = np.zeros(n_outputs, dtype=float)

y_hat = NARX._predict(
expression,
Expand All @@ -1073,6 +1108,7 @@ def _grad(
feat_ids,
delay_ids,
output_ids,
fit_intercept,
grad_yyd_ids,
grad_coef_ids,
grad_feat_ids,
Expand Down Expand Up @@ -1266,6 +1302,7 @@ def _get_term_str(term_feat_ids, term_delay_ids):
"poly_degree": [
Interval(Integral, 1, None, closed="left"),
],
"fit_intercept": ["boolean"],
"include_zero_delay": [None, "array-like"],
"static_indices": [None, "array-like"],
"refine_verbose": ["verbose"],
Expand All @@ -1288,6 +1325,7 @@ def make_narx(
max_delay=1,
poly_degree=1,
*,
fit_intercept=True,
include_zero_delay=None,
static_indices=None,
refine_verbose=1,
Expand Down Expand Up @@ -1316,6 +1354,9 @@ def make_narx(
poly_degree : int, default=1
The maximum degree of polynomial features.

fit_intercept : bool, default=True
Whether to fit the intercept. If set to False, intercept will be zeros.

include_zero_delay : {None, array-like} of shape (n_features,) default=None
Whether to include the original (zero-delay) features.

Expand Down Expand Up @@ -1469,4 +1510,5 @@ def make_narx(
feat_ids=feat_ids,
delay_ids=delay_ids,
output_ids=output_ids,
fit_intercept=fit_intercept,
)
40 changes: 40 additions & 0 deletions tests/test_narx.py
Original file line number Diff line number Diff line change
Expand Up @@ -278,6 +278,46 @@ def test_mulit_output_warn():
0.0,
)

def test_fit_intercept():
    """Check that intercept_ stays at zero whenever fit_intercept=False."""
    # --- single-output model, terms given explicitly via feat/delay ids ---
    data_x = np.random.rand(10, 2)
    data_y = np.random.rand(10, 1)
    shift_ids = np.array([[0, 1], [1, 1]])
    term_ids = np.array([[1, 1], [2, 2]])
    f_ids, d_ids = tp2fd(shift_ids, term_ids)

    model = NARX(feat_ids=f_ids, delay_ids=d_ids, fit_intercept=False)
    # Both the default fit and the one-step-ahead warm start must keep
    # the intercept at zero.
    model.fit(data_x, data_y)
    assert_almost_equal(model.intercept_, 0.0)
    model.fit(data_x, data_y, coef_init="one_step_ahead")
    assert_almost_equal(model.intercept_, 0.0)

    # --- multi-output model built through make_narx ---
    data_x = np.random.rand(10, 2)
    data_y = np.random.rand(10, 2)
    shift_ids = np.array([[0, 1], [1, 1]])
    term_ids = np.array([[1, 1], [2, 2]])
    f_ids, d_ids = tp2fd(shift_ids, term_ids)

    model = make_narx(data_x, data_y, 1, 2, 2, fit_intercept=False)
    model.fit(data_x, data_y)
    assert_array_equal(model.intercept_, [0.0, 0.0])
    model.fit(data_x, data_y, coef_init="one_step_ahead")
    assert_array_equal(model.intercept_, [0.0, 0.0])

    # Omitting output_ids with a 2-column y raises a UserWarning, but the
    # intercept must still stay zero — also when coef_init is passed
    # explicitly (no intercept entries expected in coef_init here).
    with pytest.warns(UserWarning, match="output_ids got"):
        model = NARX(feat_ids=f_ids, delay_ids=d_ids, fit_intercept=False)
        model.fit(data_x, data_y)
        assert_array_equal(model.intercept_, [0.0, 0.0])
        model.fit(data_x, data_y, coef_init=[0, 0])
        assert_array_equal(model.intercept_, [0.0, 0.0])

def test_mulit_output_error():
X = np.random.rand(10, 2)
y = np.random.rand(10, 2)
Expand Down
81 changes: 80 additions & 1 deletion tests/test_narx_jac.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ def test_simple():
output_ids = np.array([0, 0], dtype=np.int32)
coef = np.array([0.4, 1])
intercept = np.array([1], dtype=float)
sample_weight = np.array([1, 1, 1], dtype=float)
sample_weight = np.array([1, 1, 1], dtype=float).reshape(-1, 1)


y_hat = NARX._predict(
Expand Down Expand Up @@ -72,6 +72,7 @@ def test_simple():
feat_ids,
delay_ids,
output_ids,
True,
np.sqrt(sample_weight),
grad_yyd_ids,
grad_coef_ids,
Expand All @@ -80,6 +81,37 @@ def test_simple():
)

assert_almost_equal(grad.sum(axis=0), grad_truth, decimal=4)
grad_0 = NARX._grad(
np.r_[coef_1, 0],
_predict_step,
X,
y,
feat_ids,
delay_ids,
output_ids,
True,
np.sqrt(sample_weight),
grad_yyd_ids,
grad_coef_ids,
grad_feat_ids,
grad_delay_ids,
)
grad = NARX._grad(
coef_1,
_predict_step,
X,
y,
feat_ids,
delay_ids,
output_ids,
False,
np.sqrt(sample_weight),
grad_yyd_ids,
grad_coef_ids,
grad_feat_ids,
grad_delay_ids,
)
assert_almost_equal(grad.sum(axis=0), grad_0.sum(axis=0)[:-1])

def test_complex():
"""Complex model"""
Expand Down Expand Up @@ -172,6 +204,7 @@ def test_complex():
feat_ids,
delay_ids,
output_ids,
True,
np.sqrt(np.ones((y.shape[0], 1))),
grad_yyd_ids,
grad_coef_ids,
Expand Down Expand Up @@ -219,6 +252,52 @@ def test_complex():

assert_allclose(grad.sum(axis=0)[i], grad_num.sum(), rtol=1e-1)

grad = NARX._grad(
coef,
_predict_step,
X,
y,
feat_ids,
delay_ids,
output_ids,
False,
np.sqrt(np.ones((y.shape[0], 1))),
grad_yyd_ids,
grad_coef_ids,
grad_feat_ids,
grad_delay_ids,
)
y_hat_0 = NARX._predict(
_predict_step,
X=X,
y_ref=y,
coef=coef,
intercept=[0, 0],
feat_ids=feat_ids,
delay_ids=delay_ids,
output_ids=output_ids,
)
e_0 = y_hat_0 - y

for i in range(len(coef)):
coef_1 = np.copy(coef)
coef_1[i] += delta_w

y_hat_1 = NARX._predict(
_predict_step,
X=X,
y_ref=y,
coef=coef_1,
intercept=[0, 0],
feat_ids=feat_ids,
delay_ids=delay_ids,
output_ids=output_ids,
)

e_1 = y_hat_1 - y
grad_num = (e_1 - e_0).sum(axis=1) / delta_w

assert_allclose(grad.sum(axis=0)[i], grad_num.sum(), rtol=1e-1)

def test_score_nan():
"""Test fitting scores when data contain nan."""
Expand Down
Loading