Skip to content

Commit 5c92e01

Browse files
committed
Add relative error check in adaptive time stepping algorithm for rk23, rk45dp and rk56 time steppers
1 parent 20ac12e commit 5c92e01

File tree

6 files changed

+174
-32
lines changed

6 files changed

+174
-32
lines changed

cuda/vecnorm.cu

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
#include "float3.h"
2+
3+
// dst = sqrt(ax*ax + ay*ay + az*az)
4+
extern "C" __global__ void
5+
vecnorm(float* __restrict__ dst,
6+
float* __restrict__ ax, float* __restrict__ ay, float* __restrict__ az,
7+
int N) {
8+
9+
int i = ( blockIdx.y*gridDim.x + blockIdx.x ) * blockDim.x + threadIdx.x;
10+
if (i < N) {
11+
float3 A = {ax[i], ay[i], az[i]};
12+
dst[i] = sqrtf(dot(A, A));
13+
}
14+
}

cuda/vecnorm.go

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
package cuda
2+
3+
import (
4+
"github.com/mumax/3/data"
5+
"github.com/mumax/3/util"
6+
)
7+
8+
// dst = sqrt(dot(a, a)),
9+
func VecNorm(dst *data.Slice, a *data.Slice) {
10+
util.Argument(dst.NComp() == 1 && a.NComp() == 3)
11+
util.Argument(dst.Len() == a.Len())
12+
13+
N := dst.Len()
14+
cfg := make1DConf(N)
15+
k_vecnorm_async(dst.DevPtr(0),
16+
a.DevPtr(X), a.DevPtr(Y), a.DevPtr(Z),
17+
N, cfg)
18+
}

engine/rk23.go

Lines changed: 38 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -87,13 +87,44 @@ func (rk *RK23) Step() {
8787

8888
// adjust next time step
8989
if err < MaxErr || Dt_si <= MinDt || FixDt != 0 { // mindt check to avoid infinite loop
90-
// step OK
91-
setLastErr(err)
92-
setMaxTorque(k4)
93-
NSteps++
94-
Time = t0 + Dt_si
95-
adaptDt(math.Pow(MaxErr/err, 1./3.))
96-
data.Copy(rk.k1, k4) // FSAL
90+
// Passed absolute error. Check relative error...
91+
errnorm := cuda.Buffer(1, size)
92+
defer cuda.Recycle(errnorm)
93+
cuda.VecNorm(errnorm, Err)
94+
ddtnorm := cuda.Buffer(1, size)
95+
defer cuda.Recycle(ddtnorm)
96+
cuda.VecNorm(ddtnorm, k4)
97+
maxdm := cuda.MaxVecNorm(k4)
98+
fail := 0
99+
rlerr := float64(0.0)
100+
if maxdm < MinSlope { // Only step using relerr if dmdt is big enough. Overcomes equilibrium problem
101+
fail = 0
102+
} else {
103+
cuda.Div(errnorm, errnorm, ddtnorm) //re-use errnorm
104+
rlerr = float64(cuda.MaxAbs(errnorm))
105+
fail = 1
106+
}
107+
if fail == 0 || RelErr <= 0.0 || rlerr < RelErr || Dt_si <= MinDt || FixDt != 0 { // mindt check to avoid infinite loop
108+
// step OK
109+
setLastErr(err)
110+
setMaxTorque(k4)
111+
NSteps++
112+
Time = t0 + Dt_si
113+
if fail == 0 {
114+
adaptDt(math.Pow(MaxErr/err, 1./3.))
115+
} else {
116+
adaptDt(math.Pow(RelErr/rlerr, 1./3.))
117+
}
118+
data.Copy(rk.k1, k4) // FSAL
119+
} else {
120+
// undo bad step
121+
//util.Println("Bad step at t=", t0, ", err=", err)
122+
util.Assert(FixDt == 0)
123+
Time = t0
124+
data.Copy(m, m0)
125+
NUndone++
126+
adaptDt(math.Pow(RelErr/rlerr, 1./4.))
127+
}
97128
} else {
98129
// undo bad step
99130
//util.Println("Bad step at t=", t0, ", err=", err)

engine/rk45dp.go

Lines changed: 38 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -101,13 +101,44 @@ func (rk *RK45DP) Step() {
101101

102102
// adjust next time step
103103
if err < MaxErr || Dt_si <= MinDt || FixDt != 0 { // mindt check to avoid infinite loop
104-
// step OK
105-
setLastErr(err)
106-
setMaxTorque(k7)
107-
NSteps++
108-
Time = t0 + Dt_si
109-
adaptDt(math.Pow(MaxErr/err, 1./5.))
110-
data.Copy(rk.k1, k7) // FSAL
104+
//Passed absolute error. Check relative error...
105+
errnorm := cuda.Buffer(1, size)
106+
defer cuda.Recycle(errnorm)
107+
cuda.VecNorm(errnorm, Err)
108+
ddtnorm := cuda.Buffer(1, size)
109+
defer cuda.Recycle(ddtnorm)
110+
cuda.VecNorm(ddtnorm, k7)
111+
maxdm := cuda.MaxVecNorm(k7)
112+
fail := 0
113+
rlerr := float64(0.0)
114+
if maxdm < MinSlope { // Only step using relerr if dmdt is big enough. Overcomes equilibrium problem
115+
fail = 0
116+
} else {
117+
cuda.Div(errnorm, errnorm, ddtnorm) //re-use errnorm
118+
rlerr = float64(cuda.MaxAbs(errnorm))
119+
fail = 1
120+
}
121+
if fail == 0 || RelErr <= 0.0 || rlerr < RelErr || Dt_si <= MinDt || FixDt != 0 { // mindt check to avoid infinite loop
122+
// step OK
123+
setLastErr(err)
124+
setMaxTorque(k7)
125+
NSteps++
126+
Time = t0 + Dt_si
127+
if fail == 0 {
128+
adaptDt(math.Pow(MaxErr/err, 1./5.))
129+
} else {
130+
adaptDt(math.Pow(RelErr/rlerr, 1./5.))
131+
}
132+
data.Copy(rk.k1, k7) // FSAL
133+
} else {
134+
// undo bad step
135+
//util.Println("Bad step at t=", t0, ", err=", err)
136+
util.Assert(FixDt == 0)
137+
Time = t0
138+
data.Copy(m, m0)
139+
NUndone++
140+
adaptDt(math.Pow(RelErr/rlerr, 1./6.))
141+
}
111142
} else {
112143
// undo bad step
113144
//util.Println("Bad step at t=", t0, ", err=", err)

engine/rk56.go

Lines changed: 63 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ import (
88
)
99

1010
type RK56 struct {
11+
k1 *data.Slice // torque at end of step is kept for beginning of next step
1112
}
1213

1314
func (rk *RK56) Step() {
@@ -19,14 +20,24 @@ func (rk *RK56) Step() {
1920
Dt_si = FixDt
2021
}
2122

23+
// upon resize: remove wrongly sized k1
24+
if rk.k1.Size() != m.Size() {
25+
rk.Free()
26+
}
27+
28+
// first step ever: one-time k1 init and eval
29+
if rk.k1 == nil {
30+
rk.k1 = cuda.NewSlice(3, size)
31+
torqueFn(rk.k1)
32+
}
33+
2234
t0 := Time
2335
// backup magnetization
2436
m0 := cuda.Buffer(3, size)
2537
defer cuda.Recycle(m0)
2638
data.Copy(m0, m)
2739

28-
k1, k2, k3, k4, k5, k6, k7, k8 := cuda.Buffer(3, size), cuda.Buffer(3, size), cuda.Buffer(3, size), cuda.Buffer(3, size), cuda.Buffer(3, size), cuda.Buffer(3, size), cuda.Buffer(3, size), cuda.Buffer(3, size)
29-
defer cuda.Recycle(k1)
40+
k2, k3, k4, k5, k6, k7, k8 := cuda.Buffer(3, size), cuda.Buffer(3, size), cuda.Buffer(3, size), cuda.Buffer(3, size), cuda.Buffer(3, size), cuda.Buffer(3, size), cuda.Buffer(3, size)
3041
defer cuda.Recycle(k2)
3142
defer cuda.Recycle(k3)
3243
defer cuda.Recycle(k4)
@@ -39,73 +50,105 @@ func (rk *RK56) Step() {
3950
h := float32(Dt_si * GammaLL) // internal time step = Dt * gammaLL
4051

4152
// stage 1
42-
torqueFn(k1)
53+
torqueFn(rk.k1)
4354

4455
// stage 2
4556
Time = t0 + (1./6.)*Dt_si
46-
cuda.Madd2(m, m, k1, 1, (1./6.)*h) // m = m*1 + k1*h/6
57+
cuda.Madd2(m, m, rk.k1, 1, (1./6.)*h) // m = m*1 + k1*h/6
4758
M.normalize()
4859
torqueFn(k2)
4960

5061
// stage 3
5162
Time = t0 + (4./15.)*Dt_si
52-
cuda.Madd3(m, m0, k1, k2, 1, (4./75.)*h, (16./75.)*h)
63+
cuda.Madd3(m, m0, rk.k1, k2, 1, (4./75.)*h, (16./75.)*h)
5364
M.normalize()
5465
torqueFn(k3)
5566

5667
// stage 4
5768
Time = t0 + (2./3.)*Dt_si
58-
cuda.Madd4(m, m0, k1, k2, k3, 1, (5./6.)*h, (-8./3.)*h, (5./2.)*h)
69+
cuda.Madd4(m, m0, rk.k1, k2, k3, 1, (5./6.)*h, (-8./3.)*h, (5./2.)*h)
5970
M.normalize()
6071
torqueFn(k4)
6172

6273
// stage 5
6374
Time = t0 + (4./5.)*Dt_si
64-
cuda.Madd5(m, m0, k1, k2, k3, k4, 1, (-8./5.)*h, (144./25.)*h, (-4.)*h, (16./25.)*h)
75+
cuda.Madd5(m, m0, rk.k1, k2, k3, k4, 1, (-8./5.)*h, (144./25.)*h, (-4.)*h, (16./25.)*h)
6576
M.normalize()
6677
torqueFn(k5)
6778

6879
// stage 6
6980
Time = t0 + (1.)*Dt_si
70-
cuda.Madd6(m, m0, k1, k2, k3, k4, k5, 1, (361./320.)*h, (-18./5.)*h, (407./128.)*h, (-11./80.)*h, (55./128.)*h)
81+
cuda.Madd6(m, m0, rk.k1, k2, k3, k4, k5, 1, (361./320.)*h, (-18./5.)*h, (407./128.)*h, (-11./80.)*h, (55./128.)*h)
7182
M.normalize()
7283
torqueFn(k6)
7384

7485
// stage 7
7586
Time = t0
76-
cuda.Madd5(m, m0, k1, k3, k4, k5, 1, (-11./640.)*h, (11./256.)*h, (-11/160.)*h, (11./256.)*h)
87+
cuda.Madd5(m, m0, rk.k1, k3, k4, k5, 1, (-11./640.)*h, (11./256.)*h, (-11/160.)*h, (11./256.)*h)
7788
M.normalize()
7889
torqueFn(k7)
7990

8091
// stage 8
8192
Time = t0 + (1.)*Dt_si
82-
cuda.Madd7(m, m0, k1, k2, k3, k4, k5, k7, 1, (93./640.)*h, (-18./5.)*h, (803./256.)*h, (-11./160.)*h, (99./256.)*h, (1.)*h)
93+
cuda.Madd7(m, m0, rk.k1, k2, k3, k4, k5, k7, 1, (93./640.)*h, (-18./5.)*h, (803./256.)*h, (-11./160.)*h, (99./256.)*h, (1.)*h)
8394
M.normalize()
8495
torqueFn(k8)
8596

8697
// stage 9: 6th order solution
8798
Time = t0 + (1.)*Dt_si
8899
//madd6(m, m0, k1, k3, k4, k5, k6, 1, (31./384.)*h, (1125./2816.)*h, (9./32.)*h, (125./768.)*h, (5./66.)*h)
89-
cuda.Madd7(m, m0, k1, k3, k4, k5, k7, k8, 1, (7./1408.)*h, (1125./2816.)*h, (9./32.)*h, (125./768.)*h, (5./66.)*h, (5./66.)*h)
100+
cuda.Madd7(m, m0, rk.k1, k3, k4, k5, k7, k8, 1, (7./1408.)*h, (1125./2816.)*h, (9./32.)*h, (125./768.)*h, (5./66.)*h, (5./66.)*h)
90101
M.normalize()
91102
torqueFn(k2) // re-use k2
92103

93104
// error estimate
94105
Err := cuda.Buffer(3, size)
95106
defer cuda.Recycle(Err)
96-
cuda.Madd4(Err, k1, k6, k7, k8, (-5. / 66.), (-5. / 66.), (5. / 66.), (5. / 66.))
107+
cuda.Madd4(Err, rk.k1, k6, k7, k8, (-5. / 66.), (-5. / 66.), (5. / 66.), (5. / 66.))
97108

98109
// determine error
99110
err := cuda.MaxVecNorm(Err) * float64(h)
100111

101112
// adjust next time step
102113
if err < MaxErr || Dt_si <= MinDt || FixDt != 0 { // mindt check to avoid infinite loop
103-
// step OK
104-
setLastErr(err)
105-
setMaxTorque(k2)
106-
NSteps++
107-
Time = t0 + Dt_si
108-
adaptDt(math.Pow(MaxErr/err, 1./6.))
114+
//Passed absolute error. Check relative error...
115+
errnorm := cuda.Buffer(1, size)
116+
defer cuda.Recycle(errnorm)
117+
cuda.VecNorm(errnorm, Err)
118+
ddtnorm := cuda.Buffer(1, size)
119+
defer cuda.Recycle(ddtnorm)
120+
cuda.VecNorm(ddtnorm, k2)
121+
maxdm := cuda.MaxVecNorm(k2)
122+
fail := 0
123+
rlerr := float64(0.0)
124+
if maxdm < MinSlope { // Only step using relerr if dmdt is big enough. Overcomes equilibrium problem
125+
fail = 0
126+
} else {
127+
cuda.Div(errnorm, errnorm, ddtnorm) //re-use errnorm
128+
rlerr = float64(cuda.MaxAbs(errnorm))
129+
fail = 1
130+
}
131+
if fail == 0 || RelErr <= 0.0 || rlerr < RelErr || Dt_si <= MinDt || FixDt != 0 { // mindt check to avoid infinite loop
132+
// step OK
133+
setLastErr(err)
134+
setMaxTorque(k2)
135+
NSteps++
136+
Time = t0 + Dt_si
137+
if fail == 0 {
138+
adaptDt(math.Pow(MaxErr/err, 1./6.))
139+
} else {
140+
adaptDt(math.Pow(RelErr/rlerr, 1./6.))
141+
}
142+
data.Copy(rk.k1, k2) // FSAL
143+
} else {
144+
// undo bad step
145+
//util.Println("Bad step at t=", t0, ", err=", err)
146+
util.Assert(FixDt == 0)
147+
Time = t0
148+
data.Copy(m, m0)
149+
NUndone++
150+
adaptDt(math.Pow(RelErr/rlerr, 1./7.))
151+
}
109152
} else {
110153
// undo bad step
111154
//util.Println("Bad step at t=", t0, ", err=", err)
@@ -118,4 +161,6 @@ func (rk *RK56) Step() {
118161
}
119162

120163
func (rk *RK56) Free() {
164+
rk.k1.Free()
165+
rk.k1 = nil
121166
}

engine/run.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,8 @@ var (
2020
Dt_si float64 = 1e-15 // time step = dt_si (seconds) *dt_mul, which should be nice float32
2121
MinDt, MaxDt float64 // minimum and maximum time step
2222
MaxErr float64 = 1e-5 // maximum error/step
23+
RelErr float64 = 1e-4 // maximum relative error/step
24+
MinSlope float64 = 1e8 // minimum delta per unit time before using relative error to step
2325
Headroom float64 = 0.8 // solver headroom, (Gustafsson, 1992, Control of Error and Convergence in ODE Solvers)
2426
LastErr, PeakErr float64 // error of last step, highest error ever
2527
LastTorque float64 // maxTorque of last time step
@@ -39,6 +41,7 @@ func init() {
3941
DeclVar("MinDt", &MinDt, "Minimum time step the solver can take (s)")
4042
DeclVar("MaxDt", &MaxDt, "Maximum time step the solver can take (s)")
4143
DeclVar("MaxErr", &MaxErr, "Maximum error per step the solver can tolerate (default = 1e-5)")
44+
DeclVar("RelErr", &RelErr, "Maximum relative error per step the solver can tolerate (default = 1e-4)")
4245
DeclVar("Headroom", &Headroom, "Solver headroom (default = 0.8)")
4346
DeclVar("FixDt", &FixDt, "Set a fixed time step, 0 disables fixed step (which is the default)")
4447
DeclFunc("Exit", Exit, "Exit from the program")

0 commit comments

Comments
 (0)