8
8
)
9
9
10
10
// RK56 is the solver type whose Step method performs one adaptive time step.
// Presumably an embedded Runge-Kutta 5(6) scheme, judging by the type name
// and the "6th order solution" stage comment in Step — confirm.
// Its only visible state is the torque slice k1 carried over between steps.
type RK56 struct {
11
// k1 is allocated and evaluated lazily by Step when it is nil, overwritten
// with the last torque evaluation after an accepted step, and released
// (then reset to nil) by Free.
+ k1 * data.Slice // torque at end of step is kept for beginning of next step
11
12
}
12
13
13
14
// Step advances the magnetization by one time step: it backs up m, evaluates
// the torque at a sequence of intermediate states (stages 1-9 below), forms an
// error estimate from the stage torques, and then either accepts the step
// (updating Time, NSteps and the cached rk.k1) or restores the backup and
// re-adapts the step size.
//
// NOTE(review): this text is a diff rendering. Bare numbers are old/new line
// numbers, "+"/"-" mark added/removed lines, and "@@" headers elide unchanged
// code (for example the declarations of m and size, and the tail of the outer
// "undo bad step" branch). The comments here describe only what is visible.
func (rk * RK56 ) Step () {
@@ -19,14 +20,24 @@ func (rk *RK56) Step() {
19
20
Dt_si = FixDt
20
21
}
21
22
23
+ // upon resize: remove wrongly sized k1
24
// NOTE(review): this check runs before the nil check below, so on the very
// first call rk.k1 is still nil here. Presumably Size() and Free() on
// *data.Slice tolerate a nil receiver — confirm, otherwise swap the two checks.
+ if rk .k1 .Size () != m .Size () {
25
+ rk .Free ()
26
+ }
27
+
28
+ // first step ever: one-time k1 init and eval
29
+ if rk .k1 == nil {
30
+ rk .k1 = cuda .NewSlice (3 , size )
31
+ torqueFn (rk .k1 )
32
+ }
33
+
22
34
t0 := Time
23
35
// backup magnetization
24
36
m0 := cuda .Buffer (3 , size )
25
37
defer cuda .Recycle (m0 )
26
38
data .Copy (m0 , m )
27
39
28
// k1 is no longer a per-step scratch buffer: the added line below allocates
// only k2..k8, and k1 now lives in rk.k1 across calls.
- k1 , k2 , k3 , k4 , k5 , k6 , k7 , k8 := cuda .Buffer (3 , size ), cuda .Buffer (3 , size ), cuda .Buffer (3 , size ), cuda .Buffer (3 , size ), cuda .Buffer (3 , size ), cuda .Buffer (3 , size ), cuda .Buffer (3 , size ), cuda .Buffer (3 , size )
29
- defer cuda .Recycle (k1 )
40
+ k2 , k3 , k4 , k5 , k6 , k7 , k8 := cuda .Buffer (3 , size ), cuda .Buffer (3 , size ), cuda .Buffer (3 , size ), cuda .Buffer (3 , size ), cuda .Buffer (3 , size ), cuda .Buffer (3 , size ), cuda .Buffer (3 , size )
30
41
defer cuda .Recycle (k2 )
31
42
defer cuda .Recycle (k3 )
32
43
defer cuda .Recycle (k4 )
@@ -39,73 +50,105 @@ func (rk *RK56) Step() {
39
50
h := float32 (Dt_si * GammaLL ) // internal time step = Dt * gammaLL
40
51
41
52
// stage 1
42
- torqueFn (k1 )
53
// NOTE(review): rk.k1 has already been filled at this point, either by the
// first-step init above or by the FSAL copy after the previous accepted step.
// This re-evaluation therefore looks redundant with the caching — confirm
// whether it is intentional (e.g. to pick up external changes to m or Time).
+ torqueFn (rk . k1 )
43
54
44
55
// stage 2
45
56
Time = t0 + (1. / 6. )* Dt_si
46
- cuda .Madd2 (m , m , k1 , 1 , (1. / 6. )* h ) // m = m*1 + k1*h/6
57
+ cuda .Madd2 (m , m , rk . k1 , 1 , (1. / 6. )* h ) // m = m*1 + k1*h/6
47
58
M .normalize ()
48
59
torqueFn (k2 )
49
60
50
61
// stage 3
51
62
Time = t0 + (4. / 15. )* Dt_si
52
- cuda .Madd3 (m , m0 , k1 , k2 , 1 , (4. / 75. )* h , (16. / 75. )* h )
63
+ cuda .Madd3 (m , m0 , rk . k1 , k2 , 1 , (4. / 75. )* h , (16. / 75. )* h )
53
64
M .normalize ()
54
65
torqueFn (k3 )
55
66
56
67
// stage 4
57
68
Time = t0 + (2. / 3. )* Dt_si
58
- cuda .Madd4 (m , m0 , k1 , k2 , k3 , 1 , (5. / 6. )* h , (- 8. / 3. )* h , (5. / 2. )* h )
69
+ cuda .Madd4 (m , m0 , rk . k1 , k2 , k3 , 1 , (5. / 6. )* h , (- 8. / 3. )* h , (5. / 2. )* h )
59
70
M .normalize ()
60
71
torqueFn (k4 )
61
72
62
73
// stage 5
63
74
Time = t0 + (4. / 5. )* Dt_si
64
- cuda .Madd5 (m , m0 , k1 , k2 , k3 , k4 , 1 , (- 8. / 5. )* h , (144. / 25. )* h , (- 4. )* h , (16. / 25. )* h )
75
+ cuda .Madd5 (m , m0 , rk . k1 , k2 , k3 , k4 , 1 , (- 8. / 5. )* h , (144. / 25. )* h , (- 4. )* h , (16. / 25. )* h )
65
76
M .normalize ()
66
77
torqueFn (k5 )
67
78
68
79
// stage 6
69
80
Time = t0 + (1. )* Dt_si
70
- cuda .Madd6 (m , m0 , k1 , k2 , k3 , k4 , k5 , 1 , (361. / 320. )* h , (- 18. / 5. )* h , (407. / 128. )* h , (- 11. / 80. )* h , (55. / 128. )* h )
81
+ cuda .Madd6 (m , m0 , rk . k1 , k2 , k3 , k4 , k5 , 1 , (361. / 320. )* h , (- 18. / 5. )* h , (407. / 128. )* h , (- 11. / 80. )* h , (55. / 128. )* h )
71
82
M .normalize ()
72
83
torqueFn (k6 )
73
84
74
85
// stage 7
75
86
Time = t0
76
- cuda .Madd5 (m , m0 , k1 , k3 , k4 , k5 , 1 , (- 11. / 640. )* h , (11. / 256. )* h , (- 11 / 160. )* h , (11. / 256. )* h )
87
+ cuda .Madd5 (m , m0 , rk . k1 , k3 , k4 , k5 , 1 , (- 11. / 640. )* h , (11. / 256. )* h , (- 11 / 160. )* h , (11. / 256. )* h )
77
88
M .normalize ()
78
89
torqueFn (k7 )
79
90
80
91
// stage 8
81
92
Time = t0 + (1. )* Dt_si
82
- cuda .Madd7 (m , m0 , k1 , k2 , k3 , k4 , k5 , k7 , 1 , (93. / 640. )* h , (- 18. / 5. )* h , (803. / 256. )* h , (- 11. / 160. )* h , (99. / 256. )* h , (1. )* h )
93
+ cuda .Madd7 (m , m0 , rk . k1 , k2 , k3 , k4 , k5 , k7 , 1 , (93. / 640. )* h , (- 18. / 5. )* h , (803. / 256. )* h , (- 11. / 160. )* h , (99. / 256. )* h , (1. )* h )
83
94
M .normalize ()
84
95
torqueFn (k8 )
85
96
86
97
// stage 9: 6th order solution
87
98
Time = t0 + (1. )* Dt_si
88
99
//madd6(m, m0, k1, k3, k4, k5, k6, 1, (31./384.)*h, (1125./2816.)*h, (9./32.)*h, (125./768.)*h, (5./66.)*h)
89
- cuda .Madd7 (m , m0 , k1 , k3 , k4 , k5 , k7 , k8 , 1 , (7. / 1408. )* h , (1125. / 2816. )* h , (9. / 32. )* h , (125. / 768. )* h , (5. / 66. )* h , (5. / 66. )* h )
100
+ cuda .Madd7 (m , m0 , rk . k1 , k3 , k4 , k5 , k7 , k8 , 1 , (7. / 1408. )* h , (1125. / 2816. )* h , (9. / 32. )* h , (125. / 768. )* h , (5. / 66. )* h , (5. / 66. )* h )
90
101
M .normalize ()
91
102
// From here on k2 holds the torque evaluated at the stage-9 state. It is what
// the relative-error check, setMaxTorque and the FSAL copy below all read.
torqueFn (k2 ) // re-use k2
92
103
93
104
// error estimate
94
105
Err := cuda .Buffer (3 , size )
95
106
defer cuda .Recycle (Err )
96
- cuda .Madd4 (Err , k1 , k6 , k7 , k8 , (- 5. / 66. ), (- 5. / 66. ), (5. / 66. ), (5. / 66. ))
107
+ cuda .Madd4 (Err , rk . k1 , k6 , k7 , k8 , (- 5. / 66. ), (- 5. / 66. ), (5. / 66. ), (5. / 66. ))
97
108
98
109
// determine error
99
110
err := cuda .MaxVecNorm (Err ) * float64 (h )
100
111
101
112
// adjust next time step
102
113
if err < MaxErr || Dt_si <= MinDt || FixDt != 0 { // mindt check to avoid infinite loop
103
- // step OK
104
- setLastErr (err )
105
- setMaxTorque (k2 )
106
- NSteps ++
107
- Time = t0 + Dt_si
108
- adaptDt (math .Pow (MaxErr / err , 1. / 6. ))
114
+ //Passed absolute error. Check relative error...
115
+ errnorm := cuda .Buffer (1 , size )
116
+ defer cuda .Recycle (errnorm )
117
+ cuda .VecNorm (errnorm , Err )
118
+ ddtnorm := cuda .Buffer (1 , size )
119
+ defer cuda .Recycle (ddtnorm )
120
+ cuda .VecNorm (ddtnorm , k2 )
121
+ maxdm := cuda .MaxVecNorm (k2 )
122
// fail is a flag, not a verdict: 1 means the relative-error criterion is
// active because the torque exceeds MinSlope. The step is rejected only if
// the combined condition further down is false.
+ fail := 0
123
+ rlerr := float64 (0.0 )
124
+ if maxdm < MinSlope { // Only step using relerr if dmdt is big enough. Overcomes equilibrium problem
125
+ fail = 0
126
+ } else {
127
// NOTE(review): this is a per-cell division of the error norm by the torque
// norm. The result for cells where ddtnorm is 0 depends on how cuda.Div
// treats a zero divisor — confirm it cannot inject Inf/NaN into rlerr.
+ cuda .Div (errnorm , errnorm , ddtnorm ) //re-use errnorm
128
+ rlerr = float64 (cuda .MaxAbs (errnorm ))
129
+ fail = 1
130
+ }
131
+ if fail == 0 || RelErr <= 0.0 || rlerr < RelErr || Dt_si <= MinDt || FixDt != 0 { // mindt check to avoid infinite loop
132
+ // step OK
133
+ setLastErr (err )
134
+ setMaxTorque (k2 )
135
+ NSteps ++
136
+ Time = t0 + Dt_si
137
+ if fail == 0 {
138
+ adaptDt (math .Pow (MaxErr / err , 1. / 6. ))
139
+ } else {
140
// NOTE(review): this branch is reached whenever fail == 1 and the step was
// accepted, including acceptance via RelErr <= 0, Dt_si <= MinDt or FixDt != 0.
// With RelErr <= 0 the ratio RelErr/rlerr is zero or negative, so math.Pow
// returns 0 or NaN. With rlerr == 0 it returns +Inf. Verify that adaptDt
// tolerates these factors, or scale by MaxErr/err when RelErr is disabled.
+ adaptDt (math .Pow (RelErr / rlerr , 1. / 6. ))
141
+ }
142
// Keep the torque at the accepted end state for the next call.
+ data .Copy (rk .k1 , k2 ) // FSAL
143
+ } else {
144
+ // undo bad step
145
+ //util.Println("Bad step at t=", t0, ", err=", err)
146
// Rejection on relative error: restore Time and m from the backup, count the
// undone step, and re-adapt the step size. FixDt == 0 is already implied by
// the condition above, so the Assert only documents that invariant.
+ util .Assert (FixDt == 0 )
147
+ Time = t0
148
+ data .Copy (m , m0 )
149
+ NUndone ++
150
+ adaptDt (math .Pow (RelErr / rlerr , 1. / 7. ))
151
+ }
109
152
} else {
110
153
// undo bad step
111
154
//util.Println("Bad step at t=", t0, ", err=", err)
@@ -118,4 +161,6 @@ func (rk *RK56) Step() {
118
161
}
119
162
120
163
// Free releases the cached torque slice rk.k1 and resets the field to nil,
// so the next Step takes its "first step ever" path and allocates and
// evaluates k1 afresh. Step also calls it when k1's size no longer matches m.
// NOTE(review): if Free is called before any Step, rk.k1 is nil here, so this
// relies on (*data.Slice).Free tolerating a nil receiver — confirm.
func (rk * RK56 ) Free () {
164
+ rk .k1 .Free ()
165
+ rk .k1 = nil
121
166
}
0 commit comments