Skip to content

Commit 449ed6b

Browse files
committed
Implement basic unit-stride vector load/store
Implement vle8_v, vle16_v, vle32_v, vse8_v, vse16_v, and vse32_v. The implementation uses loop unrolling to handle one 32-bit word at a time and assumes VLEN = 128. Two kinds of illegal instruction are handled: 1. When eew is wider than SEW (the code checks `ir->eew > sew`): set vill in vtype to 1 and all other bits to 0, and set csr_vl to 0. 2. When LMUL > 1 and the access would reach a vector register numbered above 31: this case is caught with an assert.
1 parent 0934f19 commit 449ed6b

File tree

1 file changed

+210
-6
lines changed

1 file changed

+210
-6
lines changed

src/rv32_template.c

Lines changed: 210 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -3156,19 +3156,124 @@ RVOP(
31563156

31573157
RVOP(
31583158
vle8_v,
3159-
{ NO_IMP; },
3159+
{
3160+
uint8_t sew = 8 << ((rv->csr_vtype >> 3) & 0b111);
3161+
uint32_t addr = rv->X[ir->rs1];
3162+
3163+
if (ir->eew > sew) {
3164+
/* Illegal */
3165+
rv->csr_vtype = 0x80000000;
3166+
rv->csr_vl = 0;
3167+
return true;
3168+
} else {
3169+
uint8_t i = 0;
3170+
uint8_t j = 0;
3171+
for (uint32_t cnt = 0; rv->csr_vl - cnt >= 4;) {
3172+
i %= VREG_U32_COUNT;
3173+
/* Set illegal when trying to access vector register that is
3174+
* larger then 31.
3175+
*/
3176+
assert(ir->vd + j < 32);
3177+
/* Process full 32-bit words */
3178+
rv->V[ir->vd + j][i] = 0;
3179+
rv->V[ir->vd + j][i] |= rv->io.mem_read_b(rv, addr);
3180+
rv->V[ir->vd + j][i] |= rv->io.mem_read_b(rv, addr + 1) << 8;
3181+
rv->V[ir->vd + j][i] |= rv->io.mem_read_b(rv, addr + 2) << 16;
3182+
rv->V[ir->vd + j][i] |= rv->io.mem_read_b(rv, addr + 3) << 24;
3183+
cnt += 4;
3184+
i++;
3185+
3186+
/* Move to next vector register after filling VLEN */
3187+
if (!(cnt % (VREG_U32_COUNT << 2))) {
3188+
j++;
3189+
i = 0;
3190+
}
3191+
addr += 4;
3192+
}
3193+
/* Clear corresponding bits of eews */
3194+
if (rv->csr_vl % 4) {
3195+
rv->V[ir->vd + j][i] %= 0xFFFFFFFF << ((rv->csr_vl % 4) << 3);
3196+
}
3197+
/* Handle eews that is narrower then a word */
3198+
for (uint32_t cnt = 0; cnt < (rv->csr_vl % 4); cnt++) {
3199+
assert(ir->vd + j < 32); /* Illegal */
3200+
rv->V[ir->vd + j][i] |= rv->io.mem_read_b(rv, addr + cnt)
3201+
<< (cnt << 3);
3202+
}
3203+
}
3204+
},
31603205
GEN({
31613206
assert; /* FIXME: Implement */
31623207
}))
31633208
RVOP(
31643209
vle16_v,
3165-
{ NO_IMP; },
3210+
{
3211+
uint8_t sew = 8 << ((rv->csr_vtype >> 3) & 0b111);
3212+
uint32_t addr = rv->X[ir->rs1];
3213+
3214+
if (ir->eew > sew) {
3215+
/* Illegal */
3216+
rv->csr_vtype = 0x80000000;
3217+
rv->csr_vl = 0;
3218+
return true;
3219+
} else {
3220+
uint8_t i = 0;
3221+
uint8_t j = 0;
3222+
for (uint32_t cnt = 0; rv->csr_vl - cnt >= 2;) {
3223+
i %= VREG_U32_COUNT;
3224+
assert(ir->vd + j < 32);
3225+
/* Process full 32-bit words */
3226+
rv->V[ir->vd + j][i] = 0;
3227+
rv->V[ir->vd + j][i] |= rv->io.mem_read_s(rv, addr);
3228+
rv->V[ir->vd + j][i] |= rv->io.mem_read_s(rv, addr + 2) << 16;
3229+
cnt += 2;
3230+
i++;
3231+
3232+
/* Move to next vector register after filling VLEN */
3233+
if (!(cnt % (VREG_U32_COUNT << 1))) {
3234+
j++;
3235+
i = 0;
3236+
}
3237+
addr += 4;
3238+
}
3239+
if (rv->csr_vl % 2) {
3240+
assert(ir->vd + j < 32); /* Illegal */
3241+
rv->V[ir->vd + j][i] |= rv->io.mem_read_s(rv, addr);
3242+
}
3243+
}
3244+
},
31663245
GEN({
31673246
assert; /* FIXME: Implement */
31683247
}))
31693248
RVOP(
31703249
vle32_v,
3171-
{ NO_IMP; },
3250+
{
3251+
uint8_t sew = 8 << ((rv->csr_vtype >> 3) & 0b111);
3252+
uint32_t addr = rv->X[ir->rs1];
3253+
3254+
if (ir->eew > sew) {
3255+
/* Illegal */
3256+
rv->csr_vtype = 0x80000000;
3257+
rv->csr_vl = 0;
3258+
return true;
3259+
} else {
3260+
uint8_t i = 0;
3261+
uint8_t j = 0;
3262+
for (uint32_t cnt = 0; rv->csr_vl > cnt;) {
3263+
i %= VREG_U32_COUNT;
3264+
assert(ir->vd + j < 32);
3265+
rv->V[ir->vd + j][i] = rv->io.mem_read_w(rv, addr);
3266+
cnt += 1;
3267+
i++;
3268+
3269+
if (!(cnt % VREG_U32_COUNT)) {
3270+
j++;
3271+
i = 0;
3272+
}
3273+
addr += 4;
3274+
}
3275+
}
3276+
},
31723277
GEN({
31733278
assert; /* FIXME: Implement */
31743279
}))
@@ -4219,19 +4324,118 @@ RVOP(
42194324

42204325
RVOP(
42214326
vse8_v,
4222-
{ NO_IMP; },
4327+
{
4328+
uint8_t sew = 8 << ((rv->csr_vtype >> 3) & 0b111);
4329+
uint32_t addr = rv->X[ir->rs1];
4330+
4331+
if (ir->eew > sew) {
4332+
/* Illegal */
4333+
rv->csr_vtype = 0x80000000;
4334+
rv->csr_vl = 0;
4335+
return true;
4336+
} else {
4337+
uint8_t i = 0;
4338+
uint8_t j = 0;
4339+
for (uint32_t cnt = 0; rv->csr_vl - cnt >= 4;) {
4340+
i %= VREG_U32_COUNT;
4341+
/* Set illegal when trying to access vector register that is
4342+
* larger then 31.
4343+
*/
4344+
assert(ir->vs3 + j < 32);
4345+
uint32_t tmp = rv->V[ir->vs3 + j][i];
4346+
/* Process full 32-bit words */
4347+
rv->io.mem_write_b(rv, addr, (tmp) & 0xff);
4348+
rv->io.mem_write_b(rv, addr + 1, (tmp >> 8) & 0xff);
4349+
rv->io.mem_write_b(rv, addr + 2, (tmp >> 16) & 0xff);
4350+
rv->io.mem_write_b(rv, addr + 3, (tmp >> 24) & 0xff);
4351+
cnt += 4;
4352+
i++;
4353+
4354+
/* Move to next vector register after filling VLEN */
4355+
if (!(cnt % (VREG_U32_COUNT << 2))) {
4356+
j++;
4357+
i = 0;
4358+
}
4359+
addr += 4;
4360+
}
4361+
/* Handle eews that is narrower then a word */
4362+
for (uint32_t cnt = 0; cnt < (rv->csr_vl % 4); cnt++) {
4363+
assert(ir->vs3 + j < 32); /* Illegal */
4364+
uint8_t tmp = (rv->V[ir->vs3 + j][i] >> (cnt << 3)) & 0xff;
4365+
rv->io.mem_write_b(rv, addr + cnt, tmp);
4366+
}
4367+
}
4368+
},
42234369
GEN({
42244370
assert; /* FIXME: Implement */
42254371
}))
42264372
RVOP(
42274373
vse16_v,
4228-
{ NO_IMP; },
4374+
{
4375+
uint8_t sew = 8 << ((rv->csr_vtype >> 3) & 0b111);
4376+
uint32_t addr = rv->X[ir->rs1];
4377+
4378+
if (ir->eew > sew) {
4379+
/* Illegal */
4380+
rv->csr_vtype = 0x80000000;
4381+
rv->csr_vl = 0;
4382+
return true;
4383+
} else {
4384+
uint8_t i = 0;
4385+
uint8_t j = 0;
4386+
for (uint32_t cnt = 0; rv->csr_vl - cnt >= 2;) {
4387+
i %= VREG_U32_COUNT;
4388+
assert(ir->vs3 + j < 32);
4389+
uint32_t tmp = rv->V[ir->vs3 + j][i];
4390+
/* Process full 32-bit words */
4391+
rv->io.mem_write_s(rv, addr, (tmp) & 0xffff);
4392+
rv->io.mem_write_s(rv, addr + 2, (tmp >> 16) & 0xffff);
4393+
cnt += 2;
4394+
i++;
4395+
4396+
if (!(cnt % (VREG_U32_COUNT << 1))) {
4397+
j++;
4398+
i = 0;
4399+
}
4400+
addr += 4;
4401+
}
4402+
if (rv->csr_vl % 2) {
4403+
rv->io.mem_write_s(rv, addr, rv->V[ir->vs3 + j][i] & 0xffff);
4404+
}
4405+
}
4406+
},
42294407
GEN({
42304408
assert; /* FIXME: Implement */
42314409
}))
42324410
RVOP(
42334411
vse32_v,
4234-
{ NO_IMP; },
4412+
{
4413+
uint8_t sew = 8 << ((rv->csr_vtype >> 3) & 0b111);
4414+
uint32_t addr = rv->X[ir->rs1];
4415+
4416+
if (ir->eew > sew) {
4417+
/* Illegal */
4418+
rv->csr_vtype = 0x80000000;
4419+
rv->csr_vl = 0;
4420+
return true;
4421+
} else {
4422+
uint8_t i = 0;
4423+
uint8_t j = 0;
4424+
for (uint32_t cnt = 0; rv->csr_vl > cnt;) {
4425+
i %= VREG_U32_COUNT;
4426+
assert(ir->vs3 + j < 32);
4427+
rv->io.mem_write_w(rv, addr, rv->V[ir->vs3 + j][i]);
4428+
cnt += 1;
4429+
i++;
4430+
4431+
if (!(cnt % (VREG_U32_COUNT))) {
4432+
j++;
4433+
i = 0;
4434+
}
4435+
addr += 4;
4436+
}
4437+
}
4438+
},
42354439
GEN({
42364440
assert; /* FIXME: Implement */
42374441
}))

0 commit comments

Comments
 (0)