From 5e23086531635e91c5424a5a2397ffd7f9c9aef2 Mon Sep 17 00:00:00 2001 From: Amaan Qureshi Date: Sun, 6 Apr 2025 13:21:46 +0800 Subject: [PATCH 1/4] Squashed commit of the following: feat!: update bindings feat!: update architecture headers build: update CMake files & symbols feat!: update TriCore to 5.1.0 feat!: update SPARC to 5.1.0 feat!: update S390X to 5.1.0 feat!: update RISC-V to 5.1.0 feat!: update PPC to 5.1.0 feat!: update MIPS to 5.1.0 feat!: update M68K to 5.1.0 feat!: update i386 to 5.1.0 feat!: update ARM to 5.1.0 feat!: update TCG to 5.1.0 feat!: update FPU to 5.1.0 feat!: update QEMU core to 5.1.0 --- CMakeLists.txt | 4 + bindings/dotnet/UnicornEngine/Const/Mips.fs | 6 +- bindings/dotnet/UnicornEngine/Const/Riscv.fs | 12 +- bindings/go/unicorn/mips_const.go | 6 +- bindings/go/unicorn/riscv_const.go | 12 +- .../java/src/main/java/unicorn/MipsConst.java | 6 +- .../src/main/java/unicorn/RiscvConst.java | 12 +- bindings/pascal/unicorn/MipsConst.pas | 8 +- bindings/pascal/unicorn/RiscvConst.pas | 14 +- bindings/python/unicorn/mips_const.py | 6 +- bindings/python/unicorn/riscv_const.py | 12 +- .../lib/unicorn_engine/mips_const.rb | 6 +- .../lib/unicorn_engine/riscv_const.rb | 12 +- bindings/rust/src/riscv.rs | 6 +- bindings/zig/unicorn/mips_const.zig | 6 +- bindings/zig/unicorn/riscv_const.zig | 10 +- include/unicorn/mips.h | 2 + include/unicorn/riscv.h | 6 +- include/unicorn/unicorn.h | 12 + qemu/aarch64.h | 117 +- qemu/accel/tcg/cputlb.c | 367 +- qemu/accel/tcg/tcg-runtime-gvec.c | 144 + qemu/accel/tcg/tcg-runtime.h | 15 + qemu/accel/tcg/translate-all.c | 8 +- qemu/arm.h | 192 +- qemu/exec.c | 32 +- qemu/fpu/softfloat-specialize.inc.c | 36 +- qemu/fpu/softfloat.c | 1632 ++--- qemu/include/elf.h | 4 + qemu/include/exec/cpu-all.h | 1 + qemu/include/exec/cpu-common.h | 3 - qemu/include/exec/cpu-defs.h | 7 +- qemu/include/exec/cpu_ldst.h | 176 +- qemu/include/exec/exec-all.h | 44 + qemu/include/fpu/softfloat-helpers.h | 24 +- qemu/include/fpu/softfloat-macros.h | 16 +- qemu/include/fpu/softfloat-types.h | 39 +- qemu/include/fpu/softfloat.h | 359 +- qemu/include/hw/registerfields.h | 32 +- qemu/include/qemu/bswap.h | 2 + qemu/include/qemu/compiler.h | 2 + qemu/include/qemu/host-utils.h | 4 +- qemu/include/qemu/osdep.h | 81 +- qemu/include/tcg/tcg-op-gvec.h | 296 +- qemu/include/tcg/tcg-op.h | 13 +- qemu/include/tcg/tcg-opc.h | 21 +- qemu/include/tcg/tcg.h | 5 +- qemu/m68k.h | 97 +- qemu/mips.h | 161 +- qemu/mips64.h | 161 +- qemu/mips64el.h | 161 +- qemu/mipsel.h | 161 +- qemu/ppc.h | 360 +- qemu/ppc64.h | 360 +- qemu/riscv32.h | 1099 +++- qemu/riscv64.h | 1099 +++- qemu/s390x.h | 96 +- qemu/sparc.h | 96 +- qemu/sparc64.h | 96 +- qemu/target/arm/backup.c | 5431 +++++++++++++++++ qemu/target/arm/cpu-param.h | 2 +- qemu/target/arm/cpu-qom.h | 9 +- qemu/target/arm/cpu.c | 17 +- qemu/target/arm/cpu.h | 107 +- qemu/target/arm/cpu64.c | 19 +- qemu/target/arm/crypto_helper.c | 272 +- qemu/target/arm/decode-a32.inc.c | 4 +- qemu/target/arm/decode-neon-dp.inc.c | 2806 +++++++++ qemu/target/arm/decode-neon-ls.inc.c | 149 + qemu/target/arm/decode-neon-shared.inc.c | 271 + qemu/target/arm/decode-sve.inc.c | 4 +- qemu/target/arm/helper-a64.c | 90 +- qemu/target/arm/helper-a64.h | 16 + qemu/target/arm/helper-sve.h | 542 +- qemu/target/arm/helper.c | 743 ++- qemu/target/arm/helper.h | 160 +- qemu/target/arm/internals.h | 159 +- qemu/target/arm/m_helper.c | 9 +- qemu/target/arm/mte_helper.c | 913 +++ qemu/target/arm/neon_helper.c | 41 - qemu/target/arm/op_helper.c | 17 + 
qemu/target/arm/pauth_helper.c | 6 +- qemu/target/arm/sve_helper.c | 4384 +++++++------ qemu/target/arm/tlb_helper.c | 6 +- qemu/target/arm/translate-a64.c | 3283 +++++----- qemu/target/arm/translate-a64.h | 16 +- qemu/target/arm/translate-neon.inc.c | 4276 +++++++++++++ qemu/target/arm/translate-sve.c | 1510 +++-- qemu/target/arm/translate-vfp.inc.c | 15 +- qemu/target/arm/translate.c | 4913 ++++----------- qemu/target/arm/translate.h | 131 +- qemu/target/arm/vec_helper.c | 274 +- qemu/target/arm/vec_internal.h | 33 + qemu/target/arm/vfp_helper.c | 25 +- qemu/target/i386/cpu.c | 108 +- qemu/target/i386/cpu.h | 18 + qemu/target/i386/excp_helper.c | 4 +- qemu/target/i386/fpu_helper.c | 1738 +++++- qemu/target/i386/helper.h | 1 + qemu/target/i386/ops_sse.h | 90 +- qemu/target/i386/svm.h | 1 + qemu/target/i386/svm_helper.c | 7 +- qemu/target/i386/translate.c | 34 +- qemu/target/m68k/cpu.c | 6 - qemu/target/m68k/fpu_helper.c | 11 +- qemu/target/m68k/helper.c | 16 +- qemu/target/m68k/helper.h | 1 + qemu/target/m68k/softfloat.c | 151 +- qemu/target/m68k/softfloat.h | 1 - qemu/target/m68k/translate.c | 17 + qemu/target/mips/cp0_helper.c | 11 +- qemu/target/mips/cpu-param.h | 3 +- qemu/target/mips/cpu.h | 28 + qemu/target/mips/fpu_helper.c | 659 +- qemu/target/mips/helper.c | 4 + qemu/target/mips/helper.h | 73 +- qemu/target/mips/internal.h | 3 +- qemu/target/mips/mips-defs.h | 51 +- qemu/target/mips/msa_helper.c | 1408 ++++- qemu/target/mips/op_helper.c | 4 + qemu/target/mips/translate.c | 219 +- qemu/target/mips/translate_init.inc.c | 95 +- qemu/target/ppc/cpu.h | 33 +- qemu/target/ppc/dfp_helper.c | 4 +- qemu/target/ppc/excp_helper.c | 131 +- qemu/target/ppc/helper.h | 5 +- qemu/target/ppc/int_helper.c | 23 +- qemu/target/ppc/translate.c | 67 +- qemu/target/ppc/translate/fp-impl.inc.c | 732 ++- qemu/target/ppc/translate/vmx-impl.inc.c | 41 +- qemu/target/ppc/translate/vsx-impl.inc.c | 2 +- qemu/target/ppc/translate_init.inc.c | 14 +- qemu/target/riscv/cpu.c | 159 +- qemu/target/riscv/cpu.h | 81 +- qemu/target/riscv/cpu_bits.h | 15 + qemu/target/riscv/cpu_helper.c | 125 +- qemu/target/riscv/csr.c | 222 +- qemu/target/riscv/fpu_helper.c | 33 +- qemu/target/riscv/helper.h | 1074 ++++ .../riscv/insn_trans/trans_privileged.inc.c | 50 +- qemu/target/riscv/insn_trans/trans_rvd.inc.c | 2 +- qemu/target/riscv/insn_trans/trans_rvf.inc.c | 20 +- qemu/target/riscv/insn_trans/trans_rvh.inc.c | 33 + qemu/target/riscv/insn_trans/trans_rvv.inc.c | 2954 +++++++++ qemu/target/riscv/internals.h | 41 + qemu/target/riscv/op_helper.c | 31 +- qemu/target/riscv/pmp.c | 18 +- qemu/target/riscv/riscv32/decode_insn16.inc.c | 51 +- qemu/target/riscv/riscv32/decode_insn32.inc.c | 3242 +++++++++- qemu/target/riscv/riscv64/decode_insn16.inc.c | 55 +- qemu/target/riscv/riscv64/decode_insn32.inc.c | 3385 ++++++++-- qemu/target/riscv/translate.c | 26 +- qemu/target/riscv/vector_helper.c | 4913 +++++++++++++++ qemu/target/s390x/cpu_features_def.inc.h | 3 +- qemu/target/s390x/fpu_helper.c | 22 +- qemu/target/s390x/gen-features.c | 1 + qemu/target/s390x/helper.h | 4 - qemu/target/s390x/insn-data.def | 6 +- qemu/target/s390x/internal.h | 3 +- qemu/target/s390x/translate.c | 3 +- qemu/target/s390x/translate_vx.inc.c | 109 +- qemu/target/s390x/vec_fpu_helper.c | 2 +- qemu/target/s390x/vec_int_helper.c | 31 - qemu/target/sparc/fop_helper.c | 4 +- qemu/target/tricore/translate.c | 1 - qemu/tcg/README | 7 +- qemu/tcg/aarch64/tcg-target.h | 3 + qemu/tcg/aarch64/tcg-target.inc.c | 77 +- qemu/tcg/aarch64/tcg-target.opc.h | 1 + 
qemu/tcg/arm/tcg-target.inc.c | 2 - qemu/tcg/i386/tcg-target.h | 3 + qemu/tcg/i386/tcg-target.inc.c | 120 +- qemu/tcg/mips/tcg-target.inc.c | 2 - qemu/tcg/ppc/tcg-target.h | 3 + qemu/tcg/ppc/tcg-target.inc.c | 42 +- qemu/tcg/ppc/tcg-target.opc.h | 1 - qemu/tcg/riscv/tcg-target.inc.c | 4 - qemu/tcg/s390/tcg-target.inc.c | 2 - qemu/tcg/sparc/tcg-target.inc.c | 2 - qemu/tcg/tcg-op-gvec.c | 377 +- qemu/tcg/tcg-op-vec.c | 63 +- qemu/tcg/tcg-op.c | 16 +- qemu/tcg/tcg.c | 85 +- qemu/tricore.h | 96 +- qemu/util/guest-random.c | 1 - qemu/x86_64.h | 97 +- symbols.sh | 1579 ++++- uc.c | 1 - 188 files changed, 50201 insertions(+), 13346 deletions(-) create mode 100644 qemu/target/arm/backup.c create mode 100644 qemu/target/arm/decode-neon-dp.inc.c create mode 100644 qemu/target/arm/decode-neon-ls.inc.c create mode 100644 qemu/target/arm/decode-neon-shared.inc.c create mode 100644 qemu/target/arm/mte_helper.c create mode 100644 qemu/target/arm/translate-neon.inc.c create mode 100644 qemu/target/arm/vec_internal.h create mode 100644 qemu/target/riscv/insn_trans/trans_rvh.inc.c create mode 100644 qemu/target/riscv/insn_trans/trans_rvv.inc.c create mode 100644 qemu/target/riscv/internals.h create mode 100644 qemu/target/riscv/vector_helper.c diff --git a/CMakeLists.txt b/CMakeLists.txt index e4df966aed..81c56aa5e9 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -568,6 +568,7 @@ add_library(arm-softmmu STATIC qemu/target/arm/helper.c qemu/target/arm/iwmmxt_helper.c qemu/target/arm/m_helper.c + qemu/target/arm/mte_helper.c qemu/target/arm/neon_helper.c qemu/target/arm/op_helper.c qemu/target/arm/psci.c @@ -612,6 +613,7 @@ add_library(aarch64-softmmu STATIC qemu/target/arm/helper.c qemu/target/arm/iwmmxt_helper.c qemu/target/arm/m_helper.c + qemu/target/arm/mte_helper.c qemu/target/arm/neon_helper.c qemu/target/arm/op_helper.c qemu/target/arm/pauth_helper.c @@ -1019,6 +1021,7 @@ add_library(riscv32-softmmu STATIC qemu/target/riscv/pmp.c qemu/target/riscv/translate.c qemu/target/riscv/unicorn.c + qemu/target/riscv/vector_helper.c ) if(MSVC) @@ -1052,6 +1055,7 @@ add_library(riscv64-softmmu STATIC qemu/target/riscv/pmp.c qemu/target/riscv/translate.c qemu/target/riscv/unicorn.c + qemu/target/riscv/vector_helper.c ) if(MSVC) diff --git a/bindings/dotnet/UnicornEngine/Const/Mips.fs b/bindings/dotnet/UnicornEngine/Const/Mips.fs index 76d0c71494..74c4946442 100644 --- a/bindings/dotnet/UnicornEngine/Const/Mips.fs +++ b/bindings/dotnet/UnicornEngine/Const/Mips.fs @@ -41,8 +41,10 @@ module Mips = let UC_CPU_MIPS64_I6500 = 9 let UC_CPU_MIPS64_LOONGSON_2E = 10 let UC_CPU_MIPS64_LOONGSON_2F = 11 - let UC_CPU_MIPS64_MIPS64DSPR2 = 12 - let UC_CPU_MIPS64_ENDING = 13 + let UC_CPU_MIPS64_LOONGSON_3A1000 = 12 + let UC_CPU_MIPS64_LOONGSON_3A4000 = 13 + let UC_CPU_MIPS64_MIPS64DSPR2 = 14 + let UC_CPU_MIPS64_ENDING = 15 // MIPS registers diff --git a/bindings/dotnet/UnicornEngine/Const/Riscv.fs b/bindings/dotnet/UnicornEngine/Const/Riscv.fs index 244e5fec45..d791cfcd5f 100644 --- a/bindings/dotnet/UnicornEngine/Const/Riscv.fs +++ b/bindings/dotnet/UnicornEngine/Const/Riscv.fs @@ -10,15 +10,17 @@ module Riscv = // RISCV32 CPU let UC_CPU_RISCV32_ANY = 0 - let UC_CPU_RISCV32_BASE32 = 1 - let UC_CPU_RISCV32_SIFIVE_E31 = 2 - let UC_CPU_RISCV32_SIFIVE_U34 = 3 - let UC_CPU_RISCV32_ENDING = 4 + let UC_CPU_RISCV32_BASE = 1 + let UC_CPU_RISCV32_IBEX = 2 + let UC_CPU_RISCV32_SIFIVE_E31 = 3 + let UC_CPU_RISCV32_SIFIVE_E34 = 4 + let UC_CPU_RISCV32_SIFIVE_U34 = 5 + let UC_CPU_RISCV32_ENDING = 6 // RISCV64 CPU let UC_CPU_RISCV64_ANY = 0 - 
let UC_CPU_RISCV64_BASE64 = 1 + let UC_CPU_RISCV64_BASE = 1 let UC_CPU_RISCV64_SIFIVE_E51 = 2 let UC_CPU_RISCV64_SIFIVE_U54 = 3 let UC_CPU_RISCV64_ENDING = 4 diff --git a/bindings/go/unicorn/mips_const.go b/bindings/go/unicorn/mips_const.go index dfb6ddb066..c78b813e2b 100644 --- a/bindings/go/unicorn/mips_const.go +++ b/bindings/go/unicorn/mips_const.go @@ -36,8 +36,10 @@ const ( CPU_MIPS64_I6500 = 9 CPU_MIPS64_LOONGSON_2E = 10 CPU_MIPS64_LOONGSON_2F = 11 - CPU_MIPS64_MIPS64DSPR2 = 12 - CPU_MIPS64_ENDING = 13 + CPU_MIPS64_LOONGSON_3A1000 = 12 + CPU_MIPS64_LOONGSON_3A4000 = 13 + CPU_MIPS64_MIPS64DSPR2 = 14 + CPU_MIPS64_ENDING = 15 // MIPS registers diff --git a/bindings/go/unicorn/riscv_const.go b/bindings/go/unicorn/riscv_const.go index 08458f77a6..d497b50f87 100644 --- a/bindings/go/unicorn/riscv_const.go +++ b/bindings/go/unicorn/riscv_const.go @@ -5,15 +5,17 @@ const ( // RISCV32 CPU CPU_RISCV32_ANY = 0 - CPU_RISCV32_BASE32 = 1 - CPU_RISCV32_SIFIVE_E31 = 2 - CPU_RISCV32_SIFIVE_U34 = 3 - CPU_RISCV32_ENDING = 4 + CPU_RISCV32_BASE = 1 + CPU_RISCV32_IBEX = 2 + CPU_RISCV32_SIFIVE_E31 = 3 + CPU_RISCV32_SIFIVE_E34 = 4 + CPU_RISCV32_SIFIVE_U34 = 5 + CPU_RISCV32_ENDING = 6 // RISCV64 CPU CPU_RISCV64_ANY = 0 - CPU_RISCV64_BASE64 = 1 + CPU_RISCV64_BASE = 1 CPU_RISCV64_SIFIVE_E51 = 2 CPU_RISCV64_SIFIVE_U54 = 3 CPU_RISCV64_ENDING = 4 diff --git a/bindings/java/src/main/java/unicorn/MipsConst.java b/bindings/java/src/main/java/unicorn/MipsConst.java index bf6d8cf2fe..567f55661d 100644 --- a/bindings/java/src/main/java/unicorn/MipsConst.java +++ b/bindings/java/src/main/java/unicorn/MipsConst.java @@ -38,8 +38,10 @@ public interface MipsConst { public static final int UC_CPU_MIPS64_I6500 = 9; public static final int UC_CPU_MIPS64_LOONGSON_2E = 10; public static final int UC_CPU_MIPS64_LOONGSON_2F = 11; - public static final int UC_CPU_MIPS64_MIPS64DSPR2 = 12; - public static final int UC_CPU_MIPS64_ENDING = 13; + public static final int UC_CPU_MIPS64_LOONGSON_3A1000 = 12; + public static final int UC_CPU_MIPS64_LOONGSON_3A4000 = 13; + public static final int UC_CPU_MIPS64_MIPS64DSPR2 = 14; + public static final int UC_CPU_MIPS64_ENDING = 15; // MIPS registers diff --git a/bindings/java/src/main/java/unicorn/RiscvConst.java b/bindings/java/src/main/java/unicorn/RiscvConst.java index 5814180974..3a298c5c7a 100644 --- a/bindings/java/src/main/java/unicorn/RiscvConst.java +++ b/bindings/java/src/main/java/unicorn/RiscvConst.java @@ -7,15 +7,17 @@ public interface RiscvConst { // RISCV32 CPU public static final int UC_CPU_RISCV32_ANY = 0; - public static final int UC_CPU_RISCV32_BASE32 = 1; - public static final int UC_CPU_RISCV32_SIFIVE_E31 = 2; - public static final int UC_CPU_RISCV32_SIFIVE_U34 = 3; - public static final int UC_CPU_RISCV32_ENDING = 4; + public static final int UC_CPU_RISCV32_BASE = 1; + public static final int UC_CPU_RISCV32_IBEX = 2; + public static final int UC_CPU_RISCV32_SIFIVE_E31 = 3; + public static final int UC_CPU_RISCV32_SIFIVE_E34 = 4; + public static final int UC_CPU_RISCV32_SIFIVE_U34 = 5; + public static final int UC_CPU_RISCV32_ENDING = 6; // RISCV64 CPU public static final int UC_CPU_RISCV64_ANY = 0; - public static final int UC_CPU_RISCV64_BASE64 = 1; + public static final int UC_CPU_RISCV64_BASE = 1; public static final int UC_CPU_RISCV64_SIFIVE_E51 = 2; public static final int UC_CPU_RISCV64_SIFIVE_U54 = 3; public static final int UC_CPU_RISCV64_ENDING = 4; diff --git a/bindings/pascal/unicorn/MipsConst.pas b/bindings/pascal/unicorn/MipsConst.pas index 
3cbce39ac0..748b1d8568 100644 --- a/bindings/pascal/unicorn/MipsConst.pas +++ b/bindings/pascal/unicorn/MipsConst.pas @@ -39,8 +39,10 @@ interface UC_CPU_MIPS64_I6500 = 9; UC_CPU_MIPS64_LOONGSON_2E = 10; UC_CPU_MIPS64_LOONGSON_2F = 11; - UC_CPU_MIPS64_MIPS64DSPR2 = 12; - UC_CPU_MIPS64_ENDING = 13; + UC_CPU_MIPS64_LOONGSON_3A1000 = 12; + UC_CPU_MIPS64_LOONGSON_3A4000 = 13; + UC_CPU_MIPS64_MIPS64DSPR2 = 14; + UC_CPU_MIPS64_ENDING = 15; // MIPS registers @@ -242,4 +244,4 @@ interface UC_MIPS_REG_LO3 = 48; implementation -end. \ No newline at end of file +end. diff --git a/bindings/pascal/unicorn/RiscvConst.pas b/bindings/pascal/unicorn/RiscvConst.pas index 075e271c65..a4629832c8 100644 --- a/bindings/pascal/unicorn/RiscvConst.pas +++ b/bindings/pascal/unicorn/RiscvConst.pas @@ -8,15 +8,17 @@ interface // RISCV32 CPU UC_CPU_RISCV32_ANY = 0; - UC_CPU_RISCV32_BASE32 = 1; - UC_CPU_RISCV32_SIFIVE_E31 = 2; - UC_CPU_RISCV32_SIFIVE_U34 = 3; - UC_CPU_RISCV32_ENDING = 4; + UC_CPU_RISCV32_BASE = 1; + UC_CPU_RISCV32_IBEX = 2; + UC_CPU_RISCV32_SIFIVE_E31 = 3; + UC_CPU_RISCV32_SIFIVE_E34 = 4; + UC_CPU_RISCV32_SIFIVE_U34 = 5; + UC_CPU_RISCV32_ENDING = 6; // RISCV64 CPU UC_CPU_RISCV64_ANY = 0; - UC_CPU_RISCV64_BASE64 = 1; + UC_CPU_RISCV64_BASE = 1; UC_CPU_RISCV64_SIFIVE_E51 = 2; UC_CPU_RISCV64_SIFIVE_U54 = 3; UC_CPU_RISCV64_ENDING = 4; @@ -291,4 +293,4 @@ interface UC_RISCV_REG_FT11 = 189; implementation -end. \ No newline at end of file +end. diff --git a/bindings/python/unicorn/mips_const.py b/bindings/python/unicorn/mips_const.py index c60b2d0f77..63bf9f3f39 100644 --- a/bindings/python/unicorn/mips_const.py +++ b/bindings/python/unicorn/mips_const.py @@ -34,8 +34,10 @@ UC_CPU_MIPS64_I6500 = 9 UC_CPU_MIPS64_LOONGSON_2E = 10 UC_CPU_MIPS64_LOONGSON_2F = 11 -UC_CPU_MIPS64_MIPS64DSPR2 = 12 -UC_CPU_MIPS64_ENDING = 13 +UC_CPU_MIPS64_LOONGSON_3A1000 = 12 +UC_CPU_MIPS64_LOONGSON_3A4000 = 13 +UC_CPU_MIPS64_MIPS64DSPR2 = 14 +UC_CPU_MIPS64_ENDING = 15 # MIPS registers diff --git a/bindings/python/unicorn/riscv_const.py b/bindings/python/unicorn/riscv_const.py index 3e63376fd5..d1f2ccd2f9 100644 --- a/bindings/python/unicorn/riscv_const.py +++ b/bindings/python/unicorn/riscv_const.py @@ -3,15 +3,17 @@ # RISCV32 CPU UC_CPU_RISCV32_ANY = 0 -UC_CPU_RISCV32_BASE32 = 1 -UC_CPU_RISCV32_SIFIVE_E31 = 2 -UC_CPU_RISCV32_SIFIVE_U34 = 3 -UC_CPU_RISCV32_ENDING = 4 +UC_CPU_RISCV32_BASE = 1 +UC_CPU_RISCV32_IBEX = 2 +UC_CPU_RISCV32_SIFIVE_E31 = 3 +UC_CPU_RISCV32_SIFIVE_E34 = 4 +UC_CPU_RISCV32_SIFIVE_U34 = 5 +UC_CPU_RISCV32_ENDING = 6 # RISCV64 CPU UC_CPU_RISCV64_ANY = 0 -UC_CPU_RISCV64_BASE64 = 1 +UC_CPU_RISCV64_BASE = 1 UC_CPU_RISCV64_SIFIVE_E51 = 2 UC_CPU_RISCV64_SIFIVE_U54 = 3 UC_CPU_RISCV64_ENDING = 4 diff --git a/bindings/ruby/unicorn_gem/lib/unicorn_engine/mips_const.rb b/bindings/ruby/unicorn_gem/lib/unicorn_engine/mips_const.rb index 374912a870..ed9520b592 100644 --- a/bindings/ruby/unicorn_gem/lib/unicorn_engine/mips_const.rb +++ b/bindings/ruby/unicorn_gem/lib/unicorn_engine/mips_const.rb @@ -36,8 +36,10 @@ module UnicornEngine UC_CPU_MIPS64_I6500 = 9 UC_CPU_MIPS64_LOONGSON_2E = 10 UC_CPU_MIPS64_LOONGSON_2F = 11 - UC_CPU_MIPS64_MIPS64DSPR2 = 12 - UC_CPU_MIPS64_ENDING = 13 + UC_CPU_MIPS64_LOONGSON_3A1000 = 12 + UC_CPU_MIPS64_LOONGSON_3A4000 = 13 + UC_CPU_MIPS64_MIPS64DSPR2 = 14 + UC_CPU_MIPS64_ENDING = 15 # MIPS registers diff --git a/bindings/ruby/unicorn_gem/lib/unicorn_engine/riscv_const.rb b/bindings/ruby/unicorn_gem/lib/unicorn_engine/riscv_const.rb index 33203d0a4d..99eba71355 100644 --- 
a/bindings/ruby/unicorn_gem/lib/unicorn_engine/riscv_const.rb +++ b/bindings/ruby/unicorn_gem/lib/unicorn_engine/riscv_const.rb @@ -5,15 +5,17 @@ module UnicornEngine # RISCV32 CPU UC_CPU_RISCV32_ANY = 0 - UC_CPU_RISCV32_BASE32 = 1 - UC_CPU_RISCV32_SIFIVE_E31 = 2 - UC_CPU_RISCV32_SIFIVE_U34 = 3 - UC_CPU_RISCV32_ENDING = 4 + UC_CPU_RISCV32_BASE = 1 + UC_CPU_RISCV32_IBEX = 2 + UC_CPU_RISCV32_SIFIVE_E31 = 3 + UC_CPU_RISCV32_SIFIVE_E34 = 4 + UC_CPU_RISCV32_SIFIVE_U34 = 5 + UC_CPU_RISCV32_ENDING = 6 # RISCV64 CPU UC_CPU_RISCV64_ANY = 0 - UC_CPU_RISCV64_BASE64 = 1 + UC_CPU_RISCV64_BASE = 1 UC_CPU_RISCV64_SIFIVE_E51 = 2 UC_CPU_RISCV64_SIFIVE_U54 = 3 UC_CPU_RISCV64_ENDING = 4 diff --git a/bindings/rust/src/riscv.rs b/bindings/rust/src/riscv.rs index 53c5990bc3..0862e91e7b 100644 --- a/bindings/rust/src/riscv.rs +++ b/bindings/rust/src/riscv.rs @@ -349,8 +349,10 @@ impl From for i32 { #[derive(Debug, Copy, Clone, PartialEq, Eq)] pub enum Riscv32CpuModel { UC_CPU_RISCV32_ANY = 0, - UC_CPU_RISCV32_BASE32, + UC_CPU_RISCV32_BASE, + UC_CPU_RISCV32_IBEX, UC_CPU_RISCV32_SIFIVE_E31, + UC_CPU_RISCV32_SIFIVE_E34, UC_CPU_RISCV32_SIFIVE_U34, } @@ -370,7 +372,7 @@ impl From<&Riscv32CpuModel> for i32 { #[derive(Debug, Copy, Clone, PartialEq, Eq)] pub enum Riscv64CpuModel { UC_CPU_RISCV64_ANY = 0, - UC_CPU_RISCV64_BASE64, + UC_CPU_RISCV64_BASE, UC_CPU_RISCV64_SIFIVE_E51, UC_CPU_RISCV64_SIFIVE_U54, } diff --git a/bindings/zig/unicorn/mips_const.zig b/bindings/zig/unicorn/mips_const.zig index 0987cb2f7b..de66623340 100644 --- a/bindings/zig/unicorn/mips_const.zig +++ b/bindings/zig/unicorn/mips_const.zig @@ -36,8 +36,10 @@ pub const mipsConst = enum(c_int) { CPU_MIPS64_I6500 = 9, CPU_MIPS64_LOONGSON_2E = 10, CPU_MIPS64_LOONGSON_2F = 11, - CPU_MIPS64_MIPS64DSPR2 = 12, - CPU_MIPS64_ENDING = 13, + CPU_MIPS64_LOONGSON_3A1000 = 12, + CPU_MIPS64_LOONGSON_3A4000 = 13, + CPU_MIPS64_MIPS64DSPR2 = 14, + CPU_MIPS64_ENDING = 15, // MIPS registers diff --git a/bindings/zig/unicorn/riscv_const.zig b/bindings/zig/unicorn/riscv_const.zig index 00a34001f7..340d1988c6 100644 --- a/bindings/zig/unicorn/riscv_const.zig +++ b/bindings/zig/unicorn/riscv_const.zig @@ -5,10 +5,12 @@ pub const riscvConst = enum(c_int) { // RISCV32 CPU CPU_RISCV32_ANY = 0, - CPU_RISCV32_BASE32 = 1, - CPU_RISCV32_SIFIVE_E31 = 2, - CPU_RISCV32_SIFIVE_U34 = 3, - CPU_RISCV32_ENDING = 4, + CPU_RISCV32_BASE = 1, + CPU_RISCV32_IBEX = 2, + CPU_RISCV32_SIFIVE_E31 = 3, + CPU_RISCV32_SIFIVE_E34 = 4, + CPU_RISCV32_SIFIVE_U34 = 5, + CPU_RISCV32_ENDING = 6, // RISCV64 CPU diff --git a/include/unicorn/mips.h b/include/unicorn/mips.h index 7a4c9c1cb8..4eeb9241d7 100644 --- a/include/unicorn/mips.h +++ b/include/unicorn/mips.h @@ -55,6 +55,8 @@ typedef enum uc_cpu_mips64 { UC_CPU_MIPS64_I6500, UC_CPU_MIPS64_LOONGSON_2E, UC_CPU_MIPS64_LOONGSON_2F, + UC_CPU_MIPS64_LOONGSON_3A1000, + UC_CPU_MIPS64_LOONGSON_3A4000, UC_CPU_MIPS64_MIPS64DSPR2, UC_CPU_MIPS64_ENDING diff --git a/include/unicorn/riscv.h b/include/unicorn/riscv.h index cf1595ae4f..e72a1aa39c 100644 --- a/include/unicorn/riscv.h +++ b/include/unicorn/riscv.h @@ -18,8 +18,10 @@ extern "C" { //> RISCV32 CPU typedef enum uc_cpu_riscv32 { UC_CPU_RISCV32_ANY = 0, - UC_CPU_RISCV32_BASE32, + UC_CPU_RISCV32_BASE, + UC_CPU_RISCV32_IBEX, UC_CPU_RISCV32_SIFIVE_E31, + UC_CPU_RISCV32_SIFIVE_E34, UC_CPU_RISCV32_SIFIVE_U34, UC_CPU_RISCV32_ENDING @@ -28,7 +30,7 @@ typedef enum uc_cpu_riscv32 { //> RISCV64 CPU typedef enum uc_cpu_riscv64 { UC_CPU_RISCV64_ANY = 0, - UC_CPU_RISCV64_BASE64, + UC_CPU_RISCV64_BASE, 
UC_CPU_RISCV64_SIFIVE_E51, UC_CPU_RISCV64_SIFIVE_U54, diff --git a/include/unicorn/unicorn.h b/include/unicorn/unicorn.h index 5521262e8e..faac1378c5 100644 --- a/include/unicorn/unicorn.h +++ b/include/unicorn/unicorn.h @@ -69,6 +69,18 @@ typedef size_t uc_hook; #define UNICORN_DEPRECATED #endif +#ifdef _MSC_VER +#define UNICORN_UNUSED __pragma(warning(suppress : 4101)) +#else +#define UNICORN_UNUSED __attribute__((unused)) +#endif + +#ifdef _MSC_VER +#define UNICORN_NONNULL +#else +#define UNICORN_NONNULL __attribute__((nonnull)) +#endif + // Unicorn API version #define UC_API_MAJOR 2 #define UC_API_MINOR 1 diff --git a/qemu/aarch64.h b/qemu/aarch64.h index 6f1315028d..55c3afb894 100644 --- a/qemu/aarch64.h +++ b/qemu/aarch64.h @@ -42,7 +42,10 @@ #define tcg_gen_shl_i64 tcg_gen_shl_i64_aarch64 #define tcg_gen_shr_i64 tcg_gen_shr_i64_aarch64 #define tcg_gen_st_i64 tcg_gen_st_i64_aarch64 +#define tcg_gen_add_i64 tcg_gen_add_i64_aarch64 +#define tcg_gen_sub_i64 tcg_gen_sub_i64_aarch64 #define tcg_gen_xor_i64 tcg_gen_xor_i64_aarch64 +#define tcg_gen_neg_i64 tcg_gen_neg_i64_aarch64 #define cpu_icount_to_ns cpu_icount_to_ns_aarch64 #define cpu_is_stopped cpu_is_stopped_aarch64 #define cpu_get_ticks cpu_get_ticks_aarch64 @@ -374,6 +377,8 @@ #define floatx80_sub floatx80_sub_aarch64 #define floatx80_mul floatx80_mul_aarch64 #define floatx80_div floatx80_div_aarch64 +#define floatx80_modrem floatx80_modrem_aarch64 +#define floatx80_mod floatx80_mod_aarch64 #define floatx80_rem floatx80_rem_aarch64 #define floatx80_sqrt floatx80_sqrt_aarch64 #define floatx80_eq floatx80_eq_aarch64 @@ -648,6 +653,7 @@ #define tcg_gen_gvec_dup_i32 tcg_gen_gvec_dup_i32_aarch64 #define tcg_gen_gvec_dup_i64 tcg_gen_gvec_dup_i64_aarch64 #define tcg_gen_gvec_dup_mem tcg_gen_gvec_dup_mem_aarch64 +#define tcg_gen_gvec_dup_imm tcg_gen_gvec_dup_imm_aarch64 #define tcg_gen_gvec_dup64i tcg_gen_gvec_dup64i_aarch64 #define tcg_gen_gvec_dup32i tcg_gen_gvec_dup32i_aarch64 #define tcg_gen_gvec_dup16i tcg_gen_gvec_dup16i_aarch64 @@ -702,13 +708,20 @@ #define tcg_gen_gvec_shri tcg_gen_gvec_shri_aarch64 #define tcg_gen_vec_sar8i_i64 tcg_gen_vec_sar8i_i64_aarch64 #define tcg_gen_vec_sar16i_i64 tcg_gen_vec_sar16i_i64_aarch64 +#define tcg_gen_vec_rotl8i_i64 tcg_gen_vec_rotl8i_i64_aarch64 +#define tcg_gen_vec_rotl16i_i64 tcg_gen_vec_rotl16i_i64_aarch64 #define tcg_gen_gvec_sari tcg_gen_gvec_sari_aarch64 +#define tcg_gen_gvec_rotli tcg_gen_gvec_rotli_aarch64 +#define tcg_gen_gvec_rotri tcg_gen_gvec_rotri_aarch64 #define tcg_gen_gvec_shls tcg_gen_gvec_shls_aarch64 #define tcg_gen_gvec_shrs tcg_gen_gvec_shrs_aarch64 #define tcg_gen_gvec_sars tcg_gen_gvec_sars_aarch64 +#define tcg_gen_gvec_rotls tcg_gen_gvec_rotls_aarch64 #define tcg_gen_gvec_shlv tcg_gen_gvec_shlv_aarch64 #define tcg_gen_gvec_shrv tcg_gen_gvec_shrv_aarch64 #define tcg_gen_gvec_sarv tcg_gen_gvec_sarv_aarch64 +#define tcg_gen_gvec_rotlv tcg_gen_gvec_rotlv_aarch64 +#define tcg_gen_gvec_rotrv tcg_gen_gvec_rotrv_aarch64 #define tcg_gen_gvec_cmp tcg_gen_gvec_cmp_aarch64 #define tcg_gen_gvec_bitsel tcg_gen_gvec_bitsel_aarch64 #define tcg_can_emit_vecop_list tcg_can_emit_vecop_list_aarch64 @@ -745,6 +758,8 @@ #define tcg_gen_shli_vec tcg_gen_shli_vec_aarch64 #define tcg_gen_shri_vec tcg_gen_shri_vec_aarch64 #define tcg_gen_sari_vec tcg_gen_sari_vec_aarch64 +#define tcg_gen_rotli_vec tcg_gen_rotli_vec_aarch64 +#define tcg_gen_rotri_vec tcg_gen_rotri_vec_aarch64 #define tcg_gen_cmp_vec tcg_gen_cmp_vec_aarch64 #define tcg_gen_add_vec tcg_gen_add_vec_aarch64 #define tcg_gen_sub_vec 
tcg_gen_sub_vec_aarch64 @@ -760,9 +775,12 @@ #define tcg_gen_shlv_vec tcg_gen_shlv_vec_aarch64 #define tcg_gen_shrv_vec tcg_gen_shrv_vec_aarch64 #define tcg_gen_sarv_vec tcg_gen_sarv_vec_aarch64 +#define tcg_gen_rotlv_vec tcg_gen_rotlv_vec_aarch64 +#define tcg_gen_rotrv_vec tcg_gen_rotrv_vec_aarch64 #define tcg_gen_shls_vec tcg_gen_shls_vec_aarch64 #define tcg_gen_shrs_vec tcg_gen_shrs_vec_aarch64 #define tcg_gen_sars_vec tcg_gen_sars_vec_aarch64 +#define tcg_gen_rotls_vec tcg_gen_rotls_vec_aarch64 #define tcg_gen_bitsel_vec tcg_gen_bitsel_vec_aarch64 #define tcg_gen_cmpsel_vec tcg_gen_cmpsel_vec_aarch64 #define tb_htable_lookup tb_htable_lookup_aarch64 @@ -774,6 +792,7 @@ #define cpu_loop_exit_restore cpu_loop_exit_restore_aarch64 #define cpu_loop_exit_atomic cpu_loop_exit_atomic_aarch64 #define tlb_init tlb_init_aarch64 +#define tlb_destroy tlb_destroy_aarch64 #define tlb_flush_by_mmuidx tlb_flush_by_mmuidx_aarch64 #define tlb_flush tlb_flush_aarch64 #define tlb_flush_by_mmuidx_all_cpus tlb_flush_by_mmuidx_all_cpus_aarch64 @@ -794,6 +813,7 @@ #define tlb_set_page tlb_set_page_aarch64 #define get_page_addr_code_hostp get_page_addr_code_hostp_aarch64 #define get_page_addr_code get_page_addr_code_aarch64 +#define probe_access_flags probe_access_flags_aarch64 #define probe_access probe_access_aarch64 #define tlb_vaddr_to_host tlb_vaddr_to_host_aarch64 #define helper_ret_ldub_mmu helper_ret_ldub_mmu_aarch64 @@ -810,22 +830,34 @@ #define helper_be_ldsl_mmu helper_be_ldsl_mmu_aarch64 #define cpu_ldub_mmuidx_ra cpu_ldub_mmuidx_ra_aarch64 #define cpu_ldsb_mmuidx_ra cpu_ldsb_mmuidx_ra_aarch64 -#define cpu_lduw_mmuidx_ra cpu_lduw_mmuidx_ra_aarch64 -#define cpu_ldsw_mmuidx_ra cpu_ldsw_mmuidx_ra_aarch64 -#define cpu_ldl_mmuidx_ra cpu_ldl_mmuidx_ra_aarch64 -#define cpu_ldq_mmuidx_ra cpu_ldq_mmuidx_ra_aarch64 +#define cpu_lduw_be_mmuidx_ra cpu_lduw_be_mmuidx_ra_aarch64 +#define cpu_lduw_le_mmuidx_ra cpu_lduw_le_mmuidx_ra_aarch64 +#define cpu_ldsw_be_mmuidx_ra cpu_ldsw_be_mmuidx_ra_aarch64 +#define cpu_ldsw_le_mmuidx_ra cpu_ldsw_le_mmuidx_ra_aarch64 +#define cpu_ldl_be_mmuidx_ra cpu_ldl_be_mmuidx_ra_aarch64 +#define cpu_ldl_le_mmuidx_ra cpu_ldl_le_mmuidx_ra_aarch64 +#define cpu_ldq_be_mmuidx_ra cpu_ldq_be_mmuidx_ra_aarch64 +#define cpu_ldq_le_mmuidx_ra cpu_ldq_le_mmuidx_ra_aarch64 #define cpu_ldub_data_ra cpu_ldub_data_ra_aarch64 #define cpu_ldsb_data_ra cpu_ldsb_data_ra_aarch64 -#define cpu_lduw_data_ra cpu_lduw_data_ra_aarch64 -#define cpu_ldsw_data_ra cpu_ldsw_data_ra_aarch64 -#define cpu_ldl_data_ra cpu_ldl_data_ra_aarch64 -#define cpu_ldq_data_ra cpu_ldq_data_ra_aarch64 +#define cpu_lduw_be_data_ra cpu_lduw_be_data_ra_aarch64 +#define cpu_lduw_le_data_ra cpu_lduw_le_data_ra_aarch64 +#define cpu_ldsw_be_data_ra cpu_ldsw_be_data_ra_aarch64 +#define cpu_ldsw_le_data_ra cpu_ldsw_le_data_ra_aarch64 +#define cpu_ldl_be_data_ra cpu_ldl_be_data_ra_aarch64 +#define cpu_ldl_le_data_ra cpu_ldl_le_data_ra_aarch64 +#define cpu_ldq_be_data_ra cpu_ldq_be_data_ra_aarch64 +#define cpu_ldq_le_data_ra cpu_ldq_le_data_ra_aarch64 #define cpu_ldub_data cpu_ldub_data_aarch64 #define cpu_ldsb_data cpu_ldsb_data_aarch64 -#define cpu_lduw_data cpu_lduw_data_aarch64 -#define cpu_ldsw_data cpu_ldsw_data_aarch64 -#define cpu_ldl_data cpu_ldl_data_aarch64 -#define cpu_ldq_data cpu_ldq_data_aarch64 +#define cpu_lduw_be_data cpu_lduw_be_data_aarch64 +#define cpu_lduw_le_data cpu_lduw_le_data_aarch64 +#define cpu_ldsw_be_data cpu_ldsw_be_data_aarch64 +#define cpu_ldsw_le_data cpu_ldsw_le_data_aarch64 +#define cpu_ldl_be_data 
cpu_ldl_be_data_aarch64 +#define cpu_ldl_le_data cpu_ldl_le_data_aarch64 +#define cpu_ldq_le_data cpu_ldq_le_data_aarch64 +#define cpu_ldq_be_data cpu_ldq_be_data_aarch64 #define helper_ret_stb_mmu helper_ret_stb_mmu_aarch64 #define helper_le_stw_mmu helper_le_stw_mmu_aarch64 #define helper_be_stw_mmu helper_be_stw_mmu_aarch64 @@ -834,17 +866,26 @@ #define helper_le_stq_mmu helper_le_stq_mmu_aarch64 #define helper_be_stq_mmu helper_be_stq_mmu_aarch64 #define cpu_stb_mmuidx_ra cpu_stb_mmuidx_ra_aarch64 -#define cpu_stw_mmuidx_ra cpu_stw_mmuidx_ra_aarch64 -#define cpu_stl_mmuidx_ra cpu_stl_mmuidx_ra_aarch64 -#define cpu_stq_mmuidx_ra cpu_stq_mmuidx_ra_aarch64 +#define cpu_stw_be_mmuidx_ra cpu_stw_be_mmuidx_ra_aarch64 +#define cpu_stw_le_mmuidx_ra cpu_stw_le_mmuidx_ra_aarch64 +#define cpu_stl_be_mmuidx_ra cpu_stl_be_mmuidx_ra_aarch64 +#define cpu_stl_le_mmuidx_ra cpu_stl_le_mmuidx_ra_aarch64 +#define cpu_stq_be_mmuidx_ra cpu_stq_be_mmuidx_ra_aarch64 +#define cpu_stq_le_mmuidx_ra cpu_stq_le_mmuidx_ra_aarch64 #define cpu_stb_data_ra cpu_stb_data_ra_aarch64 -#define cpu_stw_data_ra cpu_stw_data_ra_aarch64 -#define cpu_stl_data_ra cpu_stl_data_ra_aarch64 -#define cpu_stq_data_ra cpu_stq_data_ra_aarch64 +#define cpu_stw_be_data_ra cpu_stw_be_data_ra_aarch64 +#define cpu_stw_le_data_ra cpu_stw_le_data_ra_aarch64 +#define cpu_stl_be_data_ra cpu_stl_be_data_ra_aarch64 +#define cpu_stl_le_data_ra cpu_stl_le_data_ra_aarch64 +#define cpu_stq_be_data_ra cpu_stq_be_data_ra_aarch64 +#define cpu_stq_le_data_ra cpu_stq_le_data_ra_aarch64 #define cpu_stb_data cpu_stb_data_aarch64 -#define cpu_stw_data cpu_stw_data_aarch64 -#define cpu_stl_data cpu_stl_data_aarch64 -#define cpu_stq_data cpu_stq_data_aarch64 +#define cpu_stw_be_data cpu_stw_be_data_aarch64 +#define cpu_stw_le_data cpu_stw_le_data_aarch64 +#define cpu_stl_be_data cpu_stl_be_data_aarch64 +#define cpu_stl_le_data cpu_stl_le_data_aarch64 +#define cpu_stq_be_data cpu_stq_be_data_aarch64 +#define cpu_stq_le_data cpu_stq_le_data_aarch64 #define helper_atomic_cmpxchgb_mmu helper_atomic_cmpxchgb_mmu_aarch64 #define helper_atomic_xchgb_mmu helper_atomic_xchgb_mmu_aarch64 #define helper_atomic_fetch_addb_mmu helper_atomic_fetch_addb_mmu_aarch64 @@ -1101,6 +1142,7 @@ #define cpu_lduw_code cpu_lduw_code_aarch64 #define cpu_ldl_code cpu_ldl_code_aarch64 #define cpu_ldq_code cpu_ldq_code_aarch64 +#define cpu_interrupt_handler cpu_interrupt_handler_aarch64 #define helper_div_i32 helper_div_i32_aarch64 #define helper_rem_i32 helper_rem_i32_aarch64 #define helper_divu_i32 helper_divu_i32_aarch64 @@ -1185,6 +1227,10 @@ #define helper_gvec_sar16i helper_gvec_sar16i_aarch64 #define helper_gvec_sar32i helper_gvec_sar32i_aarch64 #define helper_gvec_sar64i helper_gvec_sar64i_aarch64 +#define helper_gvec_rotl8i helper_gvec_rotl8i_aarch64 +#define helper_gvec_rotl16i helper_gvec_rotl16i_aarch64 +#define helper_gvec_rotl32i helper_gvec_rotl32i_aarch64 +#define helper_gvec_rotl64i helper_gvec_rotl64i_aarch64 #define helper_gvec_shl8v helper_gvec_shl8v_aarch64 #define helper_gvec_shl16v helper_gvec_shl16v_aarch64 #define helper_gvec_shl32v helper_gvec_shl32v_aarch64 @@ -1197,6 +1243,14 @@ #define helper_gvec_sar16v helper_gvec_sar16v_aarch64 #define helper_gvec_sar32v helper_gvec_sar32v_aarch64 #define helper_gvec_sar64v helper_gvec_sar64v_aarch64 +#define helper_gvec_rotl8v helper_gvec_rotl8v_aarch64 +#define helper_gvec_rotl16v helper_gvec_rotl16v_aarch64 +#define helper_gvec_rotl32v helper_gvec_rotl32v_aarch64 +#define helper_gvec_rotl64v helper_gvec_rotl64v_aarch64 
+#define helper_gvec_rotr8v helper_gvec_rotr8v_aarch64 +#define helper_gvec_rotr16v helper_gvec_rotr16v_aarch64 +#define helper_gvec_rotr32v helper_gvec_rotr32v_aarch64 +#define helper_gvec_rotr64v helper_gvec_rotr64v_aarch64 #define helper_gvec_eq8 helper_gvec_eq8_aarch64 #define helper_gvec_ne8 helper_gvec_ne8_aarch64 #define helper_gvec_lt8 helper_gvec_lt8_aarch64 @@ -1615,6 +1669,11 @@ #define arm_v7m_mmu_idx_all arm_v7m_mmu_idx_all_aarch64 #define arm_v7m_mmu_idx_for_secstate_and_priv arm_v7m_mmu_idx_for_secstate_and_priv_aarch64 #define arm_v7m_mmu_idx_for_secstate arm_v7m_mmu_idx_for_secstate_aarch64 +#define mte_probe1 mte_probe1_aarch64 +#define mte_check1 mte_check1_aarch64 +#define mte_checkN mte_checkN_aarch64 +#define gen_helper_mte_check1 gen_helper_mte_check1_aarch64 +#define gen_helper_mte_checkN gen_helper_mte_checkN_aarch64 #define helper_neon_qadd_u8 helper_neon_qadd_u8_aarch64 #define helper_neon_qadd_u16 helper_neon_qadd_u16_aarch64 #define helper_neon_qadd_u32 helper_neon_qadd_u32_aarch64 @@ -1854,6 +1913,21 @@ #define helper_autdb helper_autdb_aarch64 #define helper_xpaci helper_xpaci_aarch64 #define helper_xpacd helper_xpacd_aarch64 +#define helper_mte_check1 helper_mte_check1_aarch64 +#define helper_mte_checkN helper_mte_checkN_aarch64 +#define helper_mte_check_zva helper_mte_check_zva_aarch64 +#define helper_irg helper_irg_aarch64 +#define helper_addsubg helper_addsubg_aarch64 +#define helper_ldg helper_ldg_aarch64 +#define helper_stg helper_stg_aarch64 +#define helper_stg_parallel helper_stg_parallel_aarch64 +#define helper_stg_stub helper_stg_stub_aarch64 +#define helper_st2g helper_st2g_aarch64 +#define helper_st2g_parallel helper_st2g_parallel_aarch64 +#define helper_st2g_stub helper_st2g_stub_aarch64 +#define helper_ldgm helper_ldgm_aarch64 +#define helper_stgm helper_stgm_aarch64 +#define helper_stzgm_tags helper_stzgm_tags_aarch64 #define arm_is_psci_call arm_is_psci_call_aarch64 #define arm_handle_psci_call arm_handle_psci_call_aarch64 #define helper_sve_predtest1 helper_sve_predtest1_aarch64 @@ -2746,6 +2820,7 @@ #define gen_a64_set_pc_im gen_a64_set_pc_im_aarch64 #define unallocated_encoding unallocated_encoding_aarch64 #define new_tmp_a64 new_tmp_a64_aarch64 +#define new_tmp_a64_local new_tmp_a64_local_aarch64 #define new_tmp_a64_zero new_tmp_a64_zero_aarch64 #define cpu_reg cpu_reg_aarch64 #define cpu_reg_sp cpu_reg_sp_aarch64 diff --git a/qemu/accel/tcg/cputlb.c b/qemu/accel/tcg/cputlb.c index f7ffee48f1..3efbf5562c 100644 --- a/qemu/accel/tcg/cputlb.c +++ b/qemu/accel/tcg/cputlb.c @@ -261,6 +261,21 @@ void tlb_init(CPUState *cpu) } } +void tlb_destroy(CPUState *cpu) +{ + CPUArchState *env = cpu->env_ptr; + int i; + + // qemu_spin_destroy(&env_tlb(env)->c.lock); + for (i = 0; i < NB_MMU_MODES; i++) { + CPUTLBDesc *desc = &env_tlb(env)->d[i]; + CPUTLBDescFast *fast = &env_tlb(env)->f[i]; + + g_free(fast->table); + g_free(desc->iotlb); + } +} + /* flush_all_helper: run fn across all cpus * * If the wait flag is set then the src cpu's helper will be queued as @@ -450,9 +465,7 @@ static void tlb_flush_page_by_mmuidx_async_0(CPUState *cpu, static void tlb_flush_page_by_mmuidx_async_1(CPUState *cpu, run_on_cpu_data data) { -#ifdef TARGET_ARM - struct uc_struct *uc = cpu->uc; -#endif + UNICORN_UNUSED struct uc_struct *uc = cpu->uc; target_ulong addr_and_idxmap = (target_ulong) data.target_ptr; target_ulong addr = addr_and_idxmap & TARGET_PAGE_MASK; uint16_t idxmap = addr_and_idxmap & ~TARGET_PAGE_MASK; @@ -486,9 +499,7 @@ static void 
tlb_flush_page_by_mmuidx_async_2(CPUState *cpu, void tlb_flush_page_by_mmuidx(CPUState *cpu, target_ulong addr, uint16_t idxmap) { -#ifdef TARGET_ARM - struct uc_struct *uc = cpu->uc; -#endif + UNICORN_UNUSED struct uc_struct *uc = cpu->uc; /* This should already be page aligned */ addr &= TARGET_PAGE_MASK; @@ -523,9 +534,7 @@ void tlb_flush_page(CPUState *cpu, target_ulong addr) void tlb_flush_page_by_mmuidx_all_cpus(CPUState *src_cpu, target_ulong addr, uint16_t idxmap) { -#ifdef TARGET_ARM - struct uc_struct *uc = src_cpu->uc; -#endif + UNICORN_UNUSED struct uc_struct *uc = src_cpu->uc; /* This should already be page aligned */ addr &= TARGET_PAGE_MASK; @@ -567,9 +576,7 @@ void tlb_flush_page_by_mmuidx_all_cpus_synced(CPUState *src_cpu, target_ulong addr, uint16_t idxmap) { -#ifdef TARGET_ARM - struct uc_struct *uc = src_cpu->uc; -#endif + UNICORN_UNUSED struct uc_struct *uc = src_cpu->uc; /* This should already be page aligned */ addr &= TARGET_PAGE_MASK; @@ -755,9 +762,7 @@ static inline void tlb_set_dirty1_locked(CPUTLBEntry *tlb_entry, so that it is no longer dirty */ void tlb_set_dirty(CPUState *cpu, target_ulong vaddr) { -#ifdef TARGET_ARM - struct uc_struct *uc = cpu->uc; -#endif + UNICORN_UNUSED struct uc_struct *uc = cpu->uc; CPUArchState *env = cpu->env_ptr; int mmu_idx; @@ -811,9 +816,7 @@ void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr, hwaddr paddr, MemTxAttrs attrs, int prot, int mmu_idx, target_ulong size) { -#ifdef TARGET_ARM - struct uc_struct *uc = cpu->uc; -#endif + UNICORN_UNUSED struct uc_struct *uc = cpu->uc; CPUArchState *env = cpu->env_ptr; CPUTLB *tlb = env_tlb(env); CPUTLBDesc *desc = &tlb->d[mmu_idx]; @@ -1190,9 +1193,7 @@ static void notdirty_write(CPUState *cpu, vaddr mem_vaddr, unsigned size, CPUIOTLBEntry *iotlbentry, uintptr_t retaddr, CPUTLBEntry *tlbe) { -#ifdef TARGET_ARM - struct uc_struct *uc = cpu->uc; -#endif + UNICORN_UNUSED struct uc_struct *uc = cpu->uc; ram_addr_t ram_addr = mem_vaddr + iotlbentry->addr; MemoryRegion *mr = cpu->uc->memory_mapping(cpu->uc, tlbe->paddr | (mem_vaddr & ~TARGET_PAGE_MASK)); @@ -1215,6 +1216,86 @@ static void notdirty_write(CPUState *cpu, vaddr mem_vaddr, unsigned size, } } +static int probe_access_internal(CPUArchState *env, target_ulong addr, + int fault_size, MMUAccessType access_type, + int mmu_idx, bool nonfault, + void **phost, uintptr_t retaddr) +{ + struct uc_struct *uc = env->uc; + uintptr_t index = tlb_index(env, mmu_idx, addr); + CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr); + target_ulong tlb_addr, page_addr; + size_t elt_ofs; + int flags; + + switch (access_type) { + case MMU_DATA_LOAD: + elt_ofs = offsetof(CPUTLBEntry, addr_read); + break; + case MMU_DATA_STORE: + elt_ofs = offsetof(CPUTLBEntry, addr_write); + break; + case MMU_INST_FETCH: + elt_ofs = offsetof(CPUTLBEntry, addr_code); + break; + default: + g_assert_not_reached(); + } + tlb_addr = tlb_read_ofs(entry, elt_ofs); + + page_addr = addr & TARGET_PAGE_MASK; + if (!tlb_hit_page(uc, tlb_addr, page_addr)) { + if (!victim_tlb_hit(env, mmu_idx, index, elt_ofs, page_addr)) { + CPUState *cs = env_cpu(env); + CPUClass *cc = CPU_GET_CLASS(cs); + + if (!cc->tlb_fill(cs, addr, fault_size, access_type, + mmu_idx, nonfault, retaddr)) { + /* Non-faulting page table read failed. */ + *phost = NULL; + return TLB_INVALID_MASK; + } + + /* TLB resize via tlb_fill may have moved the entry. 
*/ + entry = tlb_entry(env, mmu_idx, addr); + } + tlb_addr = tlb_read_ofs(entry, elt_ofs); + } + flags = tlb_addr & TLB_FLAGS_MASK; + + /* Fold all "mmio-like" bits into TLB_MMIO. This is not RAM. */ + if (unlikely(flags & ~(TLB_WATCHPOINT | TLB_NOTDIRTY))) { + *phost = NULL; + return TLB_MMIO; + } + + /* Everything else is RAM. */ + *phost = (void *)((uintptr_t)addr + entry->addend); + return flags; +} + +int probe_access_flags(CPUArchState *env, target_ulong addr, + MMUAccessType access_type, int mmu_idx, + bool nonfault, void **phost, uintptr_t retaddr) +{ + int flags; + CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr); + + flags = probe_access_internal(env, addr, 0, access_type, mmu_idx, + nonfault, phost, retaddr); + + /* Handle clean RAM pages. */ + if (unlikely(flags & TLB_NOTDIRTY)) { + uintptr_t index = tlb_index(env, mmu_idx, addr); + CPUIOTLBEntry *iotlbentry = &env_tlb(env)->d[mmu_idx].iotlb[index]; + + notdirty_write(env_cpu(env), addr, 1, iotlbentry, retaddr, entry); + flags &= ~TLB_NOTDIRTY; + } + + return flags; +} + /* * Probe for whether the specified guest access is permitted. If it is not * permitted then an exception will be taken in the same way as if this @@ -1225,9 +1306,7 @@ static void notdirty_write(CPUState *cpu, vaddr mem_vaddr, unsigned size, void *probe_access(CPUArchState *env, target_ulong addr, int size, MMUAccessType access_type, int mmu_idx, uintptr_t retaddr) { -#ifdef TARGET_ARM - struct uc_struct *uc = env->uc; -#endif + UNICORN_UNUSED struct uc_struct *uc = env->uc; uintptr_t index = tlb_index(env, mmu_idx, addr); CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr); target_ulong tlb_addr; @@ -1352,9 +1431,7 @@ void *tlb_vaddr_to_host(CPUArchState *env, abi_ptr addr, static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr, TCGMemOpIdx oi, uintptr_t retaddr) { -#ifdef TARGET_ARM - struct uc_struct *uc = env->uc; -#endif + UNICORN_UNUSED struct uc_struct *uc = env->uc; size_t mmu_idx = get_mmuidx(oi); uintptr_t index = tlb_index(env, mmu_idx, addr); CPUTLBEntry *tlbe = tlb_entry(env, mmu_idx, addr); @@ -1951,36 +2028,54 @@ int cpu_ldsb_mmuidx_ra(CPUArchState *env, abi_ptr addr, full_ldub_mmu); } -uint32_t cpu_lduw_mmuidx_ra(CPUArchState *env, abi_ptr addr, - int mmu_idx, uintptr_t ra) +uint32_t cpu_lduw_be_mmuidx_ra(CPUArchState *env, abi_ptr addr, + int mmu_idx, uintptr_t ra) { - return cpu_load_helper(env, addr, mmu_idx, ra, MO_TEUW, - MO_TE == MO_LE - ? full_le_lduw_mmu : full_be_lduw_mmu); + return cpu_load_helper(env, addr, mmu_idx, ra, MO_BEUW, full_be_lduw_mmu); } -int cpu_ldsw_mmuidx_ra(CPUArchState *env, abi_ptr addr, - int mmu_idx, uintptr_t ra) +int cpu_ldsw_be_mmuidx_ra(CPUArchState *env, abi_ptr addr, + int mmu_idx, uintptr_t ra) +{ + return (int16_t)cpu_load_helper(env, addr, mmu_idx, ra, MO_BESW, + full_be_lduw_mmu); +} + +uint32_t cpu_ldl_be_mmuidx_ra(CPUArchState *env, abi_ptr addr, + int mmu_idx, uintptr_t ra) +{ + return cpu_load_helper(env, addr, mmu_idx, ra, MO_BEUL, full_be_ldul_mmu); +} + +uint64_t cpu_ldq_be_mmuidx_ra(CPUArchState *env, abi_ptr addr, + int mmu_idx, uintptr_t ra) { - return (int16_t)cpu_load_helper(env, addr, mmu_idx, ra, MO_TESW, - MO_TE == MO_LE - ? 
full_le_lduw_mmu : full_be_lduw_mmu); + return cpu_load_helper(env, addr, mmu_idx, ra, MO_BEQ, helper_be_ldq_mmu); } -uint32_t cpu_ldl_mmuidx_ra(CPUArchState *env, abi_ptr addr, - int mmu_idx, uintptr_t ra) +uint32_t cpu_lduw_le_mmuidx_ra(CPUArchState *env, abi_ptr addr, + int mmu_idx, uintptr_t ra) { - return cpu_load_helper(env, addr, mmu_idx, ra, MO_TEUL, - MO_TE == MO_LE - ? full_le_ldul_mmu : full_be_ldul_mmu); + return cpu_load_helper(env, addr, mmu_idx, ra, MO_LEUW, full_le_lduw_mmu); } -uint64_t cpu_ldq_mmuidx_ra(CPUArchState *env, abi_ptr addr, - int mmu_idx, uintptr_t ra) +int cpu_ldsw_le_mmuidx_ra(CPUArchState *env, abi_ptr addr, + int mmu_idx, uintptr_t ra) { - return cpu_load_helper(env, addr, mmu_idx, ra, MO_TEQ, - MO_TE == MO_LE - ? helper_le_ldq_mmu : helper_be_ldq_mmu); + return (int16_t)cpu_load_helper(env, addr, mmu_idx, ra, MO_LESW, + full_le_lduw_mmu); +} + +uint32_t cpu_ldl_le_mmuidx_ra(CPUArchState *env, abi_ptr addr, + int mmu_idx, uintptr_t ra) +{ + return cpu_load_helper(env, addr, mmu_idx, ra, MO_LEUL, full_le_ldul_mmu); +} + +uint64_t cpu_ldq_le_mmuidx_ra(CPUArchState *env, abi_ptr addr, + int mmu_idx, uintptr_t ra) +{ + return cpu_load_helper(env, addr, mmu_idx, ra, MO_LEQ, helper_le_ldq_mmu); } uint32_t cpu_ldub_data_ra(CPUArchState *env, target_ulong ptr, @@ -1994,25 +2089,50 @@ int cpu_ldsb_data_ra(CPUArchState *env, target_ulong ptr, uintptr_t retaddr) return cpu_ldsb_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr); } -uint32_t cpu_lduw_data_ra(CPUArchState *env, target_ulong ptr, - uintptr_t retaddr) +uint32_t cpu_lduw_be_data_ra(CPUArchState *env, target_ulong ptr, + uintptr_t retaddr) +{ + return cpu_lduw_be_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr); +} + +int cpu_ldsw_be_data_ra(CPUArchState *env, target_ulong ptr, uintptr_t retaddr) +{ + return cpu_ldsw_be_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr); +} + +uint32_t cpu_ldl_be_data_ra(CPUArchState *env, target_ulong ptr, + uintptr_t retaddr) +{ + return cpu_ldl_be_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr); +} + +uint64_t cpu_ldq_be_data_ra(CPUArchState *env, target_ulong ptr, + uintptr_t retaddr) +{ + return cpu_ldq_be_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr); +} + +uint32_t cpu_lduw_le_data_ra(CPUArchState *env, target_ulong ptr, + uintptr_t retaddr) { - return cpu_lduw_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr); + return cpu_lduw_le_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr); } -int cpu_ldsw_data_ra(CPUArchState *env, target_ulong ptr, uintptr_t retaddr) +int cpu_ldsw_le_data_ra(CPUArchState *env, target_ulong ptr, uintptr_t retaddr) { - return cpu_ldsw_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr); + return cpu_ldsw_le_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr); } -uint32_t cpu_ldl_data_ra(CPUArchState *env, target_ulong ptr, uintptr_t retaddr) +uint32_t cpu_ldl_le_data_ra(CPUArchState *env, target_ulong ptr, + uintptr_t retaddr) { - return cpu_ldl_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr); + return cpu_ldl_le_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr); } -uint64_t cpu_ldq_data_ra(CPUArchState *env, target_ulong ptr, uintptr_t retaddr) +uint64_t cpu_ldq_le_data_ra(CPUArchState *env, target_ulong ptr, + uintptr_t retaddr) { - return cpu_ldq_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr); + return cpu_ldq_le_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr); } uint32_t cpu_ldub_data(CPUArchState *env, target_ulong ptr) @@ -2025,24 +2145,44 @@ int 
cpu_ldsb_data(CPUArchState *env, target_ulong ptr) return cpu_ldsb_data_ra(env, ptr, 0); } -uint32_t cpu_lduw_data(CPUArchState *env, target_ulong ptr) +uint32_t cpu_lduw_be_data(CPUArchState *env, target_ulong ptr) +{ + return cpu_lduw_be_data_ra(env, ptr, 0); +} + +int cpu_ldsw_be_data(CPUArchState *env, target_ulong ptr) +{ + return cpu_ldsw_be_data_ra(env, ptr, 0); +} + +uint32_t cpu_ldl_be_data(CPUArchState *env, target_ulong ptr) +{ + return cpu_ldl_be_data_ra(env, ptr, 0); +} + +uint64_t cpu_ldq_be_data(CPUArchState *env, target_ulong ptr) { - return cpu_lduw_data_ra(env, ptr, 0); + return cpu_ldq_be_data_ra(env, ptr, 0); } -int cpu_ldsw_data(CPUArchState *env, target_ulong ptr) +uint32_t cpu_lduw_le_data(CPUArchState *env, target_ulong ptr) { - return cpu_ldsw_data_ra(env, ptr, 0); + return cpu_lduw_le_data_ra(env, ptr, 0); } -uint32_t cpu_ldl_data(CPUArchState *env, target_ulong ptr) +int cpu_ldsw_le_data(CPUArchState *env, target_ulong ptr) { - return cpu_ldl_data_ra(env, ptr, 0); + return cpu_ldsw_le_data_ra(env, ptr, 0); } -uint64_t cpu_ldq_data(CPUArchState *env, target_ulong ptr) +uint32_t cpu_ldl_le_data(CPUArchState *env, target_ulong ptr) { - return cpu_ldq_data_ra(env, ptr, 0); + return cpu_ldl_le_data_ra(env, ptr, 0); +} + +uint64_t cpu_ldq_le_data(CPUArchState *env, target_ulong ptr) +{ + return cpu_ldq_le_data_ra(env, ptr, 0); } /* @@ -2428,22 +2568,40 @@ void cpu_stb_mmuidx_ra(CPUArchState *env, target_ulong addr, uint32_t val, cpu_store_helper(env, addr, val, mmu_idx, retaddr, MO_UB); } -void cpu_stw_mmuidx_ra(CPUArchState *env, target_ulong addr, uint32_t val, - int mmu_idx, uintptr_t retaddr) +void cpu_stw_be_mmuidx_ra(CPUArchState *env, target_ulong addr, uint32_t val, + int mmu_idx, uintptr_t retaddr) { - cpu_store_helper(env, addr, val, mmu_idx, retaddr, MO_TEUW); + cpu_store_helper(env, addr, val, mmu_idx, retaddr, MO_BEUW); } -void cpu_stl_mmuidx_ra(CPUArchState *env, target_ulong addr, uint32_t val, - int mmu_idx, uintptr_t retaddr) +void cpu_stl_be_mmuidx_ra(CPUArchState *env, target_ulong addr, uint32_t val, + int mmu_idx, uintptr_t retaddr) { - cpu_store_helper(env, addr, val, mmu_idx, retaddr, MO_TEUL); + cpu_store_helper(env, addr, val, mmu_idx, retaddr, MO_BEUL); } -void cpu_stq_mmuidx_ra(CPUArchState *env, target_ulong addr, uint64_t val, - int mmu_idx, uintptr_t retaddr) +void cpu_stq_be_mmuidx_ra(CPUArchState *env, target_ulong addr, uint64_t val, + int mmu_idx, uintptr_t retaddr) +{ + cpu_store_helper(env, addr, val, mmu_idx, retaddr, MO_BEQ); +} + +void cpu_stw_le_mmuidx_ra(CPUArchState *env, target_ulong addr, uint32_t val, + int mmu_idx, uintptr_t retaddr) { - cpu_store_helper(env, addr, val, mmu_idx, retaddr, MO_TEQ); + cpu_store_helper(env, addr, val, mmu_idx, retaddr, MO_LEUW); +} + +void cpu_stl_le_mmuidx_ra(CPUArchState *env, target_ulong addr, uint32_t val, + int mmu_idx, uintptr_t retaddr) +{ + cpu_store_helper(env, addr, val, mmu_idx, retaddr, MO_LEUL); +} + +void cpu_stq_le_mmuidx_ra(CPUArchState *env, target_ulong addr, uint64_t val, + int mmu_idx, uintptr_t retaddr) +{ + cpu_store_helper(env, addr, val, mmu_idx, retaddr, MO_LEQ); } void cpu_stb_data_ra(CPUArchState *env, target_ulong ptr, @@ -2452,22 +2610,40 @@ void cpu_stb_data_ra(CPUArchState *env, target_ulong ptr, cpu_stb_mmuidx_ra(env, ptr, val, cpu_mmu_index(env, false), retaddr); } -void cpu_stw_data_ra(CPUArchState *env, target_ulong ptr, - uint32_t val, uintptr_t retaddr) +void cpu_stw_be_data_ra(CPUArchState *env, target_ulong ptr, + uint32_t val, uintptr_t retaddr) { - 
cpu_stw_mmuidx_ra(env, ptr, val, cpu_mmu_index(env, false), retaddr); + cpu_stw_be_mmuidx_ra(env, ptr, val, cpu_mmu_index(env, false), retaddr); } -void cpu_stl_data_ra(CPUArchState *env, target_ulong ptr, - uint32_t val, uintptr_t retaddr) +void cpu_stl_be_data_ra(CPUArchState *env, target_ulong ptr, + uint32_t val, uintptr_t retaddr) { - cpu_stl_mmuidx_ra(env, ptr, val, cpu_mmu_index(env, false), retaddr); + cpu_stl_be_mmuidx_ra(env, ptr, val, cpu_mmu_index(env, false), retaddr); } -void cpu_stq_data_ra(CPUArchState *env, target_ulong ptr, - uint64_t val, uintptr_t retaddr) +void cpu_stq_be_data_ra(CPUArchState *env, target_ulong ptr, + uint64_t val, uintptr_t retaddr) { - cpu_stq_mmuidx_ra(env, ptr, val, cpu_mmu_index(env, false), retaddr); + cpu_stq_be_mmuidx_ra(env, ptr, val, cpu_mmu_index(env, false), retaddr); +} + +void cpu_stw_le_data_ra(CPUArchState *env, target_ulong ptr, + uint32_t val, uintptr_t retaddr) +{ + cpu_stw_le_mmuidx_ra(env, ptr, val, cpu_mmu_index(env, false), retaddr); +} + +void cpu_stl_le_data_ra(CPUArchState *env, target_ulong ptr, + uint32_t val, uintptr_t retaddr) +{ + cpu_stl_le_mmuidx_ra(env, ptr, val, cpu_mmu_index(env, false), retaddr); +} + +void cpu_stq_le_data_ra(CPUArchState *env, target_ulong ptr, + uint64_t val, uintptr_t retaddr) +{ + cpu_stq_le_mmuidx_ra(env, ptr, val, cpu_mmu_index(env, false), retaddr); } void cpu_stb_data(CPUArchState *env, target_ulong ptr, uint32_t val) @@ -2475,19 +2651,34 @@ void cpu_stb_data(CPUArchState *env, target_ulong ptr, uint32_t val) cpu_stb_data_ra(env, ptr, val, 0); } -void cpu_stw_data(CPUArchState *env, target_ulong ptr, uint32_t val) +void cpu_stw_be_data(CPUArchState *env, target_ulong ptr, uint32_t val) +{ + cpu_stw_be_data_ra(env, ptr, val, 0); +} + +void cpu_stl_be_data(CPUArchState *env, target_ulong ptr, uint32_t val) +{ + cpu_stl_be_data_ra(env, ptr, val, 0); +} + +void cpu_stq_be_data(CPUArchState *env, target_ulong ptr, uint64_t val) +{ + cpu_stq_be_data_ra(env, ptr, val, 0); +} + +void cpu_stw_le_data(CPUArchState *env, target_ulong ptr, uint32_t val) { - cpu_stw_data_ra(env, ptr, val, 0); + cpu_stw_le_data_ra(env, ptr, val, 0); } -void cpu_stl_data(CPUArchState *env, target_ulong ptr, uint32_t val) +void cpu_stl_le_data(CPUArchState *env, target_ulong ptr, uint32_t val) { - cpu_stl_data_ra(env, ptr, val, 0); + cpu_stl_le_data_ra(env, ptr, val, 0); } -void cpu_stq_data(CPUArchState *env, target_ulong ptr, uint64_t val) +void cpu_stq_le_data(CPUArchState *env, target_ulong ptr, uint64_t val) { - cpu_stq_data_ra(env, ptr, val, 0); + cpu_stq_le_data_ra(env, ptr, val, 0); } /* First set of helpers allows passing in of OI and RETADDR. 
This makes diff --git a/qemu/accel/tcg/tcg-runtime-gvec.c b/qemu/accel/tcg/tcg-runtime-gvec.c index ea997c257f..41ab422366 100644 --- a/qemu/accel/tcg/tcg-runtime-gvec.c +++ b/qemu/accel/tcg/tcg-runtime-gvec.c @@ -724,6 +724,54 @@ void HELPER(gvec_sar64i)(void *d, void *a, uint32_t desc) clear_high(d, oprsz, desc); } +void HELPER(gvec_rotl8i)(void *d, void *a, uint32_t desc) +{ + intptr_t oprsz = simd_oprsz(desc); + int shift = simd_data(desc); + intptr_t i; + + for (i = 0; i < oprsz; i += sizeof(uint8_t)) { + *(uint8_t *)((char *)d + i) = rol8(*(uint8_t *)((char *)a + i), shift); + } + clear_high(d, oprsz, desc); +} + +void HELPER(gvec_rotl16i)(void *d, void *a, uint32_t desc) +{ + intptr_t oprsz = simd_oprsz(desc); + int shift = simd_data(desc); + intptr_t i; + + for (i = 0; i < oprsz; i += sizeof(uint16_t)) { + *(uint16_t *)((char *)d + i) = rol16(*(uint16_t *)((char *)a + i), shift); + } + clear_high(d, oprsz, desc); +} + +void HELPER(gvec_rotl32i)(void *d, void *a, uint32_t desc) +{ + intptr_t oprsz = simd_oprsz(desc); + int shift = simd_data(desc); + intptr_t i; + + for (i = 0; i < oprsz; i += sizeof(uint32_t)) { + *(uint32_t *)((char *)d + i) = rol32(*(uint32_t *)((char *)a + i), shift); + } + clear_high(d, oprsz, desc); +} + +void HELPER(gvec_rotl64i)(void *d, void *a, uint32_t desc) +{ + intptr_t oprsz = simd_oprsz(desc); + int shift = simd_data(desc); + intptr_t i; + + for (i = 0; i < oprsz; i += sizeof(uint64_t)) { + *(uint64_t *)((char *)d + i) = rol64(*(uint64_t *)((char *)a + i), shift); + } + clear_high(d, oprsz, desc); +} + void HELPER(gvec_shl8v)(void *d, void *a, void *b, uint32_t desc) { intptr_t oprsz = simd_oprsz(desc); @@ -868,6 +916,102 @@ void HELPER(gvec_sar64v)(void *d, void *a, void *b, uint32_t desc) clear_high(d, oprsz, desc); } +void HELPER(gvec_rotl8v)(void *d, void *a, void *b, uint32_t desc) +{ + intptr_t oprsz = simd_oprsz(desc); + intptr_t i; + + for (i = 0; i < oprsz; i += sizeof(uint8_t)) { + uint8_t sh = *(uint8_t *)((char *)b + i) & 7; + *(uint8_t *)((char *)d + i) = rol8(*(uint8_t *)((char *)a + i), sh); + } + clear_high(d, oprsz, desc); +} + +void HELPER(gvec_rotl16v)(void *d, void *a, void *b, uint32_t desc) +{ + intptr_t oprsz = simd_oprsz(desc); + intptr_t i; + + for (i = 0; i < oprsz; i += sizeof(uint16_t)) { + uint8_t sh = *(uint16_t *)((char *)b + i) & 15; + *(uint16_t *)((char *)d + i) = rol16(*(uint16_t *)((char *)a + i), sh); + } + clear_high(d, oprsz, desc); +} + +void HELPER(gvec_rotl32v)(void *d, void *a, void *b, uint32_t desc) +{ + intptr_t oprsz = simd_oprsz(desc); + intptr_t i; + + for (i = 0; i < oprsz; i += sizeof(uint32_t)) { + uint8_t sh = *(uint32_t *)((char *)b + i) & 31; + *(uint32_t *)((char *)d + i) = rol32(*(uint32_t *)((char *)a + i), sh); + } + clear_high(d, oprsz, desc); +} + +void HELPER(gvec_rotl64v)(void *d, void *a, void *b, uint32_t desc) +{ + intptr_t oprsz = simd_oprsz(desc); + intptr_t i; + + for (i = 0; i < oprsz; i += sizeof(uint64_t)) { + uint8_t sh = *(uint64_t *)((char *)b + i) & 63; + *(uint64_t *)((char *)d + i) = rol64(*(uint64_t *)((char *)a + i), sh); + } + clear_high(d, oprsz, desc); +} + +void HELPER(gvec_rotr8v)(void *d, void *a, void *b, uint32_t desc) +{ + intptr_t oprsz = simd_oprsz(desc); + intptr_t i; + + for (i = 0; i < oprsz; i += sizeof(uint8_t)) { + uint8_t sh = *(uint8_t *)((char *)b + i) & 7; + *(uint8_t *)((char *)d + i) = ror8(*(uint8_t *)((char *)a + i), sh); + } + clear_high(d, oprsz, desc); +} + +void HELPER(gvec_rotr16v)(void *d, void *a, void *b, uint32_t desc) +{ + intptr_t oprsz = 
simd_oprsz(desc); + intptr_t i; + + for (i = 0; i < oprsz; i += sizeof(uint16_t)) { + uint8_t sh = *(uint16_t *)((char *)b + i) & 15; + *(uint16_t *)((char *)d + i) = ror16(*(uint16_t *)((char *)a + i), sh); + } + clear_high(d, oprsz, desc); +} + +void HELPER(gvec_rotr32v)(void *d, void *a, void *b, uint32_t desc) +{ + intptr_t oprsz = simd_oprsz(desc); + intptr_t i; + + for (i = 0; i < oprsz; i += sizeof(uint32_t)) { + uint8_t sh = *(uint32_t *)((char *)b + i) & 31; + *(uint32_t *)((char *)d + i) = ror32(*(uint32_t *)((char *)a + i), sh); + } + clear_high(d, oprsz, desc); +} + +void HELPER(gvec_rotr64v)(void *d, void *a, void *b, uint32_t desc) +{ + intptr_t oprsz = simd_oprsz(desc); + intptr_t i; + + for (i = 0; i < oprsz; i += sizeof(uint64_t)) { + uint8_t sh = *(uint64_t *)((char *)b + i) & 63; + *(uint64_t *)((char *)d + i) = ror64(*(uint64_t *)((char *)a + i), sh); + } + clear_high(d, oprsz, desc); +} + #define DO_CMP1(NAME, TYPE, OP) \ void HELPER(NAME)(void *d, void *a, void *b, uint32_t desc) \ { \ diff --git a/qemu/accel/tcg/tcg-runtime.h b/qemu/accel/tcg/tcg-runtime.h index ab7369e8e3..b694d30e22 100644 --- a/qemu/accel/tcg/tcg-runtime.h +++ b/qemu/accel/tcg/tcg-runtime.h @@ -213,6 +213,11 @@ DEF_HELPER_FLAGS_3(gvec_sar16i, TCG_CALL_NO_RWG, void, ptr, ptr, i32) DEF_HELPER_FLAGS_3(gvec_sar32i, TCG_CALL_NO_RWG, void, ptr, ptr, i32) DEF_HELPER_FLAGS_3(gvec_sar64i, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(gvec_rotl8i, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(gvec_rotl16i, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(gvec_rotl32i, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(gvec_rotl64i, TCG_CALL_NO_RWG, void, ptr, ptr, i32) + DEF_HELPER_FLAGS_4(gvec_shl8v, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_4(gvec_shl16v, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_4(gvec_shl32v, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) @@ -228,6 +233,16 @@ DEF_HELPER_FLAGS_4(gvec_sar16v, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_4(gvec_sar32v, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_4(gvec_sar64v, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(gvec_rotl8v, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(gvec_rotl16v, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(gvec_rotl32v, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(gvec_rotl64v, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_4(gvec_rotr8v, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(gvec_rotr16v, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(gvec_rotr32v, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(gvec_rotr64v, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) + DEF_HELPER_FLAGS_4(gvec_eq8, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_4(gvec_eq16, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_4(gvec_eq32, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) diff --git a/qemu/accel/tcg/translate-all.c b/qemu/accel/tcg/translate-all.c index 3f6d2630f7..d240f35c87 100644 --- a/qemu/accel/tcg/translate-all.c +++ b/qemu/accel/tcg/translate-all.c @@ -1694,9 +1694,7 @@ TranslationBlock *tb_gen_code(CPUState *cpu, target_ulong pc, target_ulong cs_base, uint32_t flags, int cflags) { -#ifdef TARGET_ARM - struct uc_struct *uc = cpu->uc; -#endif + UNICORN_UNUSED struct uc_struct *uc = cpu->uc; TCGContext *tcg_ctx = cpu->uc->tcg_ctx; CPUArchState *env = cpu->env_ptr; TranslationBlock 
*tb, *existing_tb; @@ -2155,9 +2153,7 @@ static void tb_jmp_cache_clear_page(CPUState *cpu, target_ulong page_addr) void tb_flush_jmp_cache(CPUState *cpu, target_ulong addr) { -#ifdef TARGET_ARM - struct uc_struct *uc = cpu->uc; -#endif + UNICORN_UNUSED struct uc_struct *uc = cpu->uc; /* Discard jump cache entries for any tb which might potentially overlap the flushed page. */ diff --git a/qemu/arm.h b/qemu/arm.h index 27592db350..061cd1d444 100644 --- a/qemu/arm.h +++ b/qemu/arm.h @@ -42,7 +42,10 @@ #define tcg_gen_shl_i64 tcg_gen_shl_i64_arm #define tcg_gen_shr_i64 tcg_gen_shr_i64_arm #define tcg_gen_st_i64 tcg_gen_st_i64_arm +#define tcg_gen_add_i64 tcg_gen_add_i64_arm +#define tcg_gen_sub_i64 tcg_gen_sub_i64_arm #define tcg_gen_xor_i64 tcg_gen_xor_i64_arm +#define tcg_gen_neg_i64 tcg_gen_neg_i64_arm #define cpu_icount_to_ns cpu_icount_to_ns_arm #define cpu_is_stopped cpu_is_stopped_arm #define cpu_get_ticks cpu_get_ticks_arm @@ -374,6 +377,8 @@ #define floatx80_sub floatx80_sub_arm #define floatx80_mul floatx80_mul_arm #define floatx80_div floatx80_div_arm +#define floatx80_modrem floatx80_modrem_arm +#define floatx80_mod floatx80_mod_arm #define floatx80_rem floatx80_rem_arm #define floatx80_sqrt floatx80_sqrt_arm #define floatx80_eq floatx80_eq_arm @@ -648,6 +653,7 @@ #define tcg_gen_gvec_dup_i32 tcg_gen_gvec_dup_i32_arm #define tcg_gen_gvec_dup_i64 tcg_gen_gvec_dup_i64_arm #define tcg_gen_gvec_dup_mem tcg_gen_gvec_dup_mem_arm +#define tcg_gen_gvec_dup_imm tcg_gen_gvec_dup_imm_arm #define tcg_gen_gvec_dup64i tcg_gen_gvec_dup64i_arm #define tcg_gen_gvec_dup32i tcg_gen_gvec_dup32i_arm #define tcg_gen_gvec_dup16i tcg_gen_gvec_dup16i_arm @@ -702,13 +708,20 @@ #define tcg_gen_gvec_shri tcg_gen_gvec_shri_arm #define tcg_gen_vec_sar8i_i64 tcg_gen_vec_sar8i_i64_arm #define tcg_gen_vec_sar16i_i64 tcg_gen_vec_sar16i_i64_arm +#define tcg_gen_vec_rotl8i_i64 tcg_gen_vec_rotl8i_i64_arm +#define tcg_gen_vec_rotl16i_i64 tcg_gen_vec_rotl16i_i64_arm #define tcg_gen_gvec_sari tcg_gen_gvec_sari_arm +#define tcg_gen_gvec_rotli tcg_gen_gvec_rotli_arm +#define tcg_gen_gvec_rotri tcg_gen_gvec_rotri_arm #define tcg_gen_gvec_shls tcg_gen_gvec_shls_arm #define tcg_gen_gvec_shrs tcg_gen_gvec_shrs_arm #define tcg_gen_gvec_sars tcg_gen_gvec_sars_arm +#define tcg_gen_gvec_rotls tcg_gen_gvec_rotls_arm #define tcg_gen_gvec_shlv tcg_gen_gvec_shlv_arm #define tcg_gen_gvec_shrv tcg_gen_gvec_shrv_arm #define tcg_gen_gvec_sarv tcg_gen_gvec_sarv_arm +#define tcg_gen_gvec_rotlv tcg_gen_gvec_rotlv_arm +#define tcg_gen_gvec_rotrv tcg_gen_gvec_rotrv_arm #define tcg_gen_gvec_cmp tcg_gen_gvec_cmp_arm #define tcg_gen_gvec_bitsel tcg_gen_gvec_bitsel_arm #define tcg_can_emit_vecop_list tcg_can_emit_vecop_list_arm @@ -745,6 +758,8 @@ #define tcg_gen_shli_vec tcg_gen_shli_vec_arm #define tcg_gen_shri_vec tcg_gen_shri_vec_arm #define tcg_gen_sari_vec tcg_gen_sari_vec_arm +#define tcg_gen_rotli_vec tcg_gen_rotli_vec_arm +#define tcg_gen_rotri_vec tcg_gen_rotri_vec_arm #define tcg_gen_cmp_vec tcg_gen_cmp_vec_arm #define tcg_gen_add_vec tcg_gen_add_vec_arm #define tcg_gen_sub_vec tcg_gen_sub_vec_arm @@ -760,9 +775,12 @@ #define tcg_gen_shlv_vec tcg_gen_shlv_vec_arm #define tcg_gen_shrv_vec tcg_gen_shrv_vec_arm #define tcg_gen_sarv_vec tcg_gen_sarv_vec_arm +#define tcg_gen_rotlv_vec tcg_gen_rotlv_vec_arm +#define tcg_gen_rotrv_vec tcg_gen_rotrv_vec_arm #define tcg_gen_shls_vec tcg_gen_shls_vec_arm #define tcg_gen_shrs_vec tcg_gen_shrs_vec_arm #define tcg_gen_sars_vec tcg_gen_sars_vec_arm +#define tcg_gen_rotls_vec tcg_gen_rotls_vec_arm 
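
The tcg_gen_gvec_rotli/rotri/rotls/rotlv/rotrv entries above correspond to the element-wise rotate helpers added in tcg-runtime-gvec.c earlier in this patch. As a standalone sketch (local names only, not QEMU's rol32/clear_high infrastructure), the variable-count 32-bit form reduces to a per-lane loop with the count masked to the lane width, which is what keeps every shift amount well defined:

    #include <stddef.h>
    #include <stdint.h>

    /* Local rotate-left of a 32-bit value; sh has already been masked to 0..31. */
    static inline uint32_t rotl32_local(uint32_t x, unsigned sh)
    {
        return sh ? (x << sh) | (x >> (32 - sh)) : x;
    }

    /* Per-lane rotate in the spirit of gvec_rotl32v: lane i of b supplies the
     * rotate count for lane i of a, masked to the lane width. */
    static void rotl32_lanes(uint32_t *d, const uint32_t *a,
                             const uint32_t *b, size_t lanes)
    {
        for (size_t i = 0; i < lanes; i++) {
            d[i] = rotl32_local(a[i], b[i] & 31);
        }
    }

The immediate forms (gvec_rotl8i through gvec_rotl64i) are the same loop with a single count taken from simd_data(desc) instead of a second vector operand.
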
#define tcg_gen_bitsel_vec tcg_gen_bitsel_vec_arm #define tcg_gen_cmpsel_vec tcg_gen_cmpsel_vec_arm #define tb_htable_lookup tb_htable_lookup_arm @@ -774,6 +792,7 @@ #define cpu_loop_exit_restore cpu_loop_exit_restore_arm #define cpu_loop_exit_atomic cpu_loop_exit_atomic_arm #define tlb_init tlb_init_arm +#define tlb_destroy tlb_destroy_arm #define tlb_flush_by_mmuidx tlb_flush_by_mmuidx_arm #define tlb_flush tlb_flush_arm #define tlb_flush_by_mmuidx_all_cpus tlb_flush_by_mmuidx_all_cpus_arm @@ -794,6 +813,7 @@ #define tlb_set_page tlb_set_page_arm #define get_page_addr_code_hostp get_page_addr_code_hostp_arm #define get_page_addr_code get_page_addr_code_arm +#define probe_access_flags probe_access_flags_arm #define probe_access probe_access_arm #define tlb_vaddr_to_host tlb_vaddr_to_host_arm #define helper_ret_ldub_mmu helper_ret_ldub_mmu_arm @@ -810,22 +830,34 @@ #define helper_be_ldsl_mmu helper_be_ldsl_mmu_arm #define cpu_ldub_mmuidx_ra cpu_ldub_mmuidx_ra_arm #define cpu_ldsb_mmuidx_ra cpu_ldsb_mmuidx_ra_arm -#define cpu_lduw_mmuidx_ra cpu_lduw_mmuidx_ra_arm -#define cpu_ldsw_mmuidx_ra cpu_ldsw_mmuidx_ra_arm -#define cpu_ldl_mmuidx_ra cpu_ldl_mmuidx_ra_arm -#define cpu_ldq_mmuidx_ra cpu_ldq_mmuidx_ra_arm +#define cpu_lduw_be_mmuidx_ra cpu_lduw_be_mmuidx_ra_arm +#define cpu_lduw_le_mmuidx_ra cpu_lduw_le_mmuidx_ra_arm +#define cpu_ldsw_be_mmuidx_ra cpu_ldsw_be_mmuidx_ra_arm +#define cpu_ldsw_le_mmuidx_ra cpu_ldsw_le_mmuidx_ra_arm +#define cpu_ldl_be_mmuidx_ra cpu_ldl_be_mmuidx_ra_arm +#define cpu_ldl_le_mmuidx_ra cpu_ldl_le_mmuidx_ra_arm +#define cpu_ldq_be_mmuidx_ra cpu_ldq_be_mmuidx_ra_arm +#define cpu_ldq_le_mmuidx_ra cpu_ldq_le_mmuidx_ra_arm #define cpu_ldub_data_ra cpu_ldub_data_ra_arm #define cpu_ldsb_data_ra cpu_ldsb_data_ra_arm -#define cpu_lduw_data_ra cpu_lduw_data_ra_arm -#define cpu_ldsw_data_ra cpu_ldsw_data_ra_arm -#define cpu_ldl_data_ra cpu_ldl_data_ra_arm -#define cpu_ldq_data_ra cpu_ldq_data_ra_arm +#define cpu_lduw_be_data_ra cpu_lduw_be_data_ra_arm +#define cpu_lduw_le_data_ra cpu_lduw_le_data_ra_arm +#define cpu_ldsw_be_data_ra cpu_ldsw_be_data_ra_arm +#define cpu_ldsw_le_data_ra cpu_ldsw_le_data_ra_arm +#define cpu_ldl_be_data_ra cpu_ldl_be_data_ra_arm +#define cpu_ldl_le_data_ra cpu_ldl_le_data_ra_arm +#define cpu_ldq_be_data_ra cpu_ldq_be_data_ra_arm +#define cpu_ldq_le_data_ra cpu_ldq_le_data_ra_arm #define cpu_ldub_data cpu_ldub_data_arm #define cpu_ldsb_data cpu_ldsb_data_arm -#define cpu_lduw_data cpu_lduw_data_arm -#define cpu_ldsw_data cpu_ldsw_data_arm -#define cpu_ldl_data cpu_ldl_data_arm -#define cpu_ldq_data cpu_ldq_data_arm +#define cpu_lduw_be_data cpu_lduw_be_data_arm +#define cpu_lduw_le_data cpu_lduw_le_data_arm +#define cpu_ldsw_be_data cpu_ldsw_be_data_arm +#define cpu_ldsw_le_data cpu_ldsw_le_data_arm +#define cpu_ldl_be_data cpu_ldl_be_data_arm +#define cpu_ldl_le_data cpu_ldl_le_data_arm +#define cpu_ldq_le_data cpu_ldq_le_data_arm +#define cpu_ldq_be_data cpu_ldq_be_data_arm #define helper_ret_stb_mmu helper_ret_stb_mmu_arm #define helper_le_stw_mmu helper_le_stw_mmu_arm #define helper_be_stw_mmu helper_be_stw_mmu_arm @@ -834,17 +866,26 @@ #define helper_le_stq_mmu helper_le_stq_mmu_arm #define helper_be_stq_mmu helper_be_stq_mmu_arm #define cpu_stb_mmuidx_ra cpu_stb_mmuidx_ra_arm -#define cpu_stw_mmuidx_ra cpu_stw_mmuidx_ra_arm -#define cpu_stl_mmuidx_ra cpu_stl_mmuidx_ra_arm -#define cpu_stq_mmuidx_ra cpu_stq_mmuidx_ra_arm +#define cpu_stw_be_mmuidx_ra cpu_stw_be_mmuidx_ra_arm +#define cpu_stw_le_mmuidx_ra cpu_stw_le_mmuidx_ra_arm +#define 
cpu_stl_be_mmuidx_ra cpu_stl_be_mmuidx_ra_arm +#define cpu_stl_le_mmuidx_ra cpu_stl_le_mmuidx_ra_arm +#define cpu_stq_be_mmuidx_ra cpu_stq_be_mmuidx_ra_arm +#define cpu_stq_le_mmuidx_ra cpu_stq_le_mmuidx_ra_arm #define cpu_stb_data_ra cpu_stb_data_ra_arm -#define cpu_stw_data_ra cpu_stw_data_ra_arm -#define cpu_stl_data_ra cpu_stl_data_ra_arm -#define cpu_stq_data_ra cpu_stq_data_ra_arm +#define cpu_stw_be_data_ra cpu_stw_be_data_ra_arm +#define cpu_stw_le_data_ra cpu_stw_le_data_ra_arm +#define cpu_stl_be_data_ra cpu_stl_be_data_ra_arm +#define cpu_stl_le_data_ra cpu_stl_le_data_ra_arm +#define cpu_stq_be_data_ra cpu_stq_be_data_ra_arm +#define cpu_stq_le_data_ra cpu_stq_le_data_ra_arm #define cpu_stb_data cpu_stb_data_arm -#define cpu_stw_data cpu_stw_data_arm -#define cpu_stl_data cpu_stl_data_arm -#define cpu_stq_data cpu_stq_data_arm +#define cpu_stw_be_data cpu_stw_be_data_arm +#define cpu_stw_le_data cpu_stw_le_data_arm +#define cpu_stl_be_data cpu_stl_be_data_arm +#define cpu_stl_le_data cpu_stl_le_data_arm +#define cpu_stq_be_data cpu_stq_be_data_arm +#define cpu_stq_le_data cpu_stq_le_data_arm #define helper_atomic_cmpxchgb_mmu helper_atomic_cmpxchgb_mmu_arm #define helper_atomic_xchgb_mmu helper_atomic_xchgb_mmu_arm #define helper_atomic_fetch_addb_mmu helper_atomic_fetch_addb_mmu_arm @@ -1101,6 +1142,7 @@ #define cpu_lduw_code cpu_lduw_code_arm #define cpu_ldl_code cpu_ldl_code_arm #define cpu_ldq_code cpu_ldq_code_arm +#define cpu_interrupt_handler cpu_interrupt_handler_arm #define helper_div_i32 helper_div_i32_arm #define helper_rem_i32 helper_rem_i32_arm #define helper_divu_i32 helper_divu_i32_arm @@ -1185,6 +1227,10 @@ #define helper_gvec_sar16i helper_gvec_sar16i_arm #define helper_gvec_sar32i helper_gvec_sar32i_arm #define helper_gvec_sar64i helper_gvec_sar64i_arm +#define helper_gvec_rotl8i helper_gvec_rotl8i_arm +#define helper_gvec_rotl16i helper_gvec_rotl16i_arm +#define helper_gvec_rotl32i helper_gvec_rotl32i_arm +#define helper_gvec_rotl64i helper_gvec_rotl64i_arm #define helper_gvec_shl8v helper_gvec_shl8v_arm #define helper_gvec_shl16v helper_gvec_shl16v_arm #define helper_gvec_shl32v helper_gvec_shl32v_arm @@ -1197,6 +1243,14 @@ #define helper_gvec_sar16v helper_gvec_sar16v_arm #define helper_gvec_sar32v helper_gvec_sar32v_arm #define helper_gvec_sar64v helper_gvec_sar64v_arm +#define helper_gvec_rotl8v helper_gvec_rotl8v_arm +#define helper_gvec_rotl16v helper_gvec_rotl16v_arm +#define helper_gvec_rotl32v helper_gvec_rotl32v_arm +#define helper_gvec_rotl64v helper_gvec_rotl64v_arm +#define helper_gvec_rotr8v helper_gvec_rotr8v_arm +#define helper_gvec_rotr16v helper_gvec_rotr16v_arm +#define helper_gvec_rotr32v helper_gvec_rotr32v_arm +#define helper_gvec_rotr64v helper_gvec_rotr64v_arm #define helper_gvec_eq8 helper_gvec_eq8_arm #define helper_gvec_ne8 helper_gvec_ne8_arm #define helper_gvec_lt8 helper_gvec_lt8_arm @@ -1997,4 +2051,100 @@ #define sri_op sri_op_arm #define usra_op usra_op_arm #define ssra_op ssra_op_arm +#define gen_gvec_ceq0 gen_gvec_ceq0_arm +#define gen_gvec_cge0 gen_gvec_cge0_arm +#define gen_gvec_cgt0 gen_gvec_cgt0_arm +#define gen_gvec_cle0 gen_gvec_cle0_arm +#define gen_gvec_clt0 gen_gvec_clt0_arm +#define gen_gvec_cmtst gen_gvec_cmtst_arm +#define gen_gvec_mla gen_gvec_mla_arm +#define gen_gvec_mls gen_gvec_mls_arm +#define gen_gvec_saba gen_gvec_saba_arm +#define gen_gvec_sabd gen_gvec_sabd_arm +#define gen_gvec_sli gen_gvec_sli_arm +#define gen_gvec_sqadd_qc gen_gvec_sqadd_qc_arm +#define gen_gvec_sqrdmlah_qc gen_gvec_sqrdmlah_qc_arm 
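
The rename block above also grows because QEMU 5.1 splits the endianness-implicit cpu_ld*/cpu_st* accessors into explicit _be_ and _le_ variants, so each variant now needs its own _arm-suffixed alias. The point of the split is that byte order travels with the accessor name rather than with a single target-wide default; a minimal standalone sketch of that distinction (not the cpu_ldst.h implementation) is:

    #include <stdint.h>

    /* Standalone sketch of what the _be_ / _le_ accessor split encodes: the
     * byte order is part of the function name, not a property of the build. */
    static uint16_t load16_be(const uint8_t *p)
    {
        return (uint16_t)((p[0] << 8) | p[1]);
    }

    static uint16_t load16_le(const uint8_t *p)
    {
        return (uint16_t)(p[0] | (p[1] << 8));
    }

Code that needs a specific byte order can now ask for it per access instead of relying on the target's compile-time default.
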
+#define gen_gvec_sqrdmlsh_qc gen_gvec_sqrdmlsh_qc_arm +#define gen_gvec_sqsub_qc gen_gvec_sqsub_qc_arm +#define gen_gvec_sri gen_gvec_sri_arm +#define gen_gvec_srshr gen_gvec_srshr_arm +#define gen_gvec_srsra gen_gvec_srsra_arm +#define gen_gvec_sshl gen_gvec_sshl_arm +#define gen_gvec_ssra gen_gvec_ssra_arm +#define gen_gvec_uaba gen_gvec_uaba_arm +#define gen_gvec_uabd gen_gvec_uabd_arm +#define gen_gvec_uqadd_qc gen_gvec_uqadd_qc_arm +#define gen_gvec_uqsub_qc gen_gvec_uqsub_qc_arm +#define gen_gvec_urshr gen_gvec_urshr_arm +#define gen_gvec_ursra gen_gvec_ursra_arm +#define gen_gvec_ushl gen_gvec_ushl_arm +#define gen_gvec_usra gen_gvec_usra_arm +#define helper_crypto_rax1 helper_crypto_rax1_arm +#define helper_crypto_sha1c helper_crypto_sha1c_arm +#define helper_crypto_sha1m helper_crypto_sha1m_arm +#define helper_crypto_sha1p helper_crypto_sha1p_arm +#define helper_crypto_sha1su0 helper_crypto_sha1su0_arm +#define helper_crypto_sm3tt1a helper_crypto_sm3tt1a_arm +#define helper_crypto_sm3tt1b helper_crypto_sm3tt1b_arm +#define helper_crypto_sm3tt2a helper_crypto_sm3tt2a_arm +#define helper_crypto_sm3tt2b helper_crypto_sm3tt2b_arm +#define helper_gvec_ceq0_b helper_gvec_ceq0_b_arm +#define helper_gvec_ceq0_h helper_gvec_ceq0_h_arm +#define helper_gvec_cge0_b helper_gvec_cge0_b_arm +#define helper_gvec_cge0_h helper_gvec_cge0_h_arm +#define helper_gvec_cgt0_b helper_gvec_cgt0_b_arm +#define helper_gvec_cgt0_h helper_gvec_cgt0_h_arm +#define helper_gvec_cle0_b helper_gvec_cle0_b_arm +#define helper_gvec_cle0_h helper_gvec_cle0_h_arm +#define helper_gvec_clt0_b helper_gvec_clt0_b_arm +#define helper_gvec_clt0_h helper_gvec_clt0_h_arm +#define helper_gvec_fabd_s helper_gvec_fabd_s_arm +#define helper_gvec_saba_b helper_gvec_saba_b_arm +#define helper_gvec_saba_d helper_gvec_saba_d_arm +#define helper_gvec_saba_h helper_gvec_saba_h_arm +#define helper_gvec_saba_s helper_gvec_saba_s_arm +#define helper_gvec_sabd_b helper_gvec_sabd_b_arm +#define helper_gvec_sabd_d helper_gvec_sabd_d_arm +#define helper_gvec_sabd_h helper_gvec_sabd_h_arm +#define helper_gvec_sabd_s helper_gvec_sabd_s_arm +#define helper_gvec_sli_b helper_gvec_sli_b_arm +#define helper_gvec_sli_d helper_gvec_sli_d_arm +#define helper_gvec_sli_h helper_gvec_sli_h_arm +#define helper_gvec_sli_s helper_gvec_sli_s_arm +#define helper_gvec_sri_b helper_gvec_sri_b_arm +#define helper_gvec_sri_d helper_gvec_sri_d_arm +#define helper_gvec_sri_h helper_gvec_sri_h_arm +#define helper_gvec_sri_s helper_gvec_sri_s_arm +#define helper_gvec_srshr_b helper_gvec_srshr_b_arm +#define helper_gvec_srshr_d helper_gvec_srshr_d_arm +#define helper_gvec_srshr_h helper_gvec_srshr_h_arm +#define helper_gvec_srshr_s helper_gvec_srshr_s_arm +#define helper_gvec_srsra_b helper_gvec_srsra_b_arm +#define helper_gvec_srsra_d helper_gvec_srsra_d_arm +#define helper_gvec_srsra_h helper_gvec_srsra_h_arm +#define helper_gvec_srsra_s helper_gvec_srsra_s_arm +#define helper_gvec_ssra_b helper_gvec_ssra_b_arm +#define helper_gvec_ssra_d helper_gvec_ssra_d_arm +#define helper_gvec_ssra_h helper_gvec_ssra_h_arm +#define helper_gvec_ssra_s helper_gvec_ssra_s_arm +#define helper_gvec_uaba_b helper_gvec_uaba_b_arm +#define helper_gvec_uaba_d helper_gvec_uaba_d_arm +#define helper_gvec_uaba_h helper_gvec_uaba_h_arm +#define helper_gvec_uaba_s helper_gvec_uaba_s_arm +#define helper_gvec_uabd_b helper_gvec_uabd_b_arm +#define helper_gvec_uabd_d helper_gvec_uabd_d_arm +#define helper_gvec_uabd_h helper_gvec_uabd_h_arm +#define helper_gvec_uabd_s helper_gvec_uabd_s_arm 
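
The gen_gvec_saba/uaba and helper_gvec_sabd/uabd entries above back Arm's vector absolute-difference instructions (SABD/UABD) and their accumulating forms (SABA/UABA). Per lane they reduce to the following; this is a standalone sketch of the unsigned 32-bit case, not the generated helpers themselves:

    #include <stddef.h>
    #include <stdint.h>

    /* UABD: d[i] = |a[i] - b[i]| on unsigned lanes. */
    static void uabd_u32(uint32_t *d, const uint32_t *a,
                         const uint32_t *b, size_t lanes)
    {
        for (size_t i = 0; i < lanes; i++) {
            d[i] = a[i] > b[i] ? a[i] - b[i] : b[i] - a[i];
        }
    }

    /* UABA: same per-lane difference, accumulated into the destination. */
    static void uaba_u32(uint32_t *d, const uint32_t *a,
                         const uint32_t *b, size_t lanes)
    {
        for (size_t i = 0; i < lanes; i++) {
            d[i] += a[i] > b[i] ? a[i] - b[i] : b[i] - a[i];
        }
    }

The signed variants only change the comparison that decides which operand is larger; the 2's-complement subtraction yields the same absolute difference either way.
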
+#define helper_gvec_urshr_b helper_gvec_urshr_b_arm +#define helper_gvec_urshr_d helper_gvec_urshr_d_arm +#define helper_gvec_urshr_h helper_gvec_urshr_h_arm +#define helper_gvec_urshr_s helper_gvec_urshr_s_arm +#define helper_gvec_ursra_b helper_gvec_ursra_b_arm +#define helper_gvec_ursra_d helper_gvec_ursra_d_arm +#define helper_gvec_ursra_h helper_gvec_ursra_h_arm +#define helper_gvec_ursra_s helper_gvec_ursra_s_arm +#define helper_gvec_usra_b helper_gvec_usra_b_arm +#define helper_gvec_usra_d helper_gvec_usra_d_arm +#define helper_gvec_usra_h helper_gvec_usra_h_arm +#define helper_gvec_usra_s helper_gvec_usra_s_arm #endif diff --git a/qemu/exec.c b/qemu/exec.c index 9786b19557..e9070d3448 100644 --- a/qemu/exec.c +++ b/qemu/exec.c @@ -171,9 +171,7 @@ static void phys_page_set(AddressSpaceDispatch *d, hwaddr index, uint64_t nb, uint16_t leaf) { -#ifdef TARGET_ARM - struct uc_struct *uc = d->uc; -#endif + UNICORN_UNUSED struct uc_struct *uc = d->uc; /* Wildly overreserve - it doesn't matter much. */ phys_map_node_reserve(d, &d->map, 3 * P_L2_LEVELS); @@ -254,9 +252,7 @@ static inline bool section_covers_addr(const MemoryRegionSection *section, static MemoryRegionSection *phys_page_find(AddressSpaceDispatch *d, hwaddr addr) { -#ifdef TARGET_ARM - struct uc_struct *uc = d->uc; -#endif + UNICORN_UNUSED struct uc_struct *uc = d->uc; PhysPageEntry lp = d->phys_map, *p; Node *nodes = d->map.nodes; MemoryRegionSection *sections = d->map.sections; @@ -283,9 +279,7 @@ static MemoryRegionSection *address_space_lookup_region(AddressSpaceDispatch *d, hwaddr addr, bool resolve_subpage) { -#ifdef TARGET_ARM - struct uc_struct *uc = d->uc; -#endif + UNICORN_UNUSED struct uc_struct *uc = d->uc; MemoryRegionSection *section = d->mru_section; subpage_t *subpage; @@ -1421,9 +1415,7 @@ static uint16_t dummy_section(struct uc_struct *uc, PhysPageMap *map, FlatView * MemoryRegionSection *iotlb_to_section(CPUState *cpu, hwaddr index, MemTxAttrs attrs) { -#ifdef TARGET_ARM - struct uc_struct *uc = cpu->uc; -#endif + UNICORN_UNUSED struct uc_struct *uc = cpu->uc; int asidx = cpu_asidx_from_attrs(cpu, attrs); CPUAddressSpace *cpuas = &cpu->cpu_ases[asidx]; AddressSpaceDispatch *d = cpuas->memory_dispatch; @@ -1459,7 +1451,7 @@ AddressSpaceDispatch *address_space_dispatch_new(struct uc_struct *uc, FlatView void address_space_dispatch_clear(AddressSpaceDispatch *d) { MemoryRegionSection *section; - struct uc_struct *uc = d->uc; + UNICORN_UNUSED struct uc_struct *uc = d->uc; while (d->map.sections_nb > 0) { d->map.sections_nb--; section = &d->map.sections[d->map.sections_nb]; @@ -1891,7 +1883,7 @@ void *address_space_map(AddressSpace *as, MemoryRegion *mr; void *ptr; FlatView *fv; - struct uc_struct *uc = as->uc; + UNICORN_UNUSED struct uc_struct *uc = as->uc; if (len == 0) { return NULL; @@ -2020,9 +2012,7 @@ static inline MemoryRegion *address_space_translate_cached( int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr, void *ptr, target_ulong len, bool is_write) { -#ifdef TARGET_ARM - struct uc_struct *uc = cpu->uc; -#endif + UNICORN_UNUSED struct uc_struct *uc = cpu->uc; hwaddr phys_addr; target_ulong l, page; uint8_t *buf = ptr; @@ -2030,6 +2020,7 @@ int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr, while (len > 0) { int asidx; MemTxAttrs attrs; + MemTxResult res; page = addr & TARGET_PAGE_MASK; phys_addr = cpu_get_phys_page_attrs_debug(cpu, page, &attrs); @@ -2042,12 +2033,15 @@ int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr, l = len; phys_addr += (addr & ~TARGET_PAGE_MASK); if 
(is_write) { - address_space_write_rom(cpu->cpu_ases[asidx].as, phys_addr, + res = address_space_write_rom(cpu->cpu_ases[asidx].as, phys_addr, attrs, buf, l); } else { - address_space_read(cpu->cpu_ases[asidx].as, phys_addr, attrs, buf, + res = address_space_read(cpu->cpu_ases[asidx].as, phys_addr, attrs, buf, l); } + if (res != MEMTX_OK) { + return -1; + } len -= l; buf += l; addr += l; diff --git a/qemu/fpu/softfloat-specialize.inc.c b/qemu/fpu/softfloat-specialize.inc.c index 5ab2fa1941..034d18199c 100644 --- a/qemu/fpu/softfloat-specialize.inc.c +++ b/qemu/fpu/softfloat-specialize.inc.c @@ -93,7 +93,7 @@ this code that are retained. * 2008 revision and backward compatibility with their original choice. * Thus for MIPS we must make the choice at runtime. */ -static inline flag snan_bit_is_one(float_status *status) +static inline bool snan_bit_is_one(float_status *status) { #if defined(TARGET_MIPS) return status->snan_bit_is_one; @@ -114,7 +114,7 @@ static bool parts_is_snan_frac(uint64_t frac, float_status *status) #ifdef NO_SIGNALING_NANS return false; #else - flag msb = extract64(frac, DECOMPOSED_BINARY_POINT - 1, 1); + bool msb = extract64(frac, DECOMPOSED_BINARY_POINT - 1, 1); return msb == snan_bit_is_one(status); #endif } @@ -236,7 +236,7 @@ void float_raise(uint8_t flags, float_status *status) | Internal canonical NaN format. *----------------------------------------------------------------------------*/ typedef struct { - flag sign; + bool sign; uint64_t high, low; } commonNaNT; @@ -245,7 +245,7 @@ typedef struct { | NaN; otherwise returns 0. *----------------------------------------------------------------------------*/ -int float16_is_quiet_nan(float16 a_, float_status *status) +bool float16_is_quiet_nan(float16 a_, float_status *status) { #ifdef NO_SIGNALING_NANS return float16_is_any_nan(a_); @@ -254,7 +254,7 @@ int float16_is_quiet_nan(float16 a_, float_status *status) if (snan_bit_is_one(status)) { return (((a >> 9) & 0x3F) == 0x3E) && (a & 0x1FF); } else { - return ((a & ~0x8000) >= 0x7C80); + return ((a >> 9) & 0x3F) == 0x3F; } #endif } @@ -264,14 +264,14 @@ int float16_is_quiet_nan(float16 a_, float_status *status) | NaN; otherwise returns 0. *----------------------------------------------------------------------------*/ -int float16_is_signaling_nan(float16 a_, float_status *status) +bool float16_is_signaling_nan(float16 a_, float_status *status) { #ifdef NO_SIGNALING_NANS return 0; #else uint16_t a = float16_val(a_); if (snan_bit_is_one(status)) { - return ((a & ~0x8000) >= 0x7C80); + return ((a >> 9) & 0x3F) == 0x3F; } else { return (((a >> 9) & 0x3F) == 0x3E) && (a & 0x1FF); } @@ -283,7 +283,7 @@ int float16_is_signaling_nan(float16 a_, float_status *status) | NaN; otherwise returns 0. *----------------------------------------------------------------------------*/ -int float32_is_quiet_nan(float32 a_, float_status *status) +bool float32_is_quiet_nan(float32 a_, float_status *status) { #ifdef NO_SIGNALING_NANS return float32_is_any_nan(a_); @@ -302,7 +302,7 @@ int float32_is_quiet_nan(float32 a_, float_status *status) | NaN; otherwise returns 0. 
*----------------------------------------------------------------------------*/ -int float32_is_signaling_nan(float32 a_, float_status *status) +bool float32_is_signaling_nan(float32 a_, float_status *status) { #ifdef NO_SIGNALING_NANS return 0; @@ -374,7 +374,7 @@ static float32 commonNaNToFloat32(commonNaNT a, float_status *status) *----------------------------------------------------------------------------*/ static int pickNaN(FloatClass a_cls, FloatClass b_cls, - flag aIsLargerSignificand) + bool aIsLargerSignificand) { #if defined(TARGET_ARM) || defined(TARGET_MIPS) || defined(TARGET_HPPA) /* ARM mandated NaN propagation rules (see FPProcessNaNs()), take @@ -584,7 +584,7 @@ static int pickNaNMulAdd(FloatClass a_cls, FloatClass b_cls, FloatClass c_cls, static float32 propagateFloat32NaN(float32 a, float32 b, float_status *status) { - flag aIsLargerSignificand; + bool aIsLargerSignificand; uint32_t av, bv; FloatClass a_cls, b_cls; @@ -637,7 +637,7 @@ static float32 propagateFloat32NaN(float32 a, float32 b, float_status *status) | NaN; otherwise returns 0. *----------------------------------------------------------------------------*/ -int float64_is_quiet_nan(float64 a_, float_status *status) +bool float64_is_quiet_nan(float64 a_, float_status *status) { #ifdef NO_SIGNALING_NANS return float64_is_any_nan(a_); @@ -657,7 +657,7 @@ int float64_is_quiet_nan(float64 a_, float_status *status) | NaN; otherwise returns 0. *----------------------------------------------------------------------------*/ -int float64_is_signaling_nan(float64 a_, float_status *status) +bool float64_is_signaling_nan(float64 a_, float_status *status) { #ifdef NO_SIGNALING_NANS return 0; @@ -722,7 +722,7 @@ static float64 commonNaNToFloat64(commonNaNT a, float_status *status) static float64 propagateFloat64NaN(float64 a, float64 b, float_status *status) { - flag aIsLargerSignificand; + bool aIsLargerSignificand; uint64_t av, bv; FloatClass a_cls, b_cls; @@ -890,7 +890,7 @@ static floatx80 commonNaNToFloatx80(commonNaNT a, float_status *status) floatx80 propagateFloatx80NaN(floatx80 a, floatx80 b, float_status *status) { - flag aIsLargerSignificand; + bool aIsLargerSignificand; FloatClass a_cls, b_cls; /* This is not complete, but is good enough for pickNaN. */ @@ -939,7 +939,7 @@ floatx80 propagateFloatx80NaN(floatx80 a, floatx80 b, float_status *status) | NaN; otherwise returns 0. *----------------------------------------------------------------------------*/ -int float128_is_quiet_nan(float128 a, float_status *status) +bool float128_is_quiet_nan(float128 a, float_status *status) { #ifdef NO_SIGNALING_NANS return float128_is_any_nan(a); @@ -959,7 +959,7 @@ int float128_is_quiet_nan(float128 a, float_status *status) | signaling NaN; otherwise returns 0. *----------------------------------------------------------------------------*/ -int float128_is_signaling_nan(float128 a, float_status *status) +bool float128_is_signaling_nan(float128 a, float_status *status) { #ifdef NO_SIGNALING_NANS return 0; @@ -1038,7 +1038,7 @@ static float128 commonNaNToFloat128(commonNaNT a, float_status *status) static float128 propagateFloat128NaN(float128 a, float128 b, float_status *status) { - flag aIsLargerSignificand; + bool aIsLargerSignificand; FloatClass a_cls, b_cls; /* This is not complete, but is good enough for pickNaN. */ diff --git a/qemu/fpu/softfloat.c b/qemu/fpu/softfloat.c index 0e7938dc1c..930a2e352a 100644 --- a/qemu/fpu/softfloat.c +++ b/qemu/fpu/softfloat.c @@ -114,7 +114,7 @@ this code that are retained. 
* * The idea is thus to leverage the host FPU to (1) compute FP operations * and (2) identify whether FP exceptions occurred while avoiding - * expensive exception flag register accesses. + * expensive exception bool register accesses. * * An important optimization shown in the paper is that given that exception * flags are rarely cleared by the guest, we can avoid recomputing some flags. @@ -217,7 +217,7 @@ GEN_INPUT_FLUSH3(float64_input_flush3, float64) /* * Some targets clear the FP flags before most FP operations. This prevents - * the use of hardfloat, since hardfloat relies on the inexact flag being + * the use of hardfloat, since hardfloat relies on the inexact bool being * already set. */ #if defined(TARGET_PPC) || defined(__FAST_MATH__) @@ -342,12 +342,10 @@ static inline bool f64_is_inf(union_float64 a) return float64_is_infinity(a.s); } -/* Note: @fast_test and @post can be NULL */ static inline float32 float32_gen2(float32 xa, float32 xb, float_status *s, hard_f32_op2_fn hard, soft_f32_op2_fn soft, - f32_check_fn pre, f32_check_fn post, - f32_check_fn fast_test, soft_f32_op2_fn fast_op) + f32_check_fn pre, f32_check_fn post) { union_float32 ua, ub, ur; @@ -362,17 +360,12 @@ float32_gen2(float32 xa, float32 xb, float_status *s, if (unlikely(!pre(ua, ub))) { goto soft; } - if (fast_test && fast_test(ua, ub)) { - return fast_op(ua.s, ub.s, s); - } ur.h = hard(ua.h, ub.h); if (unlikely(f32_is_inf(ur))) { s->float_exception_flags |= float_flag_overflow; - } else if (unlikely(fabsf(ur.h) <= FLT_MIN)) { - if (post == NULL || post(ua, ub)) { - goto soft; - } + } else if (unlikely(fabsf(ur.h) <= FLT_MIN) && post(ua, ub)) { + goto soft; } return ur.s; @@ -383,8 +376,7 @@ float32_gen2(float32 xa, float32 xb, float_status *s, static inline float64 float64_gen2(float64 xa, float64 xb, float_status *s, hard_f64_op2_fn hard, soft_f64_op2_fn soft, - f64_check_fn pre, f64_check_fn post, - f64_check_fn fast_test, soft_f64_op2_fn fast_op) + f64_check_fn pre, f64_check_fn post) { union_float64 ua, ub, ur; @@ -399,17 +391,12 @@ float64_gen2(float64 xa, float64 xb, float_status *s, if (unlikely(!pre(ua, ub))) { goto soft; } - if (fast_test && fast_test(ua, ub)) { - return fast_op(ua.s, ub.s, s); - } ur.h = hard(ua.h, ub.h); if (unlikely(f64_is_inf(ur))) { s->float_exception_flags |= float_flag_overflow; - } else if (unlikely(fabs(ur.h) <= DBL_MIN)) { - if (post == NULL || post(ua, ub)) { - goto soft; - } + } else if (unlikely(fabs(ur.h) <= DBL_MIN) && post(ua, ub)) { + goto soft; } return ur.s; @@ -439,7 +426,7 @@ static inline int extractFloat32Exp(float32 a) | Returns the sign bit of the single-precision floating-point value `a'. *----------------------------------------------------------------------------*/ -static inline flag extractFloat32Sign(float32 a) +static inline bool extractFloat32Sign(float32 a) { return float32_val(a) >> 31; } @@ -466,7 +453,7 @@ static inline int extractFloat64Exp(float64 a) | Returns the sign bit of the double-precision floating-point value `a'. 
*----------------------------------------------------------------------------*/ -static inline flag extractFloat64Sign(float64 a) +static inline bool extractFloat64Sign(float64 a) { return float64_val(a) >> 63; } @@ -786,8 +773,7 @@ static FloatParts round_canonical(FloatParts p, float_status *s, p.cls = float_class_zero; goto do_zero; } else { - bool is_tiny = (s->float_detect_tininess - == float_tininess_before_rounding) + bool is_tiny = s->tininess_before_rounding || (exp < 0) || !((frac + inc) & DECOMPOSED_OVERFLOW_BIT); @@ -802,6 +788,8 @@ static FloatParts round_canonical(FloatParts p, float_status *s, case float_round_to_odd: inc = frac & frac_lsb ? 0 : round_mask; break; + default: + break; } flags |= float_flag_inexact; frac += inc; @@ -1149,7 +1137,7 @@ static double hard_f64_sub(double a, double b) return a - b; } -static bool f32_addsub_post(union_float32 a, union_float32 b) +static bool f32_addsubmul_post(union_float32 a, union_float32 b) { if (QEMU_HARDFLOAT_2F32_USE_FP) { return !(fpclassify(a.h) == FP_ZERO && fpclassify(b.h) == FP_ZERO); @@ -1157,7 +1145,7 @@ static bool f32_addsub_post(union_float32 a, union_float32 b) return !(float32_is_zero(a.s) && float32_is_zero(b.s)); } -static bool f64_addsub_post(union_float64 a, union_float64 b) +static bool f64_addsubmul_post(union_float64 a, union_float64 b) { if (QEMU_HARDFLOAT_2F64_USE_FP) { return !(fpclassify(a.h) == FP_ZERO && fpclassify(b.h) == FP_ZERO); @@ -1170,14 +1158,14 @@ static float32 float32_addsub(float32 a, float32 b, float_status *s, hard_f32_op2_fn hard, soft_f32_op2_fn soft) { return float32_gen2(a, b, s, hard, soft, - f32_is_zon2, f32_addsub_post, NULL, NULL); + f32_is_zon2, f32_addsubmul_post); } static float64 float64_addsub(float64 a, float64 b, float_status *s, hard_f64_op2_fn hard, soft_f64_op2_fn soft) { return float64_gen2(a, b, s, hard, soft, - f64_is_zon2, f64_addsub_post, NULL, NULL); + f64_is_zon2, f64_addsubmul_post); } float32 QEMU_FLATTEN @@ -1294,42 +1282,18 @@ static double hard_f64_mul(double a, double b) return a * b; } -static bool f32_mul_fast_test(union_float32 a, union_float32 b) -{ - return float32_is_zero(a.s) || float32_is_zero(b.s); -} - -static bool f64_mul_fast_test(union_float64 a, union_float64 b) -{ - return float64_is_zero(a.s) || float64_is_zero(b.s); -} - -static float32 f32_mul_fast_op(float32 a, float32 b, float_status *s) -{ - bool signbit = float32_is_neg(a) ^ float32_is_neg(b); - - return float32_set_sign(float32_zero, signbit); -} - -static float64 f64_mul_fast_op(float64 a, float64 b, float_status *s) -{ - bool signbit = float64_is_neg(a) ^ float64_is_neg(b); - - return float64_set_sign(float64_zero, signbit); -} - float32 QEMU_FLATTEN float32_mul(float32 a, float32 b, float_status *s) { return float32_gen2(a, b, s, hard_f32_mul, soft_f32_mul, - f32_is_zon2, NULL, f32_mul_fast_test, f32_mul_fast_op); + f32_is_zon2, f32_addsubmul_post); } float64 QEMU_FLATTEN float64_mul(float64 a, float64 b, float_status *s) { return float64_gen2(a, b, s, hard_f64_mul, soft_f64_mul, - f64_is_zon2, NULL, f64_mul_fast_test, f64_mul_fast_op); + f64_is_zon2, f64_addsubmul_post); } /* @@ -1872,14 +1836,14 @@ float32 QEMU_FLATTEN float32_div(float32 a, float32 b, float_status *s) { return float32_gen2(a, b, s, hard_f32_div, soft_f32_div, - f32_div_pre, f32_div_post, NULL, NULL); + f32_div_pre, f32_div_post); } float64 QEMU_FLATTEN float64_div(float64 a, float64 b, float_status *s) { return float64_gen2(a, b, s, hard_f64_div, soft_f64_div, - f64_div_pre, f64_div_post, NULL, NULL); + 
f64_div_pre, f64_div_post); } /* @@ -2004,7 +1968,7 @@ float32 float64_to_float32(float64 a, float_status *s) * Arithmetic. */ -static FloatParts round_to_int(FloatParts a, int rmode, +static FloatParts round_to_int(FloatParts a, FloatRoundMode rmode, int scale, float_status *s) { switch (a.cls) { @@ -2139,7 +2103,7 @@ float64 float64_round_to_int(float64 a, float_status *s) * is returned. */ -static int64_t round_to_int_and_pack(FloatParts in, int rmode, int scale, +static int64_t round_to_int_and_pack(FloatParts in, FloatRoundMode rmode, int scale, int64_t min, int64_t max, float_status *s) { @@ -2191,63 +2155,63 @@ static int64_t round_to_int_and_pack(FloatParts in, int rmode, int scale, } } -int16_t float16_to_int16_scalbn(float16 a, int rmode, int scale, +int16_t float16_to_int16_scalbn(float16 a, FloatRoundMode rmode, int scale, float_status *s) { return round_to_int_and_pack(float16_unpack_canonical(a, s), rmode, scale, INT16_MIN, INT16_MAX, s); } -int32_t float16_to_int32_scalbn(float16 a, int rmode, int scale, +int32_t float16_to_int32_scalbn(float16 a, FloatRoundMode rmode, int scale, float_status *s) { return round_to_int_and_pack(float16_unpack_canonical(a, s), rmode, scale, INT32_MIN, INT32_MAX, s); } -int64_t float16_to_int64_scalbn(float16 a, int rmode, int scale, +int64_t float16_to_int64_scalbn(float16 a, FloatRoundMode rmode, int scale, float_status *s) { return round_to_int_and_pack(float16_unpack_canonical(a, s), rmode, scale, INT64_MIN, INT64_MAX, s); } -int16_t float32_to_int16_scalbn(float32 a, int rmode, int scale, +int16_t float32_to_int16_scalbn(float32 a, FloatRoundMode rmode, int scale, float_status *s) { return round_to_int_and_pack(float32_unpack_canonical(a, s), rmode, scale, INT16_MIN, INT16_MAX, s); } -int32_t float32_to_int32_scalbn(float32 a, int rmode, int scale, +int32_t float32_to_int32_scalbn(float32 a, FloatRoundMode rmode, int scale, float_status *s) { return round_to_int_and_pack(float32_unpack_canonical(a, s), rmode, scale, INT32_MIN, INT32_MAX, s); } -int64_t float32_to_int64_scalbn(float32 a, int rmode, int scale, +int64_t float32_to_int64_scalbn(float32 a, FloatRoundMode rmode, int scale, float_status *s) { return round_to_int_and_pack(float32_unpack_canonical(a, s), rmode, scale, INT64_MIN, INT64_MAX, s); } -int16_t float64_to_int16_scalbn(float64 a, int rmode, int scale, +int16_t float64_to_int16_scalbn(float64 a, FloatRoundMode rmode, int scale, float_status *s) { return round_to_int_and_pack(float64_unpack_canonical(a, s), rmode, scale, INT16_MIN, INT16_MAX, s); } -int32_t float64_to_int32_scalbn(float64 a, int rmode, int scale, +int32_t float64_to_int32_scalbn(float64 a, FloatRoundMode rmode, int scale, float_status *s) { return round_to_int_and_pack(float64_unpack_canonical(a, s), rmode, scale, INT32_MIN, INT32_MAX, s); } -int64_t float64_to_int64_scalbn(float64 a, int rmode, int scale, +int64_t float64_to_int64_scalbn(float64 a, FloatRoundMode rmode, int scale, float_status *s) { return round_to_int_and_pack(float64_unpack_canonical(a, s), @@ -2357,7 +2321,7 @@ int64_t float64_to_int64_round_to_zero(float64 a, float_status *s) * flag. 
*/ -static uint64_t round_to_uint_and_pack(FloatParts in, int rmode, int scale, +static uint64_t round_to_uint_and_pack(FloatParts in, FloatRoundMode rmode, int scale, uint64_t max, float_status *s) { int orig_flags = get_float_exception_flags(s); @@ -2404,63 +2368,63 @@ static uint64_t round_to_uint_and_pack(FloatParts in, int rmode, int scale, } } -uint16_t float16_to_uint16_scalbn(float16 a, int rmode, int scale, +uint16_t float16_to_uint16_scalbn(float16 a, FloatRoundMode rmode, int scale, float_status *s) { return round_to_uint_and_pack(float16_unpack_canonical(a, s), rmode, scale, UINT16_MAX, s); } -uint32_t float16_to_uint32_scalbn(float16 a, int rmode, int scale, +uint32_t float16_to_uint32_scalbn(float16 a, FloatRoundMode rmode, int scale, float_status *s) { return round_to_uint_and_pack(float16_unpack_canonical(a, s), rmode, scale, UINT32_MAX, s); } -uint64_t float16_to_uint64_scalbn(float16 a, int rmode, int scale, +uint64_t float16_to_uint64_scalbn(float16 a, FloatRoundMode rmode, int scale, float_status *s) { return round_to_uint_and_pack(float16_unpack_canonical(a, s), rmode, scale, UINT64_MAX, s); } -uint16_t float32_to_uint16_scalbn(float32 a, int rmode, int scale, +uint16_t float32_to_uint16_scalbn(float32 a, FloatRoundMode rmode, int scale, float_status *s) { return round_to_uint_and_pack(float32_unpack_canonical(a, s), rmode, scale, UINT16_MAX, s); } -uint32_t float32_to_uint32_scalbn(float32 a, int rmode, int scale, +uint32_t float32_to_uint32_scalbn(float32 a, FloatRoundMode rmode, int scale, float_status *s) { return round_to_uint_and_pack(float32_unpack_canonical(a, s), rmode, scale, UINT32_MAX, s); } -uint64_t float32_to_uint64_scalbn(float32 a, int rmode, int scale, +uint64_t float32_to_uint64_scalbn(float32 a, FloatRoundMode rmode, int scale, float_status *s) { return round_to_uint_and_pack(float32_unpack_canonical(a, s), rmode, scale, UINT64_MAX, s); } -uint16_t float64_to_uint16_scalbn(float64 a, int rmode, int scale, +uint16_t float64_to_uint16_scalbn(float64 a, FloatRoundMode rmode, int scale, float_status *s) { return round_to_uint_and_pack(float64_unpack_canonical(a, s), rmode, scale, UINT16_MAX, s); } -uint32_t float64_to_uint32_scalbn(float64 a, int rmode, int scale, +uint32_t float64_to_uint32_scalbn(float64 a, FloatRoundMode rmode, int scale, float_status *s) { return round_to_uint_and_pack(float64_unpack_canonical(a, s), rmode, scale, UINT32_MAX, s); } -uint64_t float64_to_uint64_scalbn(float64 a, int rmode, int scale, +uint64_t float64_to_uint64_scalbn(float64 a, FloatRoundMode rmode, int scale, float_status *s) { return round_to_uint_and_pack(float64_unpack_canonical(a, s), @@ -2934,8 +2898,8 @@ MINMAX(64, maxnummag, false, true, true) #undef MINMAX /* Floating point compare */ -static int compare_floats(FloatParts a, FloatParts b, bool is_quiet, - float_status *s) +static FloatRelation compare_floats(FloatParts a, FloatParts b, bool is_quiet, + float_status *s) { if (is_nan(a.cls) || is_nan(b.cls)) { if (!is_quiet || @@ -3006,17 +2970,17 @@ COMPARE(soft_f64_compare, QEMU_SOFTFLOAT_ATTR, 64) #undef COMPARE -int float16_compare(float16 a, float16 b, float_status *s) +FloatRelation float16_compare(float16 a, float16 b, float_status *s) { return soft_f16_compare(a, b, false, s); } -int float16_compare_quiet(float16 a, float16 b, float_status *s) +FloatRelation float16_compare_quiet(float16 a, float16 b, float_status *s) { return soft_f16_compare(a, b, true, s); } -static int QEMU_FLATTEN +static FloatRelation QEMU_FLATTEN f32_compare(float32 xa, float32 
xb, bool is_quiet, float_status *s) { union_float32 ua, ub; @@ -3045,17 +3009,17 @@ f32_compare(float32 xa, float32 xb, bool is_quiet, float_status *s) return soft_f32_compare(ua.s, ub.s, is_quiet, s); } -int float32_compare(float32 a, float32 b, float_status *s) +FloatRelation float32_compare(float32 a, float32 b, float_status *s) { return f32_compare(a, b, false, s); } -int float32_compare_quiet(float32 a, float32 b, float_status *s) +FloatRelation float32_compare_quiet(float32 a, float32 b, float_status *s) { return f32_compare(a, b, true, s); } -static int QEMU_FLATTEN +static FloatRelation QEMU_FLATTEN f64_compare(float64 xa, float64 xb, bool is_quiet, float_status *s) { union_float64 ua, ub; @@ -3084,12 +3048,12 @@ f64_compare(float64 xa, float64 xb, bool is_quiet, float_status *s) return soft_f64_compare(ua.s, ub.s, is_quiet, s); } -int float64_compare(float64 a, float64 b, float_status *s) +FloatRelation float64_compare(float64 a, float64 b, float_status *s) { return f64_compare(a, b, false, s); } -int float64_compare_quiet(float64 a, float64 b, float_status *s) +FloatRelation float64_compare_quiet(float64 a, float64 b, float_status *s) { return f64_compare(a, b, true, s); } @@ -3420,10 +3384,10 @@ float64 float64_squash_input_denormal(float64 a, float_status *status) | positive or negative integer is returned. *----------------------------------------------------------------------------*/ -static int32_t roundAndPackInt32(flag zSign, uint64_t absZ, float_status *status) +static int32_t roundAndPackInt32(bool zSign, uint64_t absZ, float_status *status) { int8_t roundingMode; - flag roundNearestEven; + bool roundNearestEven; int8_t roundIncrement, roundBits; int32_t z; @@ -3451,7 +3415,9 @@ static int32_t roundAndPackInt32(flag zSign, uint64_t absZ, float_status *status } roundBits = absZ & 0x7F; absZ = ( absZ + roundIncrement )>>7; - absZ &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven ); + if (!(roundBits ^ 0x40) && roundNearestEven) { + absZ &= ~1; + } z = absZ; if ( zSign ) z = - z; if ( ( absZ>>32 ) || ( z && ( ( z < 0 ) ^ zSign ) ) ) { @@ -3477,11 +3443,11 @@ static int32_t roundAndPackInt32(flag zSign, uint64_t absZ, float_status *status | returned. *----------------------------------------------------------------------------*/ -static int64_t roundAndPackInt64(flag zSign, uint64_t absZ0, uint64_t absZ1, +static int64_t roundAndPackInt64(bool zSign, uint64_t absZ0, uint64_t absZ1, float_status *status) { int8_t roundingMode; - flag roundNearestEven, increment; + bool roundNearestEven, increment; int64_t z; roundingMode = status->float_rounding_mode; @@ -3509,7 +3475,9 @@ static int64_t roundAndPackInt64(flag zSign, uint64_t absZ0, uint64_t absZ1, if ( increment ) { ++absZ0; if ( absZ0 == 0 ) goto overflow; - absZ0 &= ~ ( ( (uint64_t) ( absZ1<<1 ) == 0 ) & roundNearestEven ); + if (!(absZ1 << 1) && roundNearestEven) { + absZ0 &= ~1; + } } z = absZ0; if ( zSign ) z = - z; @@ -3535,11 +3503,11 @@ static int64_t roundAndPackInt64(flag zSign, uint64_t absZ0, uint64_t absZ1, | exception is raised and the largest unsigned integer is returned. 
*----------------------------------------------------------------------------*/ -static int64_t roundAndPackUint64(flag zSign, uint64_t absZ0, +static int64_t roundAndPackUint64(bool zSign, uint64_t absZ0, uint64_t absZ1, float_status *status) { int8_t roundingMode; - flag roundNearestEven, increment; + bool roundNearestEven, increment; roundingMode = status->float_rounding_mode; roundNearestEven = (roundingMode == float_round_nearest_even); @@ -3569,7 +3537,9 @@ static int64_t roundAndPackUint64(flag zSign, uint64_t absZ0, float_raise(float_flag_invalid, status); return UINT64_MAX; } - absZ0 &= ~(((uint64_t)(absZ1<<1) == 0) & roundNearestEven); + if (!(absZ1 << 1) && roundNearestEven) { + absZ0 &= ~1; + } } if (zSign && absZ0) { @@ -3623,13 +3593,13 @@ static void | Binary Floating-Point Arithmetic. *----------------------------------------------------------------------------*/ -static float32 roundAndPackFloat32(flag zSign, int zExp, uint32_t zSig, +static float32 roundAndPackFloat32(bool zSign, int zExp, uint32_t zSig, float_status *status) { int8_t roundingMode; - flag roundNearestEven; + bool roundNearestEven; int8_t roundIncrement, roundBits; - flag isTiny; + bool isTiny; roundingMode = status->float_rounding_mode; roundNearestEven = ( roundingMode == float_round_nearest_even ); @@ -3670,11 +3640,9 @@ static float32 roundAndPackFloat32(flag zSign, int zExp, uint32_t zSig, float_raise(float_flag_output_denormal, status); return packFloat32(zSign, 0, 0); } - isTiny = - (status->float_detect_tininess - == float_tininess_before_rounding) - || ( zExp < -1 ) - || ( zSig + roundIncrement < 0x80000000 ); + isTiny = status->tininess_before_rounding + || ( zExp < -1 ) + || ( zSig + roundIncrement < 0x80000000 ); shift32RightJamming( zSig, - zExp, &zSig ); zExp = 0; roundBits = zSig & 0x7F; @@ -3694,7 +3662,9 @@ static float32 roundAndPackFloat32(flag zSign, int zExp, uint32_t zSig, status->float_exception_flags |= float_flag_inexact; } zSig = ( zSig + roundIncrement )>>7; - zSig &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven ); + if (!(roundBits ^ 0x40) && roundNearestEven) { + zSig &= ~1; + } if ( zSig == 0 ) zExp = 0; return packFloat32( zSign, zExp, zSig ); @@ -3710,7 +3680,7 @@ static float32 roundAndPackFloat32(flag zSign, int zExp, uint32_t zSig, *----------------------------------------------------------------------------*/ static float32 - normalizeRoundAndPackFloat32(flag zSign, int zExp, uint32_t zSig, + normalizeRoundAndPackFloat32(bool zSign, int zExp, uint32_t zSig, float_status *status) { int8_t shiftCount; @@ -3750,7 +3720,7 @@ static void | significand. *----------------------------------------------------------------------------*/ -static inline float64 packFloat64(flag zSign, int zExp, uint64_t zSig) +static inline float64 packFloat64(bool zSign, int zExp, uint64_t zSig) { return make_float64( @@ -3780,13 +3750,13 @@ static inline float64 packFloat64(flag zSign, int zExp, uint64_t zSig) | Binary Floating-Point Arithmetic. 
*----------------------------------------------------------------------------*/ -static float64 roundAndPackFloat64(flag zSign, int zExp, uint64_t zSig, +static float64 roundAndPackFloat64(bool zSign, int zExp, uint64_t zSig, float_status *status) { int8_t roundingMode; - flag roundNearestEven; + bool roundNearestEven; int roundIncrement, roundBits; - flag isTiny; + bool isTiny; roundingMode = status->float_rounding_mode; roundNearestEven = ( roundingMode == float_round_nearest_even ); @@ -3826,11 +3796,9 @@ static float64 roundAndPackFloat64(flag zSign, int zExp, uint64_t zSig, float_raise(float_flag_output_denormal, status); return packFloat64(zSign, 0, 0); } - isTiny = - (status->float_detect_tininess - == float_tininess_before_rounding) - || ( zExp < -1 ) - || ( zSig + roundIncrement < UINT64_C(0x8000000000000000) ); + isTiny = status->tininess_before_rounding + || ( zExp < -1 ) + || ( zSig + roundIncrement < UINT64_C(0x8000000000000000) ); shift64RightJamming( zSig, - zExp, &zSig ); zExp = 0; roundBits = zSig & 0x3FF; @@ -3850,7 +3818,9 @@ static float64 roundAndPackFloat64(flag zSign, int zExp, uint64_t zSig, status->float_exception_flags |= float_flag_inexact; } zSig = ( zSig + roundIncrement )>>10; - zSig &= ~ ( ( ( roundBits ^ 0x200 ) == 0 ) & roundNearestEven ); + if (!(roundBits ^ 0x200) && roundNearestEven) { + zSig &= ~1; + } if ( zSig == 0 ) zExp = 0; return packFloat64( zSign, zExp, zSig ); @@ -3866,7 +3836,7 @@ static float64 roundAndPackFloat64(flag zSign, int zExp, uint64_t zSig, *----------------------------------------------------------------------------*/ static float64 - normalizeRoundAndPackFloat64(flag zSign, int zExp, uint64_t zSig, + normalizeRoundAndPackFloat64(bool zSign, int zExp, uint64_t zSig, float_status *status) { int8_t shiftCount; @@ -3918,12 +3888,12 @@ void normalizeFloatx80Subnormal(uint64_t aSig, int32_t *zExpPtr, | Floating-Point Arithmetic. *----------------------------------------------------------------------------*/ -floatx80 roundAndPackFloatx80(int8_t roundingPrecision, flag zSign, +floatx80 roundAndPackFloatx80(int8_t roundingPrecision, bool zSign, int32_t zExp, uint64_t zSig0, uint64_t zSig1, float_status *status) { int8_t roundingMode; - flag roundNearestEven, increment, isTiny; + bool roundNearestEven, increment, isTiny; int64_t roundIncrement, roundMask, roundBits; roundingMode = status->float_rounding_mode; @@ -3969,11 +3939,9 @@ floatx80 roundAndPackFloatx80(int8_t roundingPrecision, flag zSign, float_raise(float_flag_output_denormal, status); return packFloatx80(zSign, 0, 0); } - isTiny = - (status->float_detect_tininess - == float_tininess_before_rounding) - || ( zExp < 0 ) - || ( zSig0 <= zSig0 + roundIncrement ); + isTiny = status->tininess_before_rounding + || ( zExp < 0 ) + || ( zSig0 <= zSig0 + roundIncrement ); shift64RightJamming( zSig0, 1 - zExp, &zSig0 ); zExp = 0; roundBits = zSig0 & roundMask; @@ -4047,12 +4015,10 @@ floatx80 roundAndPackFloatx80(int8_t roundingPrecision, flag zSign, floatx80_infinity_low); } if ( zExp <= 0 ) { - isTiny = - (status->float_detect_tininess - == float_tininess_before_rounding) - || ( zExp < 0 ) - || ! increment - || ( zSig0 < UINT64_C(0xFFFFFFFFFFFFFFFF) ); + isTiny = status->tininess_before_rounding + || ( zExp < 0 ) + || ! 
increment + || ( zSig0 < UINT64_C(0xFFFFFFFFFFFFFFFF) ); shift64ExtraRightJamming( zSig0, zSig1, 1 - zExp, &zSig0, &zSig1 ); zExp = 0; if (isTiny && zSig1) { @@ -4080,8 +4046,9 @@ floatx80 roundAndPackFloatx80(int8_t roundingPrecision, flag zSign, } if ( increment ) { ++zSig0; - zSig0 &= - ~ ( ( (uint64_t) ( zSig1<<1 ) == 0 ) & roundNearestEven ); + if (!(zSig1 << 1) && roundNearestEven) { + zSig0 &= ~1; + } if ( (int64_t) zSig0 < 0 ) zExp = 1; } return packFloatx80( zSign, zExp, zSig0 ); @@ -4097,7 +4064,9 @@ floatx80 roundAndPackFloatx80(int8_t roundingPrecision, flag zSign, zSig0 = UINT64_C(0x8000000000000000); } else { - zSig0 &= ~ ( ( (uint64_t) ( zSig1<<1 ) == 0 ) & roundNearestEven ); + if (!(zSig1 << 1) && roundNearestEven) { + zSig0 &= ~1; + } } } else { @@ -4117,7 +4086,7 @@ floatx80 roundAndPackFloatx80(int8_t roundingPrecision, flag zSign, *----------------------------------------------------------------------------*/ floatx80 normalizeRoundAndPackFloatx80(int8_t roundingPrecision, - flag zSign, int32_t zExp, + bool zSign, int32_t zExp, uint64_t zSig0, uint64_t zSig1, float_status *status) { @@ -4176,10 +4145,10 @@ static inline int32_t extractFloat128Exp( float128 a ) | Returns the sign bit of the quadruple-precision floating-point value `a'. *----------------------------------------------------------------------------*/ -static inline flag extractFloat128Sign( float128 a ) +static inline bool extractFloat128Sign( float128 a ) { - return a.high>>63; + return a.high >> 63; } @@ -4238,7 +4207,7 @@ static void *----------------------------------------------------------------------------*/ static inline float128 - packFloat128( flag zSign, int32_t zExp, uint64_t zSig0, uint64_t zSig1 ) + packFloat128( bool zSign, int32_t zExp, uint64_t zSig0, uint64_t zSig1 ) { float128 z; @@ -4269,12 +4238,12 @@ static inline float128 | overflow follows the IEC/IEEE Standard for Binary Floating-Point Arithmetic. *----------------------------------------------------------------------------*/ -static float128 roundAndPackFloat128(flag zSign, int32_t zExp, +static float128 roundAndPackFloat128(bool zSign, int32_t zExp, uint64_t zSig0, uint64_t zSig1, uint64_t zSig2, float_status *status) { int8_t roundingMode; - flag roundNearestEven, increment, isTiny; + bool roundNearestEven, increment, isTiny; roundingMode = status->float_rounding_mode; roundNearestEven = ( roundingMode == float_round_nearest_even ); @@ -4331,17 +4300,12 @@ static float128 roundAndPackFloat128(flag zSign, int32_t zExp, float_raise(float_flag_output_denormal, status); return packFloat128(zSign, 0, 0, 0); } - isTiny = - (status->float_detect_tininess - == float_tininess_before_rounding) - || ( zExp < -1 ) - || ! increment - || lt128( - zSig0, - zSig1, - UINT64_C(0x0001FFFFFFFFFFFF), - UINT64_C(0xFFFFFFFFFFFFFFFF) - ); + isTiny = status->tininess_before_rounding + || ( zExp < -1 ) + || ! increment + || lt128(zSig0, zSig1, + UINT64_C(0x0001FFFFFFFFFFFF), + UINT64_C(0xFFFFFFFFFFFFFFFF)); shift128ExtraRightJamming( zSig0, zSig1, zSig2, - zExp, &zSig0, &zSig1, &zSig2 ); zExp = 0; @@ -4375,7 +4339,9 @@ static float128 roundAndPackFloat128(flag zSign, int32_t zExp, } if ( increment ) { add128( zSig0, zSig1, 0, 1, &zSig0, &zSig1 ); - zSig1 &= ~ ( ( zSig2 + zSig2 == 0 ) & roundNearestEven ); + if ((zSig2 + zSig2 == 0) && roundNearestEven) { + zSig1 &= ~1; + } } else { if ( ( zSig0 | zSig1 ) == 0 ) zExp = 0; @@ -4394,7 +4360,7 @@ static float128 roundAndPackFloat128(flag zSign, int32_t zExp, | point exponent. 
*----------------------------------------------------------------------------*/ -static float128 normalizeRoundAndPackFloat128(flag zSign, int32_t zExp, +static float128 normalizeRoundAndPackFloat128(bool zSign, int32_t zExp, uint64_t zSig0, uint64_t zSig1, float_status *status) { @@ -4430,7 +4396,7 @@ static float128 normalizeRoundAndPackFloat128(flag zSign, int32_t zExp, floatx80 int32_to_floatx80(int32_t a, float_status *status) { - flag zSign; + bool zSign; uint32_t absA; int8_t shiftCount; uint64_t zSig; @@ -4452,7 +4418,7 @@ floatx80 int32_to_floatx80(int32_t a, float_status *status) float128 int32_to_float128(int32_t a, float_status *status) { - flag zSign; + bool zSign; uint32_t absA; int8_t shiftCount; uint64_t zSig0; @@ -4475,7 +4441,7 @@ float128 int32_to_float128(int32_t a, float_status *status) floatx80 int64_to_floatx80(int64_t a, float_status *status) { - flag zSign; + bool zSign; uint64_t absA; int8_t shiftCount; @@ -4495,7 +4461,7 @@ floatx80 int64_to_floatx80(int64_t a, float_status *status) float128 int64_to_float128(int64_t a, float_status *status) { - flag zSign; + bool zSign; uint64_t absA; int8_t shiftCount; int32_t zExp; @@ -4543,7 +4509,7 @@ float128 uint64_to_float128(uint64_t a, float_status *status) floatx80 float32_to_floatx80(float32 a, float_status *status) { - flag aSign; + bool aSign; int aExp; uint32_t aSig; @@ -4553,7 +4519,8 @@ floatx80 float32_to_floatx80(float32 a, float_status *status) aSign = extractFloat32Sign( a ); if ( aExp == 0xFF ) { if (aSig) { - return commonNaNToFloatx80(float32ToCommonNaN(a, status), status); + floatx80 res = commonNaNToFloatx80(float32ToCommonNaN(a, status), status); + return floatx80_silence_nan(res, status); } return packFloatx80(aSign, floatx80_infinity_high, @@ -4577,7 +4544,7 @@ floatx80 float32_to_floatx80(float32 a, float_status *status) float128 float32_to_float128(float32 a, float_status *status) { - flag aSign; + bool aSign; int aExp; uint32_t aSig; @@ -4608,7 +4575,7 @@ float128 float32_to_float128(float32 a, float_status *status) float32 float32_rem(float32 a, float32 b, float_status *status) { - flag aSign, zSign; + bool aSign, zSign; int aExp, bExp, expDiff; uint32_t aSig, bSig; uint32_t q; @@ -4751,7 +4718,7 @@ static const float64 float32_exp2_coefficients[15] = float32 float32_exp2(float32 a, float_status *status) { - flag aSign; + bool aSign; int aExp; uint32_t aSig; float64 r, x, xn; @@ -4801,7 +4768,7 @@ float32 float32_exp2(float32 a, float_status *status) *----------------------------------------------------------------------------*/ float32 float32_log2(float32 a, float_status *status) { - flag aSign, zSign; + bool aSign, zSign; int aExp; uint32_t aSig, zSig, i; @@ -4848,222 +4815,6 @@ float32 float32_log2(float32 a, float_status *status) return normalizeRoundAndPackFloat32(zSign, 0x85, zSig, status); } -/*---------------------------------------------------------------------------- -| Returns 1 if the single-precision floating-point value `a' is equal to -| the corresponding value `b', and 0 otherwise. The invalid exception is -| raised if either operand is a NaN. Otherwise, the comparison is performed -| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. 
-*----------------------------------------------------------------------------*/ - -int float32_eq(float32 a, float32 b, float_status *status) -{ - uint32_t av, bv; - a = float32_squash_input_denormal(a, status); - b = float32_squash_input_denormal(b, status); - - if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) ) - || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) ) - ) { - float_raise(float_flag_invalid, status); - return 0; - } - av = float32_val(a); - bv = float32_val(b); - return ( av == bv ) || ( (uint32_t) ( ( av | bv )<<1 ) == 0 ); -} - -/*---------------------------------------------------------------------------- -| Returns 1 if the single-precision floating-point value `a' is less than -| or equal to the corresponding value `b', and 0 otherwise. The invalid -| exception is raised if either operand is a NaN. The comparison is performed -| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ - -int float32_le(float32 a, float32 b, float_status *status) -{ - flag aSign, bSign; - uint32_t av, bv; - a = float32_squash_input_denormal(a, status); - b = float32_squash_input_denormal(b, status); - - if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) ) - || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) ) - ) { - float_raise(float_flag_invalid, status); - return 0; - } - aSign = extractFloat32Sign( a ); - bSign = extractFloat32Sign( b ); - av = float32_val(a); - bv = float32_val(b); - if ( aSign != bSign ) return aSign || ( (uint32_t) ( ( av | bv )<<1 ) == 0 ); - return ( av == bv ) || ( aSign ^ ( av < bv ) ); - -} - -/*---------------------------------------------------------------------------- -| Returns 1 if the single-precision floating-point value `a' is less than -| the corresponding value `b', and 0 otherwise. The invalid exception is -| raised if either operand is a NaN. The comparison is performed according -| to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ - -int float32_lt(float32 a, float32 b, float_status *status) -{ - flag aSign, bSign; - uint32_t av, bv; - a = float32_squash_input_denormal(a, status); - b = float32_squash_input_denormal(b, status); - - if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) ) - || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) ) - ) { - float_raise(float_flag_invalid, status); - return 0; - } - aSign = extractFloat32Sign( a ); - bSign = extractFloat32Sign( b ); - av = float32_val(a); - bv = float32_val(b); - if ( aSign != bSign ) return aSign && ( (uint32_t) ( ( av | bv )<<1 ) != 0 ); - return ( av != bv ) && ( aSign ^ ( av < bv ) ); - -} - -/*---------------------------------------------------------------------------- -| Returns 1 if the single-precision floating-point values `a' and `b' cannot -| be compared, and 0 otherwise. The invalid exception is raised if either -| operand is a NaN. The comparison is performed according to the IEC/IEEE -| Standard for Binary Floating-Point Arithmetic. 
-*----------------------------------------------------------------------------*/ - -int float32_unordered(float32 a, float32 b, float_status *status) -{ - a = float32_squash_input_denormal(a, status); - b = float32_squash_input_denormal(b, status); - - if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) ) - || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) ) - ) { - float_raise(float_flag_invalid, status); - return 1; - } - return 0; -} - -/*---------------------------------------------------------------------------- -| Returns 1 if the single-precision floating-point value `a' is equal to -| the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an -| exception. The comparison is performed according to the IEC/IEEE Standard -| for Binary Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ - -int float32_eq_quiet(float32 a, float32 b, float_status *status) -{ - a = float32_squash_input_denormal(a, status); - b = float32_squash_input_denormal(b, status); - - if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) ) - || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) ) - ) { - if (float32_is_signaling_nan(a, status) - || float32_is_signaling_nan(b, status)) { - float_raise(float_flag_invalid, status); - } - return 0; - } - return ( float32_val(a) == float32_val(b) ) || - ( (uint32_t) ( ( float32_val(a) | float32_val(b) )<<1 ) == 0 ); -} - -/*---------------------------------------------------------------------------- -| Returns 1 if the single-precision floating-point value `a' is less than or -| equal to the corresponding value `b', and 0 otherwise. Quiet NaNs do not -| cause an exception. Otherwise, the comparison is performed according to the -| IEC/IEEE Standard for Binary Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ - -int float32_le_quiet(float32 a, float32 b, float_status *status) -{ - flag aSign, bSign; - uint32_t av, bv; - a = float32_squash_input_denormal(a, status); - b = float32_squash_input_denormal(b, status); - - if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) ) - || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) ) - ) { - if (float32_is_signaling_nan(a, status) - || float32_is_signaling_nan(b, status)) { - float_raise(float_flag_invalid, status); - } - return 0; - } - aSign = extractFloat32Sign( a ); - bSign = extractFloat32Sign( b ); - av = float32_val(a); - bv = float32_val(b); - if ( aSign != bSign ) return aSign || ( (uint32_t) ( ( av | bv )<<1 ) == 0 ); - return ( av == bv ) || ( aSign ^ ( av < bv ) ); - -} - -/*---------------------------------------------------------------------------- -| Returns 1 if the single-precision floating-point value `a' is less than -| the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an -| exception. Otherwise, the comparison is performed according to the IEC/IEEE -| Standard for Binary Floating-Point Arithmetic. 
-*----------------------------------------------------------------------------*/ - -int float32_lt_quiet(float32 a, float32 b, float_status *status) -{ - flag aSign, bSign; - uint32_t av, bv; - a = float32_squash_input_denormal(a, status); - b = float32_squash_input_denormal(b, status); - - if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) ) - || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) ) - ) { - if (float32_is_signaling_nan(a, status) - || float32_is_signaling_nan(b, status)) { - float_raise(float_flag_invalid, status); - } - return 0; - } - aSign = extractFloat32Sign( a ); - bSign = extractFloat32Sign( b ); - av = float32_val(a); - bv = float32_val(b); - if ( aSign != bSign ) return aSign && ( (uint32_t) ( ( av | bv )<<1 ) != 0 ); - return ( av != bv ) && ( aSign ^ ( av < bv ) ); - -} - -/*---------------------------------------------------------------------------- -| Returns 1 if the single-precision floating-point values `a' and `b' cannot -| be compared, and 0 otherwise. Quiet NaNs do not cause an exception. The -| comparison is performed according to the IEC/IEEE Standard for Binary -| Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ - -int float32_unordered_quiet(float32 a, float32 b, float_status *status) -{ - a = float32_squash_input_denormal(a, status); - b = float32_squash_input_denormal(b, status); - - if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) ) - || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) ) - ) { - if (float32_is_signaling_nan(a, status) - || float32_is_signaling_nan(b, status)) { - float_raise(float_flag_invalid, status); - } - return 1; - } - return 0; -} - /*---------------------------------------------------------------------------- | Returns the result of converting the double-precision floating-point value | `a' to the extended double-precision floating-point format. 
The conversion @@ -5073,7 +4824,7 @@ int float32_unordered_quiet(float32 a, float32 b, float_status *status) floatx80 float64_to_floatx80(float64 a, float_status *status) { - flag aSign; + bool aSign; int aExp; uint64_t aSig; @@ -5083,7 +4834,8 @@ floatx80 float64_to_floatx80(float64 a, float_status *status) aSign = extractFloat64Sign( a ); if ( aExp == 0x7FF ) { if (aSig) { - return commonNaNToFloatx80(float64ToCommonNaN(a, status), status); + floatx80 res = commonNaNToFloatx80(float64ToCommonNaN(a, status), status); + return floatx80_silence_nan(res, status); } return packFloatx80(aSign, floatx80_infinity_high, @@ -5108,7 +4860,7 @@ floatx80 float64_to_floatx80(float64 a, float_status *status) float128 float64_to_float128(float64 a, float_status *status) { - flag aSign; + bool aSign; int aExp; uint64_t aSig, zSig0, zSig1; @@ -5141,7 +4893,7 @@ float128 float64_to_float128(float64 a, float_status *status) float64 float64_rem(float64 a, float64 b, float_status *status) { - flag aSign, zSign; + bool aSign, zSign; int aExp, bExp, expDiff; uint64_t aSig, bSig; uint64_t q, alternateASig; @@ -5236,7 +4988,7 @@ float64 float64_rem(float64 a, float64 b, float_status *status) *----------------------------------------------------------------------------*/ float64 float64_log2(float64 a, float_status *status) { - flag aSign, zSign; + bool aSign, zSign; int aExp; uint64_t aSig, aSig0, aSig1, zSig, i; a = float64_squash_input_denormal(a, status); @@ -5283,361 +5035,141 @@ float64 float64_log2(float64 a, float_status *status) } /*---------------------------------------------------------------------------- -| Returns 1 if the double-precision floating-point value `a' is equal to the -| corresponding value `b', and 0 otherwise. The invalid exception is raised -| if either operand is a NaN. Otherwise, the comparison is performed -| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. +| Returns the result of converting the extended double-precision floating- +| point value `a' to the 32-bit two's complement integer format. The +| conversion is performed according to the IEC/IEEE Standard for Binary +| Floating-Point Arithmetic---which means in particular that the conversion +| is rounded according to the current rounding mode. If `a' is a NaN, the +| largest positive integer is returned. Otherwise, if the conversion +| overflows, the largest integer with the same sign as `a' is returned. 
*----------------------------------------------------------------------------*/ -int float64_eq(float64 a, float64 b, float_status *status) +int32_t floatx80_to_int32(floatx80 a, float_status *status) { - uint64_t av, bv; - a = float64_squash_input_denormal(a, status); - b = float64_squash_input_denormal(b, status); + bool aSign; + int32_t aExp, shiftCount; + uint64_t aSig; - if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) ) - || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) ) - ) { + if (floatx80_invalid_encoding(a)) { float_raise(float_flag_invalid, status); - return 0; + return 1 << 31; } - av = float64_val(a); - bv = float64_val(b); - return ( av == bv ) || ( (uint64_t) ( ( av | bv )<<1 ) == 0 ); + aSig = extractFloatx80Frac( a ); + aExp = extractFloatx80Exp( a ); + aSign = extractFloatx80Sign( a ); + if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) aSign = 0; + shiftCount = 0x4037 - aExp; + if ( shiftCount <= 0 ) shiftCount = 1; + shift64RightJamming( aSig, shiftCount, &aSig ); + return roundAndPackInt32(aSign, aSig, status); } /*---------------------------------------------------------------------------- -| Returns 1 if the double-precision floating-point value `a' is less than or -| equal to the corresponding value `b', and 0 otherwise. The invalid -| exception is raised if either operand is a NaN. The comparison is performed -| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. +| Returns the result of converting the extended double-precision floating- +| point value `a' to the 32-bit two's complement integer format. The +| conversion is performed according to the IEC/IEEE Standard for Binary +| Floating-Point Arithmetic, except that the conversion is always rounded +| toward zero. If `a' is a NaN, the largest positive integer is returned. +| Otherwise, if the conversion overflows, the largest integer with the same +| sign as `a' is returned. *----------------------------------------------------------------------------*/ -int float64_le(float64 a, float64 b, float_status *status) +int32_t floatx80_to_int32_round_to_zero(floatx80 a, float_status *status) { - flag aSign, bSign; - uint64_t av, bv; - a = float64_squash_input_denormal(a, status); - b = float64_squash_input_denormal(b, status); + bool aSign; + int32_t aExp, shiftCount; + uint64_t aSig, savedASig; + int32_t z; - if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) ) - || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) ) - ) { + if (floatx80_invalid_encoding(a)) { float_raise(float_flag_invalid, status); + return 1 << 31; + } + aSig = extractFloatx80Frac( a ); + aExp = extractFloatx80Exp( a ); + aSign = extractFloatx80Sign( a ); + if ( 0x401E < aExp ) { + if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) aSign = 0; + goto invalid; + } + else if ( aExp < 0x3FFF ) { + if (aExp || aSig) { + status->float_exception_flags |= float_flag_inexact; + } return 0; } - aSign = extractFloat64Sign( a ); - bSign = extractFloat64Sign( b ); - av = float64_val(a); - bv = float64_val(b); - if ( aSign != bSign ) return aSign || ( (uint64_t) ( ( av | bv )<<1 ) == 0 ); - return ( av == bv ) || ( aSign ^ ( av < bv ) ); + shiftCount = 0x403E - aExp; + savedASig = aSig; + aSig >>= shiftCount; + z = aSig; + if ( aSign ) z = - z; + if ( ( z < 0 ) ^ aSign ) { + invalid: + float_raise(float_flag_invalid, status); + return aSign ? 
(int32_t) 0x80000000 : 0x7FFFFFFF;
+    }
+    if ( ( aSig<<shiftCount ) != savedASig ) {
+        status->float_exception_flags |= float_flag_inexact;
+    }
+    return z;
 }
 
 /*----------------------------------------------------------------------------
-| Returns 1 if the double-precision floating-point value `a' is less than
-| the corresponding value `b', and 0 otherwise. The invalid exception is
-| raised if either operand is a NaN. The comparison is performed according
-| to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+| Returns the result of converting the extended double-precision floating-
+| point value `a' to the 64-bit two's complement integer format. The
+| conversion is performed according to the IEC/IEEE Standard for Binary
+| Floating-Point Arithmetic---which means in particular that the conversion
+| is rounded according to the current rounding mode. If `a' is a NaN,
+| the largest positive integer is returned. Otherwise, if the conversion
+| overflows, the largest integer with the same sign as `a' is returned.
 *----------------------------------------------------------------------------*/
 
-int float64_lt(float64 a, float64 b, float_status *status)
+int64_t floatx80_to_int64(floatx80 a, float_status *status)
 {
-    flag aSign, bSign;
-    uint64_t av, bv;
+    bool aSign;
+    int32_t aExp, shiftCount;
+    uint64_t aSig, aSigExtra;
 
-    a = float64_squash_input_denormal(a, status);
-    b = float64_squash_input_denormal(b, status);
-    if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
-         || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
-       ) {
+    if (floatx80_invalid_encoding(a)) {
         float_raise(float_flag_invalid, status);
-        return 0;
+        return 1ULL << 63;
     }
-    aSign = extractFloat64Sign( a );
-    bSign = extractFloat64Sign( b );
-    av = float64_val(a);
-    bv = float64_val(b);
-    if ( aSign != bSign ) return aSign && ( (uint64_t) ( ( av | bv )<<1 ) != 0 );
-    return ( av != bv ) && ( aSign ^ ( av < bv ) );
+    aSig = extractFloatx80Frac( a );
+    aExp = extractFloatx80Exp( a );
+    aSign = extractFloatx80Sign( a );
+    shiftCount = 0x403E - aExp;
+    if ( shiftCount <= 0 ) {
+        if ( shiftCount ) {
+            float_raise(float_flag_invalid, status);
+            if (!aSign || floatx80_is_any_nan(a)) {
+                return INT64_MAX;
+            }
+            return INT64_MIN;
+        }
+        aSigExtra = 0;
+    }
+    else {
+        shift64ExtraRightJamming( aSig, 0, shiftCount, &aSig, &aSigExtra );
+    }
+    return roundAndPackInt64(aSign, aSig, aSigExtra, status);
 }
 
 /*----------------------------------------------------------------------------
-| Returns 1 if the double-precision floating-point values `a' and `b' cannot
-| be compared, and 0 otherwise. The invalid exception is raised if either
-| operand is a NaN. The comparison is performed according to the IEC/IEEE
-| Standard for Binary Floating-Point Arithmetic.
+| Returns the result of converting the extended double-precision floating-
+| point value `a' to the 64-bit two's complement integer format. The
+| conversion is performed according to the IEC/IEEE Standard for Binary
+| Floating-Point Arithmetic, except that the conversion is always rounded
+| toward zero. If `a' is a NaN, the largest positive integer is returned.
+| Otherwise, if the conversion overflows, the largest integer with the same
+| sign as `a' is returned.
*----------------------------------------------------------------------------*/ -int float64_unordered(float64 a, float64 b, float_status *status) +int64_t floatx80_to_int64_round_to_zero(floatx80 a, float_status *status) { - a = float64_squash_input_denormal(a, status); - b = float64_squash_input_denormal(b, status); - - if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) ) - || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) ) - ) { - float_raise(float_flag_invalid, status); - return 1; - } - return 0; -} - -/*---------------------------------------------------------------------------- -| Returns 1 if the double-precision floating-point value `a' is equal to the -| corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an -| exception.The comparison is performed according to the IEC/IEEE Standard -| for Binary Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ - -int float64_eq_quiet(float64 a, float64 b, float_status *status) -{ - uint64_t av, bv; - a = float64_squash_input_denormal(a, status); - b = float64_squash_input_denormal(b, status); - - if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) ) - || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) ) - ) { - if (float64_is_signaling_nan(a, status) - || float64_is_signaling_nan(b, status)) { - float_raise(float_flag_invalid, status); - } - return 0; - } - av = float64_val(a); - bv = float64_val(b); - return ( av == bv ) || ( (uint64_t) ( ( av | bv )<<1 ) == 0 ); - -} - -/*---------------------------------------------------------------------------- -| Returns 1 if the double-precision floating-point value `a' is less than or -| equal to the corresponding value `b', and 0 otherwise. Quiet NaNs do not -| cause an exception. Otherwise, the comparison is performed according to the -| IEC/IEEE Standard for Binary Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ - -int float64_le_quiet(float64 a, float64 b, float_status *status) -{ - flag aSign, bSign; - uint64_t av, bv; - a = float64_squash_input_denormal(a, status); - b = float64_squash_input_denormal(b, status); - - if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) ) - || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) ) - ) { - if (float64_is_signaling_nan(a, status) - || float64_is_signaling_nan(b, status)) { - float_raise(float_flag_invalid, status); - } - return 0; - } - aSign = extractFloat64Sign( a ); - bSign = extractFloat64Sign( b ); - av = float64_val(a); - bv = float64_val(b); - if ( aSign != bSign ) return aSign || ( (uint64_t) ( ( av | bv )<<1 ) == 0 ); - return ( av == bv ) || ( aSign ^ ( av < bv ) ); - -} - -/*---------------------------------------------------------------------------- -| Returns 1 if the double-precision floating-point value `a' is less than -| the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an -| exception. Otherwise, the comparison is performed according to the IEC/IEEE -| Standard for Binary Floating-Point Arithmetic. 
-*----------------------------------------------------------------------------*/ - -int float64_lt_quiet(float64 a, float64 b, float_status *status) -{ - flag aSign, bSign; - uint64_t av, bv; - a = float64_squash_input_denormal(a, status); - b = float64_squash_input_denormal(b, status); - - if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) ) - || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) ) - ) { - if (float64_is_signaling_nan(a, status) - || float64_is_signaling_nan(b, status)) { - float_raise(float_flag_invalid, status); - } - return 0; - } - aSign = extractFloat64Sign( a ); - bSign = extractFloat64Sign( b ); - av = float64_val(a); - bv = float64_val(b); - if ( aSign != bSign ) return aSign && ( (uint64_t) ( ( av | bv )<<1 ) != 0 ); - return ( av != bv ) && ( aSign ^ ( av < bv ) ); - -} - -/*---------------------------------------------------------------------------- -| Returns 1 if the double-precision floating-point values `a' and `b' cannot -| be compared, and 0 otherwise. Quiet NaNs do not cause an exception. The -| comparison is performed according to the IEC/IEEE Standard for Binary -| Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ - -int float64_unordered_quiet(float64 a, float64 b, float_status *status) -{ - a = float64_squash_input_denormal(a, status); - b = float64_squash_input_denormal(b, status); - - if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) ) - || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) ) - ) { - if (float64_is_signaling_nan(a, status) - || float64_is_signaling_nan(b, status)) { - float_raise(float_flag_invalid, status); - } - return 1; - } - return 0; -} - -/*---------------------------------------------------------------------------- -| Returns the result of converting the extended double-precision floating- -| point value `a' to the 32-bit two's complement integer format. The -| conversion is performed according to the IEC/IEEE Standard for Binary -| Floating-Point Arithmetic---which means in particular that the conversion -| is rounded according to the current rounding mode. If `a' is a NaN, the -| largest positive integer is returned. Otherwise, if the conversion -| overflows, the largest integer with the same sign as `a' is returned. -*----------------------------------------------------------------------------*/ - -int32_t floatx80_to_int32(floatx80 a, float_status *status) -{ - flag aSign; - int32_t aExp, shiftCount; - uint64_t aSig; - - if (floatx80_invalid_encoding(a)) { - float_raise(float_flag_invalid, status); - return 1 << 31; - } - aSig = extractFloatx80Frac( a ); - aExp = extractFloatx80Exp( a ); - aSign = extractFloatx80Sign( a ); - if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) aSign = 0; - shiftCount = 0x4037 - aExp; - if ( shiftCount <= 0 ) shiftCount = 1; - shift64RightJamming( aSig, shiftCount, &aSig ); - return roundAndPackInt32(aSign, aSig, status); - -} - -/*---------------------------------------------------------------------------- -| Returns the result of converting the extended double-precision floating- -| point value `a' to the 32-bit two's complement integer format. The -| conversion is performed according to the IEC/IEEE Standard for Binary -| Floating-Point Arithmetic, except that the conversion is always rounded -| toward zero. If `a' is a NaN, the largest positive integer is returned. 
-| Otherwise, if the conversion overflows, the largest integer with the same
-| sign as `a' is returned.
-*----------------------------------------------------------------------------*/
-
-int32_t floatx80_to_int32_round_to_zero(floatx80 a, float_status *status)
-{
-    flag aSign;
-    int32_t aExp, shiftCount;
-    uint64_t aSig, savedASig;
-    int32_t z;
-
-    if (floatx80_invalid_encoding(a)) {
-        float_raise(float_flag_invalid, status);
-        return 1 << 31;
-    }
-    aSig = extractFloatx80Frac( a );
-    aExp = extractFloatx80Exp( a );
-    aSign = extractFloatx80Sign( a );
-    if ( 0x401E < aExp ) {
-        if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) aSign = 0;
-        goto invalid;
-    }
-    else if ( aExp < 0x3FFF ) {
-        if (aExp || aSig) {
-            status->float_exception_flags |= float_flag_inexact;
-        }
-        return 0;
-    }
-    shiftCount = 0x403E - aExp;
-    savedASig = aSig;
-    aSig >>= shiftCount;
-    z = aSig;
-    if ( aSign ) z = - z;
-    if ( ( z < 0 ) ^ aSign ) {
- invalid:
-        float_raise(float_flag_invalid, status);
-        return aSign ? (int32_t) 0x80000000 : 0x7FFFFFFF;
-    }
-    if ( ( aSig<<shiftCount ) != savedASig ) {
-        status->float_exception_flags |= float_flag_inexact;
-    }
-    return z;
-
-}
-
-/*----------------------------------------------------------------------------
-| Returns the result of converting the extended double-precision floating-
-| point value `a' to the 64-bit two's complement integer format. The
-| conversion is performed according to the IEC/IEEE Standard for Binary
-| Floating-Point Arithmetic---which means in particular that the conversion
-| is rounded according to the current rounding mode. If `a' is a NaN,
-| the largest positive integer is returned. Otherwise, if the conversion
-| overflows, the largest integer with the same sign as `a' is returned.
-*----------------------------------------------------------------------------*/
-
-int64_t floatx80_to_int64(floatx80 a, float_status *status)
-{
-    flag aSign;
-    int32_t aExp, shiftCount;
-    uint64_t aSig, aSigExtra;
-
-    if (floatx80_invalid_encoding(a)) {
-        float_raise(float_flag_invalid, status);
-        return 1ULL << 63;
-    }
-    aSig = extractFloatx80Frac( a );
-    aExp = extractFloatx80Exp( a );
-    aSign = extractFloatx80Sign( a );
-    shiftCount = 0x403E - aExp;
-    if ( shiftCount <= 0 ) {
-        if ( shiftCount ) {
-            float_raise(float_flag_invalid, status);
-            if (!aSign || floatx80_is_any_nan(a)) {
-                return INT64_MAX;
-            }
-            return INT64_MIN;
-        }
-        aSigExtra = 0;
-    }
-    else {
-        shift64ExtraRightJamming( aSig, 0, shiftCount, &aSig, &aSigExtra );
-    }
-    return roundAndPackInt64(aSign, aSig, aSigExtra, status);
-
-}
-
-/*----------------------------------------------------------------------------
-| Returns the result of converting the extended double-precision floating-
-| point value `a' to the 64-bit two's complement integer format. The
-| conversion is performed according to the IEC/IEEE Standard for Binary
-| Floating-Point Arithmetic, except that the conversion is always rounded
-| toward zero. If `a' is a NaN, the largest positive integer is returned.
-| Otherwise, if the conversion overflows, the largest integer with the same
-| sign as `a' is returned.
-*----------------------------------------------------------------------------*/ - -int64_t floatx80_to_int64_round_to_zero(floatx80 a, float_status *status) -{ - flag aSign; + bool aSign; int32_t aExp, shiftCount; uint64_t aSig; int64_t z; @@ -5684,7 +5216,7 @@ int64_t floatx80_to_int64_round_to_zero(floatx80 a, float_status *status) float32 floatx80_to_float32(floatx80 a, float_status *status) { - flag aSign; + bool aSign; int32_t aExp; uint64_t aSig; @@ -5697,7 +5229,8 @@ float32 floatx80_to_float32(floatx80 a, float_status *status) aSign = extractFloatx80Sign( a ); if ( aExp == 0x7FFF ) { if ( (uint64_t) ( aSig<<1 ) ) { - return commonNaNToFloat32(floatx80ToCommonNaN(a, status), status); + float32 res = commonNaNToFloat32(floatx80ToCommonNaN(a, status), status); + return float32_silence_nan(res, status); } return packFloat32( aSign, 0xFF, 0 ); } @@ -5716,7 +5249,7 @@ float32 floatx80_to_float32(floatx80 a, float_status *status) float64 floatx80_to_float64(floatx80 a, float_status *status) { - flag aSign; + bool aSign; int32_t aExp; uint64_t aSig, zSig; @@ -5729,7 +5262,8 @@ float64 floatx80_to_float64(floatx80 a, float_status *status) aSign = extractFloatx80Sign( a ); if ( aExp == 0x7FFF ) { if ( (uint64_t) ( aSig<<1 ) ) { - return commonNaNToFloat64(floatx80ToCommonNaN(a, status), status); + float64 res = commonNaNToFloat64(floatx80ToCommonNaN(a, status), status); + return float64_silence_nan(res, status); } return packFloat64( aSign, 0x7FF, 0 ); } @@ -5748,7 +5282,7 @@ float64 floatx80_to_float64(floatx80 a, float_status *status) float128 floatx80_to_float128(floatx80 a, float_status *status) { - flag aSign; + bool aSign; int aExp; uint64_t aSig, zSig0, zSig1; @@ -5760,7 +5294,8 @@ float128 floatx80_to_float128(floatx80 a, float_status *status) aExp = extractFloatx80Exp( a ); aSign = extractFloatx80Sign( a ); if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) { - return commonNaNToFloat128(floatx80ToCommonNaN(a, status), status); + float128 res = commonNaNToFloat128(floatx80ToCommonNaN(a, status), status); + return float128_silence_nan(res, status); } shift128Right( aSig<<1, 0, 16, &zSig0, &zSig1 ); return packFloat128( aSign, aExp, zSig0, zSig1 ); @@ -5792,7 +5327,7 @@ floatx80 floatx80_round(floatx80 a, float_status *status) floatx80 floatx80_round_to_int(floatx80 a, float_status *status) { - flag aSign; + bool aSign; int32_t aExp; uint64_t lastBitMask, roundBitsMask; floatx80 z; @@ -5810,7 +5345,7 @@ floatx80 floatx80_round_to_int(floatx80 a, float_status *status) } if ( aExp < 0x3FFF ) { if ( ( aExp == 0 ) - && ( (uint64_t) ( extractFloatx80Frac( a )<<1 ) == 0 ) ) { + && ( (uint64_t) ( extractFloatx80Frac( a ) ) == 0 ) ) { return a; } status->float_exception_flags |= float_flag_inexact; @@ -5837,6 +5372,10 @@ floatx80 floatx80_round_to_int(floatx80 a, float_status *status) return aSign ? packFloatx80( 1, 0, 0 ) : packFloatx80( 0, 0x3FFF, UINT64_C(0x8000000000000000)); + case float_round_to_zero: + break; + default: + g_assert_not_reached(); } return packFloatx80( aSign, 0, 0 ); } @@ -5889,7 +5428,7 @@ floatx80 floatx80_round_to_int(floatx80 a, float_status *status) | Floating-Point Arithmetic. 
*----------------------------------------------------------------------------*/ -static floatx80 addFloatx80Sigs(floatx80 a, floatx80 b, flag zSign, +static floatx80 addFloatx80Sigs(floatx80 a, floatx80 b, bool zSign, float_status *status) { int32_t aExp, bExp, zExp; @@ -5935,6 +5474,12 @@ static floatx80 addFloatx80Sigs(floatx80 a, floatx80 b, flag zSign, zSig1 = 0; zSig0 = aSig + bSig; if ( aExp == 0 ) { + if ((aSig | bSig) & UINT64_C(0x8000000000000000) && zSig0 < aSig) { + /* At least one of the values is a pseudo-denormal, + * and there is a carry out of the result. */ + zExp = 1; + goto shiftRight1; + } if (zSig0 == 0) { return packFloatx80(zSign, 0, 0); } @@ -5963,7 +5508,7 @@ static floatx80 addFloatx80Sigs(floatx80 a, floatx80 b, flag zSign, | Standard for Binary Floating-Point Arithmetic. *----------------------------------------------------------------------------*/ -static floatx80 subFloatx80Sigs(floatx80 a, floatx80 b, flag zSign, +static floatx80 subFloatx80Sigs(floatx80 a, floatx80 b, bool zSign, float_status *status) { int32_t aExp, bExp, zExp; @@ -6032,7 +5577,7 @@ static floatx80 subFloatx80Sigs(floatx80 a, floatx80 b, flag zSign, floatx80 floatx80_add(floatx80 a, floatx80 b, float_status *status) { - flag aSign, bSign; + bool aSign, bSign; if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) { float_raise(float_flag_invalid, status); @@ -6057,7 +5602,7 @@ floatx80 floatx80_add(floatx80 a, floatx80 b, float_status *status) floatx80 floatx80_sub(floatx80 a, floatx80 b, float_status *status) { - flag aSign, bSign; + bool aSign, bSign; if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) { float_raise(float_flag_invalid, status); @@ -6082,7 +5627,7 @@ floatx80 floatx80_sub(floatx80 a, floatx80 b, float_status *status) floatx80 floatx80_mul(floatx80 a, floatx80 b, float_status *status) { - flag aSign, bSign, zSign; + bool aSign, bSign, zSign; int32_t aExp, bExp, zExp; uint64_t aSig, bSig, zSig0, zSig1; @@ -6144,7 +5689,7 @@ floatx80 floatx80_mul(floatx80 a, floatx80 b, float_status *status) floatx80 floatx80_div(floatx80 a, floatx80 b, float_status *status) { - flag aSign, bSign, zSign; + bool aSign, bSign, zSign; int32_t aExp, bExp, zExp; uint64_t aSig, bSig, zSig0, zSig1; uint64_t rem0, rem1, rem2, term0, term1, term2; @@ -6226,13 +5771,16 @@ floatx80 floatx80_div(floatx80 a, floatx80 b, float_status *status) /*---------------------------------------------------------------------------- | Returns the remainder of the extended double-precision floating-point value | `a' with respect to the corresponding value `b'. The operation is performed -| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. +| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic, +| if 'mod' is false; if 'mod' is true, return the remainder based on truncating +| the quotient toward zero instead. '*quotient' is set to the low 64 bits of +| the absolute value of the integer quotient. 
*----------------------------------------------------------------------------*/ -floatx80 floatx80_rem(floatx80 a, floatx80 b, float_status *status) +floatx80 floatx80_modrem(floatx80 a, floatx80 b, bool mod, uint64_t *quotient, float_status *status) { - flag aSign, zSign; - int32_t aExp, bExp, expDiff; + bool aSign, zSign; + int32_t aExp, bExp, expDiff, aExpOrig; uint64_t aSig0, aSig1, bSig; uint64_t q, term0, term1, alternateASig0, alternateASig1; @@ -6241,7 +5789,7 @@ floatx80 floatx80_rem(floatx80 a, floatx80 b, float_status *status) return floatx80_default_nan(status); } aSig0 = extractFloatx80Frac( a ); - aExp = extractFloatx80Exp( a ); + aExpOrig = aExp = extractFloatx80Exp( a ); aSign = extractFloatx80Sign( a ); bSig = extractFloatx80Frac( b ); bExp = extractFloatx80Exp( b ); @@ -6256,6 +5804,13 @@ floatx80 floatx80_rem(floatx80 a, floatx80 b, float_status *status) if ((uint64_t)(bSig << 1)) { return propagateFloatx80NaN(a, b, status); } + if (aExp == 0 && aSig0 >> 63) { + /* + * Pseudo-denormal argument must be returned in normalized + * form. + */ + return packFloatx80(aSign, 1, aSig0); + } return a; } if ( bExp == 0 ) { @@ -6267,19 +5822,26 @@ floatx80 floatx80_rem(floatx80 a, floatx80 b, float_status *status) normalizeFloatx80Subnormal( bSig, &bExp, &bSig ); } if ( aExp == 0 ) { - if ( (uint64_t) ( aSig0<<1 ) == 0 ) return a; + if ( aSig0 == 0 ) return a; normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 ); } - bSig |= UINT64_C(0x8000000000000000); zSign = aSign; expDiff = aExp - bExp; aSig1 = 0; if ( expDiff < 0 ) { - if ( expDiff < -1 ) return a; + if ( mod || expDiff < -1 ) { + if (aExp == 1 && aExpOrig == 0) { + /* + * Pseudo-denormal argument must be returned in + * normalized form. + */ + return packFloatx80(aSign, aExp, aSig0); + } + } shift128Right( aSig0, 0, 1, &aSig0, &aSig1 ); expDiff = 0; } - q = ( bSig <= aSig0 ); + *quotient = q = ( bSig <= aSig0 ); if ( q ) aSig0 -= bSig; expDiff -= 64; while ( 0 < expDiff ) { @@ -6289,6 +5851,8 @@ floatx80 floatx80_rem(floatx80 a, floatx80 b, float_status *status) sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 ); shortShift128Left( aSig0, aSig1, 62, &aSig0, &aSig1 ); expDiff -= 62; + *quotient <<= 62; + *quotient += q; } expDiff += 64; if ( 0 < expDiff ) { @@ -6302,19 +5866,28 @@ floatx80 floatx80_rem(floatx80 a, floatx80 b, float_status *status) ++q; sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 ); } + if (expDiff < 64) { + *quotient <<= expDiff; + } else { + *quotient = 0; + } + *quotient += q; } else { term1 = 0; term0 = bSig; } - sub128( term0, term1, aSig0, aSig1, &alternateASig0, &alternateASig1 ); - if ( lt128( alternateASig0, alternateASig1, aSig0, aSig1 ) - || ( eq128( alternateASig0, alternateASig1, aSig0, aSig1 ) - && ( q & 1 ) ) - ) { - aSig0 = alternateASig0; - aSig1 = alternateASig1; - zSign = ! zSign; + if (!mod) { + sub128( term0, term1, aSig0, aSig1, &alternateASig0, &alternateASig1 ); + if ( lt128( alternateASig0, alternateASig1, aSig0, aSig1 ) + || ( eq128( alternateASig0, alternateASig1, aSig0, aSig1 ) + && ( q & 1 ) ) + ) { + aSig0 = alternateASig0; + aSig1 = alternateASig1; + zSign = ! zSign; + ++*quotient; + } } return normalizeRoundAndPackFloatx80( @@ -6322,6 +5895,30 @@ floatx80 floatx80_rem(floatx80 a, floatx80 b, float_status *status) } +/*---------------------------------------------------------------------------- +| Returns the remainder of the extended double-precision floating-point value +| `a' with respect to the corresponding value `b'. 
The operation is performed
+| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+floatx80 floatx80_rem(floatx80 a, floatx80 b, float_status *status)
+{
+    uint64_t quotient;
+    return floatx80_modrem(a, b, false, &quotient, status);
+}
+
+/*----------------------------------------------------------------------------
+| Returns the remainder of the extended double-precision floating-point value
+| `a' with respect to the corresponding value `b', with the quotient truncated
+| toward zero.
+*----------------------------------------------------------------------------*/
+
+floatx80 floatx80_mod(floatx80 a, floatx80 b, float_status *status)
+{
+    uint64_t quotient;
+    return floatx80_modrem(a, b, true, &quotient, status);
+}
+
 /*----------------------------------------------------------------------------
 | Returns the square root of the extended double-precision floating-point
 | value `a'. The operation is performed according to the IEC/IEEE Standard
@@ -6330,7 +5927,7 @@ floatx80 floatx80_rem(floatx80 a, floatx80 b, float_status *status)
 
 floatx80 floatx80_sqrt(floatx80 a, float_status *status)
 {
-    flag aSign;
+    bool aSign;
     int32_t aExp, zExp;
     uint64_t aSig0, aSig1, zSig0, zSig1, doubleZSig0;
     uint64_t rem0, rem1, rem2, rem3, term0, term1, term2, term3;
@@ -6393,263 +5990,6 @@ floatx80 floatx80_sqrt(floatx80 a, float_status *status)
                                 0, zExp, zSig0, zSig1, status);
 }
 
-/*----------------------------------------------------------------------------
-| Returns 1 if the extended double-precision floating-point value `a' is equal
-| to the corresponding value `b', and 0 otherwise. The invalid exception is
-| raised if either operand is a NaN. Otherwise, the comparison is performed
-| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
-*----------------------------------------------------------------------------*/
-
-int floatx80_eq(floatx80 a, floatx80 b, float_status *status)
-{
-
-    if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)
-        || (extractFloatx80Exp(a) == 0x7FFF
-            && (uint64_t) (extractFloatx80Frac(a) << 1))
-        || (extractFloatx80Exp(b) == 0x7FFF
-            && (uint64_t) (extractFloatx80Frac(b) << 1))
-       ) {
-        float_raise(float_flag_invalid, status);
-        return 0;
-    }
-    return
-        ( a.low == b.low )
-        && (    ( a.high == b.high )
-             || (    ( a.low == 0 )
-                  && ( (uint16_t) ( ( a.high | b.high )<<1 ) == 0 ) )
-           );
-
-}
-
-/*----------------------------------------------------------------------------
-| Returns 1 if the extended double-precision floating-point value `a' is
-| less than or equal to the corresponding value `b', and 0 otherwise. The
-| invalid exception is raised if either operand is a NaN. The comparison is
-| performed according to the IEC/IEEE Standard for Binary Floating-Point
-| Arithmetic.
-*----------------------------------------------------------------------------*/
-
-int floatx80_le(floatx80 a, floatx80 b, float_status *status)
-{
-    flag aSign, bSign;
-
-    if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)
-        || (extractFloatx80Exp(a) == 0x7FFF
-            && (uint64_t) (extractFloatx80Frac(a) << 1))
-        || (extractFloatx80Exp(b) == 0x7FFF
-            && (uint64_t) (extractFloatx80Frac(b) << 1))
-       ) {
-        float_raise(float_flag_invalid, status);
-        return 0;
-    }
-    aSign = extractFloatx80Sign( a );
-    bSign = extractFloatx80Sign( b );
-    if ( aSign != bSign ) {
-        return
-               aSign
-            || (    ( ( (uint16_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
-                 == 0 );
-    }
-    return
-          aSign ? 
le128( b.high, b.low, a.high, a.low ) - : le128( a.high, a.low, b.high, b.low ); - -} - -/*---------------------------------------------------------------------------- -| Returns 1 if the extended double-precision floating-point value `a' is -| less than the corresponding value `b', and 0 otherwise. The invalid -| exception is raised if either operand is a NaN. The comparison is performed -| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ - -int floatx80_lt(floatx80 a, floatx80 b, float_status *status) -{ - flag aSign, bSign; - - if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b) - || (extractFloatx80Exp(a) == 0x7FFF - && (uint64_t) (extractFloatx80Frac(a) << 1)) - || (extractFloatx80Exp(b) == 0x7FFF - && (uint64_t) (extractFloatx80Frac(b) << 1)) - ) { - float_raise(float_flag_invalid, status); - return 0; - } - aSign = extractFloatx80Sign( a ); - bSign = extractFloatx80Sign( b ); - if ( aSign != bSign ) { - return - aSign - && ( ( ( (uint16_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low ) - != 0 ); - } - return - aSign ? lt128( b.high, b.low, a.high, a.low ) - : lt128( a.high, a.low, b.high, b.low ); - -} - -/*---------------------------------------------------------------------------- -| Returns 1 if the extended double-precision floating-point values `a' and `b' -| cannot be compared, and 0 otherwise. The invalid exception is raised if -| either operand is a NaN. The comparison is performed according to the -| IEC/IEEE Standard for Binary Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ -int floatx80_unordered(floatx80 a, floatx80 b, float_status *status) -{ - if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b) - || (extractFloatx80Exp(a) == 0x7FFF - && (uint64_t) (extractFloatx80Frac(a) << 1)) - || (extractFloatx80Exp(b) == 0x7FFF - && (uint64_t) (extractFloatx80Frac(b) << 1)) - ) { - float_raise(float_flag_invalid, status); - return 1; - } - return 0; -} - -/*---------------------------------------------------------------------------- -| Returns 1 if the extended double-precision floating-point value `a' is -| equal to the corresponding value `b', and 0 otherwise. Quiet NaNs do not -| cause an exception. The comparison is performed according to the IEC/IEEE -| Standard for Binary Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ - -int floatx80_eq_quiet(floatx80 a, floatx80 b, float_status *status) -{ - - if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) { - float_raise(float_flag_invalid, status); - return 0; - } - if ( ( ( extractFloatx80Exp( a ) == 0x7FFF ) - && (uint64_t) ( extractFloatx80Frac( a )<<1 ) ) - || ( ( extractFloatx80Exp( b ) == 0x7FFF ) - && (uint64_t) ( extractFloatx80Frac( b )<<1 ) ) - ) { - if (floatx80_is_signaling_nan(a, status) - || floatx80_is_signaling_nan(b, status)) { - float_raise(float_flag_invalid, status); - } - return 0; - } - return - ( a.low == b.low ) - && ( ( a.high == b.high ) - || ( ( a.low == 0 ) - && ( (uint16_t) ( ( a.high | b.high )<<1 ) == 0 ) ) - ); - -} - -/*---------------------------------------------------------------------------- -| Returns 1 if the extended double-precision floating-point value `a' is less -| than or equal to the corresponding value `b', and 0 otherwise. Quiet NaNs -| do not cause an exception. 
Otherwise, the comparison is performed according -| to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ - -int floatx80_le_quiet(floatx80 a, floatx80 b, float_status *status) -{ - flag aSign, bSign; - - if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) { - float_raise(float_flag_invalid, status); - return 0; - } - if ( ( ( extractFloatx80Exp( a ) == 0x7FFF ) - && (uint64_t) ( extractFloatx80Frac( a )<<1 ) ) - || ( ( extractFloatx80Exp( b ) == 0x7FFF ) - && (uint64_t) ( extractFloatx80Frac( b )<<1 ) ) - ) { - if (floatx80_is_signaling_nan(a, status) - || floatx80_is_signaling_nan(b, status)) { - float_raise(float_flag_invalid, status); - } - return 0; - } - aSign = extractFloatx80Sign( a ); - bSign = extractFloatx80Sign( b ); - if ( aSign != bSign ) { - return - aSign - || ( ( ( (uint16_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low ) - == 0 ); - } - return - aSign ? le128( b.high, b.low, a.high, a.low ) - : le128( a.high, a.low, b.high, b.low ); - -} - -/*---------------------------------------------------------------------------- -| Returns 1 if the extended double-precision floating-point value `a' is less -| than the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause -| an exception. Otherwise, the comparison is performed according to the -| IEC/IEEE Standard for Binary Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ - -int floatx80_lt_quiet(floatx80 a, floatx80 b, float_status *status) -{ - flag aSign, bSign; - - if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) { - float_raise(float_flag_invalid, status); - return 0; - } - if ( ( ( extractFloatx80Exp( a ) == 0x7FFF ) - && (uint64_t) ( extractFloatx80Frac( a )<<1 ) ) - || ( ( extractFloatx80Exp( b ) == 0x7FFF ) - && (uint64_t) ( extractFloatx80Frac( b )<<1 ) ) - ) { - if (floatx80_is_signaling_nan(a, status) - || floatx80_is_signaling_nan(b, status)) { - float_raise(float_flag_invalid, status); - } - return 0; - } - aSign = extractFloatx80Sign( a ); - bSign = extractFloatx80Sign( b ); - if ( aSign != bSign ) { - return - aSign - && ( ( ( (uint16_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low ) - != 0 ); - } - return - aSign ? lt128( b.high, b.low, a.high, a.low ) - : lt128( a.high, a.low, b.high, b.low ); - -} - -/*---------------------------------------------------------------------------- -| Returns 1 if the extended double-precision floating-point values `a' and `b' -| cannot be compared, and 0 otherwise. Quiet NaNs do not cause an exception. -| The comparison is performed according to the IEC/IEEE Standard for Binary -| Floating-Point Arithmetic. 
-*----------------------------------------------------------------------------*/ -int floatx80_unordered_quiet(floatx80 a, floatx80 b, float_status *status) -{ - if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) { - float_raise(float_flag_invalid, status); - return 1; - } - if ( ( ( extractFloatx80Exp( a ) == 0x7FFF ) - && (uint64_t) ( extractFloatx80Frac( a )<<1 ) ) - || ( ( extractFloatx80Exp( b ) == 0x7FFF ) - && (uint64_t) ( extractFloatx80Frac( b )<<1 ) ) - ) { - if (floatx80_is_signaling_nan(a, status) - || floatx80_is_signaling_nan(b, status)) { - float_raise(float_flag_invalid, status); - } - return 1; - } - return 0; -} - /*---------------------------------------------------------------------------- | Returns the result of converting the quadruple-precision floating-point | value `a' to the 32-bit two's complement integer format. The conversion @@ -6662,7 +6002,7 @@ int floatx80_unordered_quiet(floatx80 a, floatx80 b, float_status *status) int32_t float128_to_int32(float128 a, float_status *status) { - flag aSign; + bool aSign; int32_t aExp, shiftCount; uint64_t aSig0, aSig1; @@ -6691,7 +6031,7 @@ int32_t float128_to_int32(float128 a, float_status *status) int32_t float128_to_int32_round_to_zero(float128 a, float_status *status) { - flag aSign; + bool aSign; int32_t aExp, shiftCount; uint64_t aSig0, aSig1, savedASig; int32_t z; @@ -6741,7 +6081,7 @@ int32_t float128_to_int32_round_to_zero(float128 a, float_status *status) int64_t float128_to_int64(float128 a, float_status *status) { - flag aSign; + bool aSign; int32_t aExp, shiftCount; uint64_t aSig0, aSig1; @@ -6784,7 +6124,7 @@ int64_t float128_to_int64(float128 a, float_status *status) int64_t float128_to_int64_round_to_zero(float128 a, float_status *status) { - flag aSign; + bool aSign; int32_t aExp, shiftCount; uint64_t aSig0, aSig1; int64_t z; @@ -6849,7 +6189,7 @@ int64_t float128_to_int64_round_to_zero(float128 a, float_status *status) uint64_t float128_to_uint64(float128 a, float_status *status) { - flag aSign; + bool aSign; int aExp; int shiftCount; uint64_t aSig0, aSig1; @@ -6960,7 +6300,7 @@ uint32_t float128_to_uint32(float128 a, float_status *status) float32 float128_to_float32(float128 a, float_status *status) { - flag aSign; + bool aSign; int32_t aExp; uint64_t aSig0, aSig1; uint32_t zSig; @@ -6995,7 +6335,7 @@ float32 float128_to_float32(float128 a, float_status *status) float64 float128_to_float64(float128 a, float_status *status) { - flag aSign; + bool aSign; int32_t aExp; uint64_t aSig0, aSig1; @@ -7028,7 +6368,7 @@ float64 float128_to_float64(float128 a, float_status *status) floatx80 float128_to_floatx80(float128 a, float_status *status) { - flag aSign; + bool aSign; int32_t aExp; uint64_t aSig0, aSig1; @@ -7038,7 +6378,8 @@ floatx80 float128_to_floatx80(float128 a, float_status *status) aSign = extractFloat128Sign( a ); if ( aExp == 0x7FFF ) { if ( aSig0 | aSig1 ) { - return commonNaNToFloatx80(float128ToCommonNaN(a, status), status); + floatx80 res = commonNaNToFloatx80(float128ToCommonNaN(a, status), status); + return floatx80_silence_nan(res, status); } return packFloatx80(aSign, floatx80_infinity_high, floatx80_infinity_low); @@ -7064,7 +6405,7 @@ floatx80 float128_to_floatx80(float128 a, float_status *status) float128 float128_round_to_int(float128 a, float_status *status) { - flag aSign; + bool aSign; int32_t aExp; uint64_t lastBitMask, roundBitsMask; float128 z; @@ -7161,6 +6502,8 @@ float128 float128_round_to_int(float128 a, float_status *status) case float_round_to_odd: return 
packFloat128(aSign, 0x3FFF, 0, 0); + case float_round_to_zero: + break; } return packFloat128( aSign, 0, 0, 0 ); } @@ -7219,7 +6562,7 @@ float128 float128_round_to_int(float128 a, float_status *status) | Floating-Point Arithmetic. *----------------------------------------------------------------------------*/ -static float128 addFloat128Sigs(float128 a, float128 b, flag zSign, +static float128 addFloat128Sigs(float128 a, float128 b, bool zSign, float_status *status) { int32_t aExp, bExp, zExp; @@ -7310,7 +6653,7 @@ static float128 addFloat128Sigs(float128 a, float128 b, flag zSign, | Standard for Binary Floating-Point Arithmetic. *----------------------------------------------------------------------------*/ -static float128 subFloat128Sigs(float128 a, float128 b, flag zSign, +static float128 subFloat128Sigs(float128 a, float128 b, bool zSign, float_status *status) { int32_t aExp, bExp, zExp; @@ -7398,7 +6741,7 @@ static float128 subFloat128Sigs(float128 a, float128 b, flag zSign, float128 float128_add(float128 a, float128 b, float_status *status) { - flag aSign, bSign; + bool aSign, bSign; aSign = extractFloat128Sign( a ); bSign = extractFloat128Sign( b ); @@ -7419,7 +6762,7 @@ float128 float128_add(float128 a, float128 b, float_status *status) float128 float128_sub(float128 a, float128 b, float_status *status) { - flag aSign, bSign; + bool aSign, bSign; aSign = extractFloat128Sign( a ); bSign = extractFloat128Sign( b ); @@ -7440,7 +6783,7 @@ float128 float128_sub(float128 a, float128 b, float_status *status) float128 float128_mul(float128 a, float128 b, float_status *status) { - flag aSign, bSign, zSign; + bool aSign, bSign, zSign; int32_t aExp, bExp, zExp; uint64_t aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2, zSig3; @@ -7503,7 +6846,7 @@ float128 float128_mul(float128 a, float128 b, float_status *status) float128 float128_div(float128 a, float128 b, float_status *status) { - flag aSign, bSign, zSign; + bool aSign, bSign, zSign; int32_t aExp, bExp, zExp; uint64_t aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2; uint64_t rem0, rem1, rem2, rem3, term0, term1, term2, term3; @@ -7590,7 +6933,7 @@ float128 float128_div(float128 a, float128 b, float_status *status) float128 float128_rem(float128 a, float128 b, float_status *status) { - flag aSign, zSign; + bool aSign, zSign; int32_t aExp, bExp, expDiff; uint64_t aSig0, aSig1, bSig0, bSig1, q, term0, term1, term2; uint64_t allZero, alternateASig0, alternateASig1, sigMean1; @@ -7697,7 +7040,7 @@ float128 float128_rem(float128 a, float128 b, float_status *status) float128 float128_sqrt(float128 a, float_status *status) { - flag aSign; + bool aSign; int32_t aExp, zExp; uint64_t aSig0, aSig1, zSig0, zSig1, zSig2, doubleZSig0; uint64_t rem0, rem1, rem2, rem3, term0, term1, term2, term3; @@ -7757,248 +7100,10 @@ float128 float128_sqrt(float128 a, float_status *status) } -/*---------------------------------------------------------------------------- -| Returns 1 if the quadruple-precision floating-point value `a' is equal to -| the corresponding value `b', and 0 otherwise. The invalid exception is -| raised if either operand is a NaN. Otherwise, the comparison is performed -| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. 
-*----------------------------------------------------------------------------*/ - -int float128_eq(float128 a, float128 b, float_status *status) -{ - - if ( ( ( extractFloat128Exp( a ) == 0x7FFF ) - && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) ) - || ( ( extractFloat128Exp( b ) == 0x7FFF ) - && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) ) - ) { - float_raise(float_flag_invalid, status); - return 0; - } - return - ( a.low == b.low ) - && ( ( a.high == b.high ) - || ( ( a.low == 0 ) - && ( (uint64_t) ( ( a.high | b.high )<<1 ) == 0 ) ) - ); - -} - -/*---------------------------------------------------------------------------- -| Returns 1 if the quadruple-precision floating-point value `a' is less than -| or equal to the corresponding value `b', and 0 otherwise. The invalid -| exception is raised if either operand is a NaN. The comparison is performed -| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ - -int float128_le(float128 a, float128 b, float_status *status) -{ - flag aSign, bSign; - - if ( ( ( extractFloat128Exp( a ) == 0x7FFF ) - && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) ) - || ( ( extractFloat128Exp( b ) == 0x7FFF ) - && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) ) - ) { - float_raise(float_flag_invalid, status); - return 0; - } - aSign = extractFloat128Sign( a ); - bSign = extractFloat128Sign( b ); - if ( aSign != bSign ) { - return - aSign - || ( ( ( (uint64_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low ) - == 0 ); - } - return - aSign ? le128( b.high, b.low, a.high, a.low ) - : le128( a.high, a.low, b.high, b.low ); - -} - -/*---------------------------------------------------------------------------- -| Returns 1 if the quadruple-precision floating-point value `a' is less than -| the corresponding value `b', and 0 otherwise. The invalid exception is -| raised if either operand is a NaN. The comparison is performed according -| to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ - -int float128_lt(float128 a, float128 b, float_status *status) -{ - flag aSign, bSign; - - if ( ( ( extractFloat128Exp( a ) == 0x7FFF ) - && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) ) - || ( ( extractFloat128Exp( b ) == 0x7FFF ) - && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) ) - ) { - float_raise(float_flag_invalid, status); - return 0; - } - aSign = extractFloat128Sign( a ); - bSign = extractFloat128Sign( b ); - if ( aSign != bSign ) { - return - aSign - && ( ( ( (uint64_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low ) - != 0 ); - } - return - aSign ? lt128( b.high, b.low, a.high, a.low ) - : lt128( a.high, a.low, b.high, b.low ); - -} - -/*---------------------------------------------------------------------------- -| Returns 1 if the quadruple-precision floating-point values `a' and `b' cannot -| be compared, and 0 otherwise. The invalid exception is raised if either -| operand is a NaN. The comparison is performed according to the IEC/IEEE -| Standard for Binary Floating-Point Arithmetic. 
-*----------------------------------------------------------------------------*/ - -int float128_unordered(float128 a, float128 b, float_status *status) -{ - if ( ( ( extractFloat128Exp( a ) == 0x7FFF ) - && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) ) - || ( ( extractFloat128Exp( b ) == 0x7FFF ) - && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) ) - ) { - float_raise(float_flag_invalid, status); - return 1; - } - return 0; -} - -/*---------------------------------------------------------------------------- -| Returns 1 if the quadruple-precision floating-point value `a' is equal to -| the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an -| exception. The comparison is performed according to the IEC/IEEE Standard -| for Binary Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ - -int float128_eq_quiet(float128 a, float128 b, float_status *status) -{ - - if ( ( ( extractFloat128Exp( a ) == 0x7FFF ) - && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) ) - || ( ( extractFloat128Exp( b ) == 0x7FFF ) - && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) ) - ) { - if (float128_is_signaling_nan(a, status) - || float128_is_signaling_nan(b, status)) { - float_raise(float_flag_invalid, status); - } - return 0; - } - return - ( a.low == b.low ) - && ( ( a.high == b.high ) - || ( ( a.low == 0 ) - && ( (uint64_t) ( ( a.high | b.high )<<1 ) == 0 ) ) - ); - -} - -/*---------------------------------------------------------------------------- -| Returns 1 if the quadruple-precision floating-point value `a' is less than -| or equal to the corresponding value `b', and 0 otherwise. Quiet NaNs do not -| cause an exception. Otherwise, the comparison is performed according to the -| IEC/IEEE Standard for Binary Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ - -int float128_le_quiet(float128 a, float128 b, float_status *status) -{ - flag aSign, bSign; - - if ( ( ( extractFloat128Exp( a ) == 0x7FFF ) - && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) ) - || ( ( extractFloat128Exp( b ) == 0x7FFF ) - && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) ) - ) { - if (float128_is_signaling_nan(a, status) - || float128_is_signaling_nan(b, status)) { - float_raise(float_flag_invalid, status); - } - return 0; - } - aSign = extractFloat128Sign( a ); - bSign = extractFloat128Sign( b ); - if ( aSign != bSign ) { - return - aSign - || ( ( ( (uint64_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low ) - == 0 ); - } - return - aSign ? le128( b.high, b.low, a.high, a.low ) - : le128( a.high, a.low, b.high, b.low ); - -} - -/*---------------------------------------------------------------------------- -| Returns 1 if the quadruple-precision floating-point value `a' is less than -| the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an -| exception. Otherwise, the comparison is performed according to the IEC/IEEE -| Standard for Binary Floating-Point Arithmetic. 
-*----------------------------------------------------------------------------*/ - -int float128_lt_quiet(float128 a, float128 b, float_status *status) -{ - flag aSign, bSign; - - if ( ( ( extractFloat128Exp( a ) == 0x7FFF ) - && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) ) - || ( ( extractFloat128Exp( b ) == 0x7FFF ) - && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) ) - ) { - if (float128_is_signaling_nan(a, status) - || float128_is_signaling_nan(b, status)) { - float_raise(float_flag_invalid, status); - } - return 0; - } - aSign = extractFloat128Sign( a ); - bSign = extractFloat128Sign( b ); - if ( aSign != bSign ) { - return - aSign - && ( ( ( (uint64_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low ) - != 0 ); - } - return - aSign ? lt128( b.high, b.low, a.high, a.low ) - : lt128( a.high, a.low, b.high, b.low ); - -} - -/*---------------------------------------------------------------------------- -| Returns 1 if the quadruple-precision floating-point values `a' and `b' cannot -| be compared, and 0 otherwise. Quiet NaNs do not cause an exception. The -| comparison is performed according to the IEC/IEEE Standard for Binary -| Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ - -int float128_unordered_quiet(float128 a, float128 b, float_status *status) -{ - if ( ( ( extractFloat128Exp( a ) == 0x7FFF ) - && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) ) - || ( ( extractFloat128Exp( b ) == 0x7FFF ) - && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) ) - ) { - if (float128_is_signaling_nan(a, status) - || float128_is_signaling_nan(b, status)) { - float_raise(float_flag_invalid, status); - } - return 1; - } - return 0; -} - static inline int floatx80_compare_internal(floatx80 a, floatx80 b, int is_quiet, float_status *status) { - flag aSign, bSign; + bool aSign, bSign; if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) { float_raise(float_flag_invalid, status); @@ -8027,6 +7132,13 @@ static inline int floatx80_compare_internal(floatx80 a, floatx80 b, return 1 - (2 * aSign); } } else { + /* Normalize pseudo-denormals before comparison */ + if ((a.high & 0x7fff) == 0 && a.low & UINT64_C(0x8000000000000000)) { + ++a.high; + } + if ((b.high & 0x7fff) == 0 && a.low & UINT64_C(0x8000000000000000)) { + ++b.high; + } if (a.low == b.low && a.high == b.high) { return float_relation_equal; } else { @@ -8035,20 +7147,20 @@ static inline int floatx80_compare_internal(floatx80 a, floatx80 b, } } -int floatx80_compare(floatx80 a, floatx80 b, float_status *status) +FloatRelation floatx80_compare(floatx80 a, floatx80 b, float_status *status) { return floatx80_compare_internal(a, b, 0, status); } -int floatx80_compare_quiet(floatx80 a, floatx80 b, float_status *status) +FloatRelation floatx80_compare_quiet(floatx80 a, floatx80 b, float_status *status) { return floatx80_compare_internal(a, b, 1, status); } -static inline int float128_compare_internal(float128 a, float128 b, - int is_quiet, float_status *status) +static inline FloatRelation float128_compare_internal(float128 a, float128 b, + bool is_quiet, float_status *status) { - flag aSign, bSign; + bool aSign, bSign; if (( ( extractFloat128Exp( a ) == 0x7fff ) && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) ) || @@ -8079,19 +7191,19 @@ static inline int float128_compare_internal(float128 a, float128 b, } } -int float128_compare(float128 a, float128 b, float_status *status) +FloatRelation float128_compare(float128 a, 
float128 b, float_status *status) { return float128_compare_internal(a, b, 0, status); } -int float128_compare_quiet(float128 a, float128 b, float_status *status) +FloatRelation float128_compare_quiet(float128 a, float128 b, float_status *status) { return float128_compare_internal(a, b, 1, status); } floatx80 floatx80_scalbn(floatx80 a, int n, float_status *status) { - flag aSign; + bool aSign; int32_t aExp; uint64_t aSig; @@ -8130,7 +7242,7 @@ floatx80 floatx80_scalbn(floatx80 a, int n, float_status *status) float128 float128_scalbn(float128 a, int n, float_status *status) { - flag aSign; + bool aSign; int32_t aExp; uint64_t aSig0, aSig1; diff --git a/qemu/include/elf.h b/qemu/include/elf.h index 8fbfe60e09..5b06b55f28 100644 --- a/qemu/include/elf.h +++ b/qemu/include/elf.h @@ -160,6 +160,8 @@ typedef struct mips_elf_abiflags_v0 { #define EM_CRIS 76 /* Axis Communications 32-bit embedded processor */ +#define EM_AVR 83 /* AVR 8-bit microcontroller */ + #define EM_V850 87 /* NEC v850 */ #define EM_H8_300H 47 /* Hitachi H8/300H */ @@ -202,6 +204,8 @@ typedef struct mips_elf_abiflags_v0 { #define EM_MOXIE 223 /* Moxie processor family */ #define EM_MOXIE_OLD 0xFEED +#define EF_AVR_MACH 0x7F /* Mask for AVR e_flags to get core type */ + /* This is the info that is needed to parse the dynamic section of the file */ #define DT_NULL 0 #define DT_NEEDED 1 diff --git a/qemu/include/exec/cpu-all.h b/qemu/include/exec/cpu-all.h index ddac720740..48c7635daf 100644 --- a/qemu/include/exec/cpu-all.h +++ b/qemu/include/exec/cpu-all.h @@ -368,6 +368,7 @@ static inline bool tlb_hit(struct uc_struct *uc, target_ulong tlb_addr, target_u return tlb_hit_page(uc, tlb_addr, addr & TARGET_PAGE_MASK); } +/* Returns: 0 on success, -1 on error */ int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr, void *ptr, target_ulong len, bool is_write); diff --git a/qemu/include/exec/cpu-common.h b/qemu/include/exec/cpu-common.h index 28ba0e0e22..a532215518 100644 --- a/qemu/include/exec/cpu-common.h +++ b/qemu/include/exec/cpu-common.h @@ -31,9 +31,6 @@ typedef uintptr_t ram_addr_t; /* memory API */ -typedef void CPUWriteMemoryFunc(void *opaque, hwaddr addr, uint32_t value); -typedef uint32_t CPUReadMemoryFunc(void *opaque, hwaddr addr); - /* This should not be used by devices. */ ram_addr_t qemu_ram_addr_from_host(struct uc_struct *uc, void *ptr); RAMBlock *qemu_ram_block_from_host(struct uc_struct *uc, void *ptr, diff --git a/qemu/include/exec/cpu-defs.h b/qemu/include/exec/cpu-defs.h index 5c11015565..a7da99f7d0 100644 --- a/qemu/include/exec/cpu-defs.h +++ b/qemu/include/exec/cpu-defs.h @@ -96,8 +96,13 @@ typedef uint64_t target_ulong; * Skylake's Level-2 STLB has 16 1G entries. * Also, make sure we do not size the TLB past the guest's address space. 
*/ -# define CPU_TLB_DYN_MAX_BITS \ +# ifdef TARGET_PAGE_BITS_VARY +# define CPU_TLB_DYN_MAX_BITS \ MIN(22, TARGET_VIRT_ADDR_SPACE_BITS - TARGET_PAGE_BITS) +# else +# define CPU_TLB_DYN_MAX_BITS \ + MIN_CONST(22, TARGET_VIRT_ADDR_SPACE_BITS - TARGET_PAGE_BITS) +# endif # endif typedef struct CPUTLBEntry { diff --git a/qemu/include/exec/cpu_ldst.h b/qemu/include/exec/cpu_ldst.h index b8482bced1..29c21c6678 100644 --- a/qemu/include/exec/cpu_ldst.h +++ b/qemu/include/exec/cpu_ldst.h @@ -25,13 +25,13 @@ * * The syntax for the accessors is: * - * load: cpu_ld{sign}{size}_{mmusuffix}(env, ptr) - * cpu_ld{sign}{size}_{mmusuffix}_ra(env, ptr, retaddr) - * cpu_ld{sign}{size}_mmuidx_ra(env, ptr, mmu_idx, retaddr) + * load: cpu_ld{sign}{size}{end}_{mmusuffix}(env, ptr) + * cpu_ld{sign}{size}{end}_{mmusuffix}_ra(env, ptr, retaddr) + * cpu_ld{sign}{size}{end}_mmuidx_ra(env, ptr, mmu_idx, retaddr) * - * store: cpu_st{size}_{mmusuffix}(env, ptr, val) - * cpu_st{size}_{mmusuffix}_ra(env, ptr, val, retaddr) - * cpu_st{size}_mmuidx_ra(env, ptr, val, mmu_idx, retaddr) + * store: cpu_st{size}{end}_{mmusuffix}(env, ptr, val) + * cpu_st{size}{end}_{mmusuffix}_ra(env, ptr, val, retaddr) + * cpu_st{size}{end}_mmuidx_ra(env, ptr, val, mmu_idx, retaddr) * * sign is: * (empty): for 32 and 64 bit sizes @@ -44,6 +44,11 @@ * l: 32 bits * q: 64 bits * + * end is: + * (empty): for target native endian, or for 8 bit access + * _be: for forced big endian + * _le: for forced little endian + * * mmusuffix is one of the generic suffixes "data" or "code", or "mmuidx". * The "mmuidx" suffix carries an extra mmu_idx argument that specifies * the index to use; the "data" and "code" suffixes take the index from @@ -59,32 +64,58 @@ typedef target_ulong abi_ptr; #define TARGET_ABI_FMT_ptr TARGET_ABI_FMT_lx uint32_t cpu_ldub_data(CPUArchState *env, abi_ptr ptr); -uint32_t cpu_lduw_data(CPUArchState *env, abi_ptr ptr); -uint32_t cpu_ldl_data(CPUArchState *env, abi_ptr ptr); -uint64_t cpu_ldq_data(CPUArchState *env, abi_ptr ptr); int cpu_ldsb_data(CPUArchState *env, abi_ptr ptr); -int cpu_ldsw_data(CPUArchState *env, abi_ptr ptr); -uint32_t cpu_ldub_data_ra(CPUArchState *env, abi_ptr ptr, uintptr_t retaddr); -uint32_t cpu_lduw_data_ra(CPUArchState *env, abi_ptr ptr, uintptr_t retaddr); -uint32_t cpu_ldl_data_ra(CPUArchState *env, abi_ptr ptr, uintptr_t retaddr); -uint64_t cpu_ldq_data_ra(CPUArchState *env, abi_ptr ptr, uintptr_t retaddr); -int cpu_ldsb_data_ra(CPUArchState *env, abi_ptr ptr, uintptr_t retaddr); -int cpu_ldsw_data_ra(CPUArchState *env, abi_ptr ptr, uintptr_t retaddr); +uint32_t cpu_lduw_be_data(CPUArchState *env, abi_ptr ptr); +int cpu_ldsw_be_data(CPUArchState *env, abi_ptr ptr); +uint32_t cpu_ldl_be_data(CPUArchState *env, abi_ptr ptr); +uint64_t cpu_ldq_be_data(CPUArchState *env, abi_ptr ptr); + +uint32_t cpu_lduw_le_data(CPUArchState *env, abi_ptr ptr); +int cpu_ldsw_le_data(CPUArchState *env, abi_ptr ptr); +uint32_t cpu_ldl_le_data(CPUArchState *env, abi_ptr ptr); +uint64_t cpu_ldq_le_data(CPUArchState *env, abi_ptr ptr); + +uint32_t cpu_ldub_data_ra(CPUArchState *env, abi_ptr ptr, uintptr_t ra); +int cpu_ldsb_data_ra(CPUArchState *env, abi_ptr ptr, uintptr_t ra); + +uint32_t cpu_lduw_be_data_ra(CPUArchState *env, abi_ptr ptr, uintptr_t ra); +int cpu_ldsw_be_data_ra(CPUArchState *env, abi_ptr ptr, uintptr_t ra); +uint32_t cpu_ldl_be_data_ra(CPUArchState *env, abi_ptr ptr, uintptr_t ra); +uint64_t cpu_ldq_be_data_ra(CPUArchState *env, abi_ptr ptr, uintptr_t ra); + +uint32_t cpu_lduw_le_data_ra(CPUArchState *env, 
abi_ptr ptr, uintptr_t ra); +int cpu_ldsw_le_data_ra(CPUArchState *env, abi_ptr ptr, uintptr_t ra); +uint32_t cpu_ldl_le_data_ra(CPUArchState *env, abi_ptr ptr, uintptr_t ra); +uint64_t cpu_ldq_le_data_ra(CPUArchState *env, abi_ptr ptr, uintptr_t ra); void cpu_stb_data(CPUArchState *env, abi_ptr ptr, uint32_t val); -void cpu_stw_data(CPUArchState *env, abi_ptr ptr, uint32_t val); -void cpu_stl_data(CPUArchState *env, abi_ptr ptr, uint32_t val); -void cpu_stq_data(CPUArchState *env, abi_ptr ptr, uint64_t val); + +void cpu_stw_be_data(CPUArchState *env, abi_ptr ptr, uint32_t val); +void cpu_stl_be_data(CPUArchState *env, abi_ptr ptr, uint32_t val); +void cpu_stq_be_data(CPUArchState *env, abi_ptr ptr, uint64_t val); + +void cpu_stw_le_data(CPUArchState *env, abi_ptr ptr, uint32_t val); +void cpu_stl_le_data(CPUArchState *env, abi_ptr ptr, uint32_t val); +void cpu_stq_le_data(CPUArchState *env, abi_ptr ptr, uint64_t val); void cpu_stb_data_ra(CPUArchState *env, abi_ptr ptr, uint32_t val, uintptr_t retaddr); -void cpu_stw_data_ra(CPUArchState *env, abi_ptr ptr, - uint32_t val, uintptr_t retaddr); -void cpu_stl_data_ra(CPUArchState *env, abi_ptr ptr, - uint32_t val, uintptr_t retaddr); -void cpu_stq_data_ra(CPUArchState *env, abi_ptr ptr, - uint64_t val, uintptr_t retaddr); + +void cpu_stw_be_data_ra(CPUArchState *env, abi_ptr ptr, + uint32_t val, uintptr_t ra); +void cpu_stl_be_data_ra(CPUArchState *env, abi_ptr ptr, + uint32_t val, uintptr_t ra); +void cpu_stq_be_data_ra(CPUArchState *env, abi_ptr ptr, + uint64_t val, uintptr_t ra); + +void cpu_stw_le_data_ra(CPUArchState *env, abi_ptr ptr, + uint32_t val, uintptr_t ra); +void cpu_stl_le_data_ra(CPUArchState *env, abi_ptr ptr, + uint32_t val, uintptr_t ra); +void cpu_stq_le_data_ra(CPUArchState *env, abi_ptr ptr, + uint64_t val, uintptr_t ra); + /* Needed for TCG_OVERSIZED_GUEST */ #include "tcg/tcg.h" @@ -98,9 +129,7 @@ static inline target_ulong tlb_addr_write(const CPUTLBEntry *entry) static inline uintptr_t tlb_index(CPUArchState *env, uintptr_t mmu_idx, target_ulong addr) { -#ifdef TARGET_ARM - struct uc_struct *uc = env->uc; -#endif + UNICORN_UNUSED struct uc_struct *uc = env->uc; uintptr_t size_mask = env_tlb(env)->f[mmu_idx].mask >> CPU_TLB_ENTRY_BITS; return (addr >> TARGET_PAGE_BITS) & size_mask; @@ -115,27 +144,90 @@ static inline CPUTLBEntry *tlb_entry(CPUArchState *env, uintptr_t mmu_idx, uint32_t cpu_ldub_mmuidx_ra(CPUArchState *env, abi_ptr addr, int mmu_idx, uintptr_t ra); -uint32_t cpu_lduw_mmuidx_ra(CPUArchState *env, abi_ptr addr, - int mmu_idx, uintptr_t ra); -uint32_t cpu_ldl_mmuidx_ra(CPUArchState *env, abi_ptr addr, - int mmu_idx, uintptr_t ra); -uint64_t cpu_ldq_mmuidx_ra(CPUArchState *env, abi_ptr addr, - int mmu_idx, uintptr_t ra); int cpu_ldsb_mmuidx_ra(CPUArchState *env, abi_ptr addr, int mmu_idx, uintptr_t ra); -int cpu_ldsw_mmuidx_ra(CPUArchState *env, abi_ptr addr, - int mmu_idx, uintptr_t ra); + +uint32_t cpu_lduw_be_mmuidx_ra(CPUArchState *env, abi_ptr addr, + int mmu_idx, uintptr_t ra); +int cpu_ldsw_be_mmuidx_ra(CPUArchState *env, abi_ptr addr, + int mmu_idx, uintptr_t ra); +uint32_t cpu_ldl_be_mmuidx_ra(CPUArchState *env, abi_ptr addr, + int mmu_idx, uintptr_t ra); +uint64_t cpu_ldq_be_mmuidx_ra(CPUArchState *env, abi_ptr addr, + int mmu_idx, uintptr_t ra); + +uint32_t cpu_lduw_le_mmuidx_ra(CPUArchState *env, abi_ptr addr, + int mmu_idx, uintptr_t ra); +int cpu_ldsw_le_mmuidx_ra(CPUArchState *env, abi_ptr addr, + int mmu_idx, uintptr_t ra); +uint32_t cpu_ldl_le_mmuidx_ra(CPUArchState *env, abi_ptr addr, + 
int mmu_idx, uintptr_t ra); +uint64_t cpu_ldq_le_mmuidx_ra(CPUArchState *env, abi_ptr addr, + int mmu_idx, uintptr_t ra); void cpu_stb_mmuidx_ra(CPUArchState *env, abi_ptr addr, uint32_t val, int mmu_idx, uintptr_t retaddr); -void cpu_stw_mmuidx_ra(CPUArchState *env, abi_ptr addr, uint32_t val, - int mmu_idx, uintptr_t retaddr); -void cpu_stl_mmuidx_ra(CPUArchState *env, abi_ptr addr, uint32_t val, - int mmu_idx, uintptr_t retaddr); -void cpu_stq_mmuidx_ra(CPUArchState *env, abi_ptr addr, uint64_t val, - int mmu_idx, uintptr_t retaddr); +void cpu_stw_be_mmuidx_ra(CPUArchState *env, abi_ptr addr, uint32_t val, + int mmu_idx, uintptr_t retaddr); +void cpu_stl_be_mmuidx_ra(CPUArchState *env, abi_ptr addr, uint32_t val, + int mmu_idx, uintptr_t retaddr); +void cpu_stq_be_mmuidx_ra(CPUArchState *env, abi_ptr addr, uint64_t val, + int mmu_idx, uintptr_t retaddr); + +void cpu_stw_le_mmuidx_ra(CPUArchState *env, abi_ptr addr, uint32_t val, + int mmu_idx, uintptr_t retaddr); +void cpu_stl_le_mmuidx_ra(CPUArchState *env, abi_ptr addr, uint32_t val, + int mmu_idx, uintptr_t retaddr); +void cpu_stq_le_mmuidx_ra(CPUArchState *env, abi_ptr addr, uint64_t val, + int mmu_idx, uintptr_t retaddr); + +#ifdef TARGET_WORDS_BIGENDIAN +# define cpu_lduw_data cpu_lduw_be_data +# define cpu_ldsw_data cpu_ldsw_be_data +# define cpu_ldl_data cpu_ldl_be_data +# define cpu_ldq_data cpu_ldq_be_data +# define cpu_lduw_data_ra cpu_lduw_be_data_ra +# define cpu_ldsw_data_ra cpu_ldsw_be_data_ra +# define cpu_ldl_data_ra cpu_ldl_be_data_ra +# define cpu_ldq_data_ra cpu_ldq_be_data_ra +# define cpu_lduw_mmuidx_ra cpu_lduw_be_mmuidx_ra +# define cpu_ldsw_mmuidx_ra cpu_ldsw_be_mmuidx_ra +# define cpu_ldl_mmuidx_ra cpu_ldl_be_mmuidx_ra +# define cpu_ldq_mmuidx_ra cpu_ldq_be_mmuidx_ra +# define cpu_stw_data cpu_stw_be_data +# define cpu_stl_data cpu_stl_be_data +# define cpu_stq_data cpu_stq_be_data +# define cpu_stw_data_ra cpu_stw_be_data_ra +# define cpu_stl_data_ra cpu_stl_be_data_ra +# define cpu_stq_data_ra cpu_stq_be_data_ra +# define cpu_stw_mmuidx_ra cpu_stw_be_mmuidx_ra +# define cpu_stl_mmuidx_ra cpu_stl_be_mmuidx_ra +# define cpu_stq_mmuidx_ra cpu_stq_be_mmuidx_ra +#else +# define cpu_lduw_data cpu_lduw_le_data +# define cpu_ldsw_data cpu_ldsw_le_data +# define cpu_ldl_data cpu_ldl_le_data +# define cpu_ldq_data cpu_ldq_le_data +# define cpu_lduw_data_ra cpu_lduw_le_data_ra +# define cpu_ldsw_data_ra cpu_ldsw_le_data_ra +# define cpu_ldl_data_ra cpu_ldl_le_data_ra +# define cpu_ldq_data_ra cpu_ldq_le_data_ra +# define cpu_lduw_mmuidx_ra cpu_lduw_le_mmuidx_ra +# define cpu_ldsw_mmuidx_ra cpu_ldsw_le_mmuidx_ra +# define cpu_ldl_mmuidx_ra cpu_ldl_le_mmuidx_ra +# define cpu_ldq_mmuidx_ra cpu_ldq_le_mmuidx_ra +# define cpu_stw_data cpu_stw_le_data +# define cpu_stl_data cpu_stl_le_data +# define cpu_stq_data cpu_stq_le_data +# define cpu_stw_data_ra cpu_stw_le_data_ra +# define cpu_stl_data_ra cpu_stl_le_data_ra +# define cpu_stq_data_ra cpu_stq_le_data_ra +# define cpu_stw_mmuidx_ra cpu_stw_le_mmuidx_ra +# define cpu_stl_mmuidx_ra cpu_stl_le_mmuidx_ra +# define cpu_stq_mmuidx_ra cpu_stq_le_mmuidx_ra +#endif uint32_t cpu_ldub_code(CPUArchState *env, abi_ptr addr); uint32_t cpu_lduw_code(CPUArchState *env, abi_ptr addr); diff --git a/qemu/include/exec/exec-all.h b/qemu/include/exec/exec-all.h index 68c656787f..a717c75adc 100644 --- a/qemu/include/exec/exec-all.h +++ b/qemu/include/exec/exec-all.h @@ -108,6 +108,11 @@ void cpu_address_space_init(CPUState *cpu, int asidx, MemoryRegion *mr); * @cpu: CPU whose TLB should be 
initialized */ void tlb_init(CPUState *cpu); +/** + * tlb_destroy - destroy a CPU's TLB + * @cpu: CPU whose TLB should be destroyed + */ +void tlb_destroy(CPUState *cpu); /** * tlb_flush_page: * @cpu: CPU whose TLB should be flushed @@ -264,6 +269,23 @@ void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr, void tlb_set_page(CPUState *cpu, target_ulong vaddr, hwaddr paddr, int prot, int mmu_idx, target_ulong size); +/** + * probe_access: + * @env: CPUArchState + * @addr: guest virtual address to look up + * @size: size of the access + * @access_type: read, write or execute permission + * @mmu_idx: MMU index to use for lookup + * @retaddr: return address for unwinding + * + * Look up the guest virtual address @addr. Raise an exception if the + * page does not satisfy @access_type. Raise an exception if the + * access (@addr, @size) hits a watchpoint. For writes, mark a clean + * page as dirty. + * + * Finally, return the host address for a page that is backed by RAM, + * or NULL if the page requires I/O. + */ void *probe_access(CPUArchState *env, target_ulong addr, int size, MMUAccessType access_type, int mmu_idx, uintptr_t retaddr); @@ -279,6 +301,28 @@ static inline void *probe_read(CPUArchState *env, target_ulong addr, int size, return probe_access(env, addr, size, MMU_DATA_LOAD, mmu_idx, retaddr); } +/** + * probe_access_flags: + * @env: CPUArchState + * @addr: guest virtual address to look up + * @access_type: read, write or execute permission + * @mmu_idx: MMU index to use for lookup + * @nonfault: suppress the fault + * @phost: return value for host address + * @retaddr: return address for unwinding + * + * Similar to probe_access, loosely returning the TLB_FLAGS_MASK for + * the page, and storing the host address for RAM in @phost. + * + * If @nonfault is set, do not raise an exception but return TLB_INVALID_MASK. + * Do not handle watchpoints, but include TLB_WATCHPOINT in the returned flags. + * Do handle clean pages, so exclude TLB_NOTDIRY from the returned flags. + * For simplicity, all "mmio-like" flags are folded to TLB_MMIO. + */ +int probe_access_flags(CPUArchState *env, target_ulong addr, + MMUAccessType access_type, int mmu_idx, + bool nonfault, void **phost, uintptr_t retaddr); + #define CODE_GEN_ALIGN 16 /* must be >= of the size of a icache line */ /* Estimated block size for TB allocation. */ diff --git a/qemu/include/fpu/softfloat-helpers.h b/qemu/include/fpu/softfloat-helpers.h index e0baf24c8f..9ddecba70f 100644 --- a/qemu/include/fpu/softfloat-helpers.h +++ b/qemu/include/fpu/softfloat-helpers.h @@ -53,12 +53,12 @@ this code that are retained. 
#include "fpu/softfloat-types.h" -static inline void set_float_detect_tininess(int val, float_status *status) +static inline void set_float_detect_tininess(bool val, float_status *status) { - status->float_detect_tininess = val; + status->tininess_before_rounding = val; } -static inline void set_float_rounding_mode(int val, float_status *status) +static inline void set_float_rounding_mode(FloatRoundMode val, float_status *status) { status->float_rounding_mode = val; } @@ -74,32 +74,32 @@ static inline void set_floatx80_rounding_precision(int val, status->floatx80_rounding_precision = val; } -static inline void set_flush_to_zero(flag val, float_status *status) +static inline void set_flush_to_zero(bool val, float_status *status) { status->flush_to_zero = val; } -static inline void set_flush_inputs_to_zero(flag val, float_status *status) +static inline void set_flush_inputs_to_zero(bool val, float_status *status) { status->flush_inputs_to_zero = val; } -static inline void set_default_nan_mode(flag val, float_status *status) +static inline void set_default_nan_mode(bool val, float_status *status) { status->default_nan_mode = val; } -static inline void set_snan_bit_is_one(flag val, float_status *status) +static inline void set_snan_bit_is_one(bool val, float_status *status) { status->snan_bit_is_one = val; } static inline int get_float_detect_tininess(float_status *status) { - return status->float_detect_tininess; + return status->tininess_before_rounding; } -static inline int get_float_rounding_mode(float_status *status) +static inline FloatRoundMode get_float_rounding_mode(float_status *status) { return status->float_rounding_mode; } @@ -114,17 +114,17 @@ static inline int get_floatx80_rounding_precision(float_status *status) return status->floatx80_rounding_precision; } -static inline flag get_flush_to_zero(float_status *status) +static inline bool get_flush_to_zero(float_status *status) { return status->flush_to_zero; } -static inline flag get_flush_inputs_to_zero(float_status *status) +static inline bool get_flush_inputs_to_zero(float_status *status) { return status->flush_inputs_to_zero; } -static inline flag get_default_nan_mode(float_status *status) +static inline bool get_default_nan_mode(float_status *status) { return status->default_nan_mode; } diff --git a/qemu/include/fpu/softfloat-macros.h b/qemu/include/fpu/softfloat-macros.h index afae4f7404..38d8c97dce 100644 --- a/qemu/include/fpu/softfloat-macros.h +++ b/qemu/include/fpu/softfloat-macros.h @@ -756,10 +756,10 @@ static inline uint32_t estimateSqrt32(int aExp, uint32_t a) | Otherwise, returns 0. *----------------------------------------------------------------------------*/ -static inline flag eq128( uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1 ) +static inline bool eq128( uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1 ) { - return ( a0 == b0 ) && ( a1 == b1 ); + return a0 == b0 && a1 == b1; } @@ -769,10 +769,10 @@ static inline flag eq128( uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1 ) | Otherwise, returns 0. *----------------------------------------------------------------------------*/ -static inline flag le128( uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1 ) +static inline bool le128( uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1 ) { - return ( a0 < b0 ) || ( ( a0 == b0 ) && ( a1 <= b1 ) ); + return a0 < b0 || (a0 == b0 && a1 <= b1); } @@ -782,10 +782,10 @@ static inline flag le128( uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1 ) | returns 0. 
*----------------------------------------------------------------------------*/ -static inline flag lt128( uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1 ) +static inline bool lt128( uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1 ) { - return ( a0 < b0 ) || ( ( a0 == b0 ) && ( a1 < b1 ) ); + return a0 < b0 || (a0 == b0 && a1 < b1); } @@ -795,10 +795,10 @@ static inline flag lt128( uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1 ) | Otherwise, returns 0. *----------------------------------------------------------------------------*/ -static inline flag ne128( uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1 ) +static inline bool ne128( uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1 ) { - return ( a0 != b0 ) || ( a1 != b1 ); + return a0 != b0 || a1 != b1; } diff --git a/qemu/include/fpu/softfloat-types.h b/qemu/include/fpu/softfloat-types.h index 565dced559..00bc527d4a 100644 --- a/qemu/include/fpu/softfloat-types.h +++ b/qemu/include/fpu/softfloat-types.h @@ -82,12 +82,6 @@ this code that are retained. #include -/* This 'flag' type must be able to hold at least 0 and 1. It should - * probably be replaced with 'bool' but the uses would need to be audited - * to check that they weren't accidentally relying on it being a larger type. - */ -typedef uint8_t flag; - /* * Software IEC/IEEE floating-point types. */ @@ -124,16 +118,25 @@ typedef struct { * Software IEC/IEEE floating-point underflow tininess-detection mode. */ -enum { - float_tininess_after_rounding = 0, - float_tininess_before_rounding = 1 -}; +#define float_tininess_after_rounding false +#define float_tininess_before_rounding true /* *Software IEC/IEEE floating-point rounding mode. */ -enum { +#ifdef _MSC_VER +#define ENUM_PACKED \ + __pragma(pack(push, 1)) \ + enum +#define ENUM_PACKED_END \ + __pragma(pack(pop)) +#else +#define ENUM_PACKED enum __attribute__((packed)) +#define ENUM_PACKED_END +#endif + +typedef ENUM_PACKED { float_round_nearest_even = 0, float_round_down = 1, float_round_up = 2, @@ -141,7 +144,7 @@ enum { float_round_ties_away = 4, /* Not an IEEE rounding mode: round to the closest odd mantissa value */ float_round_to_odd = 5, -}; +} ENUM_PACKED_END FloatRoundMode; /* * Software IEC/IEEE floating-point exception flags. @@ -166,17 +169,17 @@ enum { */ typedef struct float_status { - signed char float_detect_tininess; - signed char float_rounding_mode; + FloatRoundMode float_rounding_mode; uint8_t float_exception_flags; signed char floatx80_rounding_precision; + bool tininess_before_rounding; /* should denormalised results go to zero and set the inexact flag? */ - flag flush_to_zero; + bool flush_to_zero; /* should denormalised inputs go to zero and set the input_denormal flag? */ - flag flush_inputs_to_zero; - flag default_nan_mode; + bool flush_inputs_to_zero; + bool default_nan_mode; /* not always used -- see snan_bit_is_one() in softfloat-specialize.h */ - flag snan_bit_is_one; + bool snan_bit_is_one; } float_status; #endif /* SOFTFLOAT_TYPES_H */ diff --git a/qemu/include/fpu/softfloat.h b/qemu/include/fpu/softfloat.h index ecb8ba0114..76d023725c 100644 --- a/qemu/include/fpu/softfloat.h +++ b/qemu/include/fpu/softfloat.h @@ -85,12 +85,12 @@ this code that are retained. 
/*---------------------------------------------------------------------------- | Software IEC/IEEE floating-point ordering relations *----------------------------------------------------------------------------*/ -enum { +typedef enum { float_relation_less = -1, float_relation_equal = 0, float_relation_greater = 1, float_relation_unordered = 2 -}; +} FloatRelation; #include "fpu/softfloat-types.h" #include "fpu/softfloat-helpers.h" @@ -186,9 +186,9 @@ float32 float16_to_float32(float16, bool ieee, float_status *status); float16 float64_to_float16(float64 a, bool ieee, float_status *status); float64 float16_to_float64(float16 a, bool ieee, float_status *status); -int16_t float16_to_int16_scalbn(float16, int, int, float_status *status); -int32_t float16_to_int32_scalbn(float16, int, int, float_status *status); -int64_t float16_to_int64_scalbn(float16, int, int, float_status *status); +int16_t float16_to_int16_scalbn(float16, FloatRoundMode, int, float_status *status); +int32_t float16_to_int32_scalbn(float16, FloatRoundMode, int, float_status *status); +int64_t float16_to_int64_scalbn(float16, FloatRoundMode, int, float_status *status); int16_t float16_to_int16(float16, float_status *status); int32_t float16_to_int32(float16, float_status *status); @@ -198,9 +198,9 @@ int16_t float16_to_int16_round_to_zero(float16, float_status *status); int32_t float16_to_int32_round_to_zero(float16, float_status *status); int64_t float16_to_int64_round_to_zero(float16, float_status *status); -uint16_t float16_to_uint16_scalbn(float16 a, int, int, float_status *status); -uint32_t float16_to_uint32_scalbn(float16 a, int, int, float_status *status); -uint64_t float16_to_uint64_scalbn(float16 a, int, int, float_status *status); +uint16_t float16_to_uint16_scalbn(float16 a, FloatRoundMode, int, float_status *status); +uint32_t float16_to_uint32_scalbn(float16 a, FloatRoundMode, int, float_status *status); +uint64_t float16_to_uint64_scalbn(float16 a, FloatRoundMode, int, float_status *status); uint16_t float16_to_uint16(float16 a, float_status *status); uint32_t float16_to_uint32(float16 a, float_status *status); @@ -228,34 +228,34 @@ float16 float16_maxnum(float16, float16, float_status *status); float16 float16_minnummag(float16, float16, float_status *status); float16 float16_maxnummag(float16, float16, float_status *status); float16 float16_sqrt(float16, float_status *status); -int float16_compare(float16, float16, float_status *status); -int float16_compare_quiet(float16, float16, float_status *status); +FloatRelation float16_compare(float16, float16, float_status *status); +FloatRelation float16_compare_quiet(float16, float16, float_status *status); -int float16_is_quiet_nan(float16, float_status *status); -int float16_is_signaling_nan(float16, float_status *status); +bool float16_is_quiet_nan(float16, float_status *status); +bool float16_is_signaling_nan(float16, float_status *status); float16 float16_silence_nan(float16, float_status *status); -static inline int float16_is_any_nan(float16 a) +static inline bool float16_is_any_nan(float16 a) { return ((float16_val(a) & ~0x8000) > 0x7c00); } -static inline int float16_is_neg(float16 a) +static inline bool float16_is_neg(float16 a) { return float16_val(a) >> 15; } -static inline int float16_is_infinity(float16 a) +static inline bool float16_is_infinity(float16 a) { return (float16_val(a) & 0x7fff) == 0x7c00; } -static inline int float16_is_zero(float16 a) +static inline bool float16_is_zero(float16 a) { return (float16_val(a) & 0x7fff) == 0; } -static 
inline int float16_is_zero_or_denormal(float16 a) +static inline bool float16_is_zero_or_denormal(float16 a) { return (float16_val(a) & 0x7c00) == 0; } @@ -298,9 +298,9 @@ float16 float16_default_nan(float_status *status); | Software IEC/IEEE single-precision conversion routines. *----------------------------------------------------------------------------*/ -int16_t float32_to_int16_scalbn(float32, int, int, float_status *status); -int32_t float32_to_int32_scalbn(float32, int, int, float_status *status); -int64_t float32_to_int64_scalbn(float32, int, int, float_status *status); +int16_t float32_to_int16_scalbn(float32, FloatRoundMode, int, float_status *status); +int32_t float32_to_int32_scalbn(float32, FloatRoundMode, int, float_status *status); +int64_t float32_to_int64_scalbn(float32, FloatRoundMode, int, float_status *status); int16_t float32_to_int16(float32, float_status *status); int32_t float32_to_int32(float32, float_status *status); @@ -310,9 +310,9 @@ int16_t float32_to_int16_round_to_zero(float32, float_status *status); int32_t float32_to_int32_round_to_zero(float32, float_status *status); int64_t float32_to_int64_round_to_zero(float32, float_status *status); -uint16_t float32_to_uint16_scalbn(float32, int, int, float_status *status); -uint32_t float32_to_uint32_scalbn(float32, int, int, float_status *status); -uint64_t float32_to_uint64_scalbn(float32, int, int, float_status *status); +uint16_t float32_to_uint16_scalbn(float32, FloatRoundMode, int, float_status *status); +uint32_t float32_to_uint32_scalbn(float32, FloatRoundMode, int, float_status *status); +uint64_t float32_to_uint64_scalbn(float32, FloatRoundMode, int, float_status *status); uint16_t float32_to_uint16(float32, float_status *status); uint32_t float32_to_uint32(float32, float_status *status); @@ -339,24 +339,16 @@ float32 float32_muladd(float32, float32, float32, int, float_status *status); float32 float32_sqrt(float32, float_status *status); float32 float32_exp2(float32, float_status *status); float32 float32_log2(float32, float_status *status); -int float32_eq(float32, float32, float_status *status); -int float32_le(float32, float32, float_status *status); -int float32_lt(float32, float32, float_status *status); -int float32_unordered(float32, float32, float_status *status); -int float32_eq_quiet(float32, float32, float_status *status); -int float32_le_quiet(float32, float32, float_status *status); -int float32_lt_quiet(float32, float32, float_status *status); -int float32_unordered_quiet(float32, float32, float_status *status); -int float32_compare(float32, float32, float_status *status); -int float32_compare_quiet(float32, float32, float_status *status); +FloatRelation float32_compare(float32, float32, float_status *status); +FloatRelation float32_compare_quiet(float32, float32, float_status *status); float32 float32_min(float32, float32, float_status *status); float32 float32_max(float32, float32, float_status *status); float32 float32_minnum(float32, float32, float_status *status); float32 float32_maxnum(float32, float32, float_status *status); float32 float32_minnummag(float32, float32, float_status *status); float32 float32_maxnummag(float32, float32, float_status *status); -int float32_is_quiet_nan(float32, float_status *status); -int float32_is_signaling_nan(float32, float_status *status); +bool float32_is_quiet_nan(float32, float_status *status); +bool float32_is_signaling_nan(float32, float_status *status); float32 float32_silence_nan(float32, float_status *status); float32 float32_scalbn(float32, 
int, float_status *status); @@ -376,27 +368,27 @@ static inline float32 float32_chs(float32 a) return make_float32(float32_val(a) ^ 0x80000000); } -static inline int float32_is_infinity(float32 a) +static inline bool float32_is_infinity(float32 a) { return (float32_val(a) & 0x7fffffff) == 0x7f800000; } -static inline int float32_is_neg(float32 a) +static inline bool float32_is_neg(float32 a) { return float32_val(a) >> 31; } -static inline int float32_is_zero(float32 a) +static inline bool float32_is_zero(float32 a) { return (float32_val(a) & 0x7fffffff) == 0; } -static inline int float32_is_any_nan(float32 a) +static inline bool float32_is_any_nan(float32 a) { return ((float32_val(a) & ~(1 << 31)) > 0x7f800000UL); } -static inline int float32_is_zero_or_denormal(float32 a) +static inline bool float32_is_zero_or_denormal(float32 a) { return (float32_val(a) & 0x7f800000) == 0; } @@ -421,6 +413,47 @@ static inline float32 float32_set_sign(float32 a, int sign) return make_float32((float32_val(a) & 0x7fffffff) | (sign << 31)); } +static inline bool float32_eq(float32 a, float32 b, float_status *s) +{ + return float32_compare(a, b, s) == float_relation_equal; +} + +static inline bool float32_le(float32 a, float32 b, float_status *s) +{ + return float32_compare(a, b, s) <= float_relation_equal; +} + +static inline bool float32_lt(float32 a, float32 b, float_status *s) +{ + return float32_compare(a, b, s) < float_relation_equal; +} + +static inline bool float32_unordered(float32 a, float32 b, float_status *s) +{ + return float32_compare(a, b, s) == float_relation_unordered; +} + +static inline bool float32_eq_quiet(float32 a, float32 b, float_status *s) +{ + return float32_compare_quiet(a, b, s) == float_relation_equal; +} + +static inline bool float32_le_quiet(float32 a, float32 b, float_status *s) +{ + return float32_compare_quiet(a, b, s) <= float_relation_equal; +} + +static inline bool float32_lt_quiet(float32 a, float32 b, float_status *s) +{ + return float32_compare_quiet(a, b, s) < float_relation_equal; +} + +static inline bool float32_unordered_quiet(float32 a, float32 b, + float_status *s) +{ + return float32_compare_quiet(a, b, s) == float_relation_unordered; +} + #define float32_zero make_float32(0) #define float32_half make_float32(0x3f000000) #define float32_one make_float32(0x3f800000) @@ -440,7 +473,7 @@ static inline float32 float32_set_sign(float32 a, int sign) | significand. *----------------------------------------------------------------------------*/ -static inline float32 packFloat32(flag zSign, int zExp, uint32_t zSig) +static inline float32 packFloat32(bool zSign, int zExp, uint32_t zSig) { return make_float32( (((uint32_t)zSign) << 31) + (((uint32_t)zExp) << 23) + zSig); @@ -455,9 +488,9 @@ float32 float32_default_nan(float_status *status); | Software IEC/IEEE double-precision conversion routines. 
*----------------------------------------------------------------------------*/ -int16_t float64_to_int16_scalbn(float64, int, int, float_status *status); -int32_t float64_to_int32_scalbn(float64, int, int, float_status *status); -int64_t float64_to_int64_scalbn(float64, int, int, float_status *status); +int16_t float64_to_int16_scalbn(float64, FloatRoundMode, int, float_status *status); +int32_t float64_to_int32_scalbn(float64, FloatRoundMode, int, float_status *status); +int64_t float64_to_int64_scalbn(float64, FloatRoundMode, int, float_status *status); int16_t float64_to_int16(float64, float_status *status); int32_t float64_to_int32(float64, float_status *status); @@ -467,9 +500,9 @@ int16_t float64_to_int16_round_to_zero(float64, float_status *status); int32_t float64_to_int32_round_to_zero(float64, float_status *status); int64_t float64_to_int64_round_to_zero(float64, float_status *status); -uint16_t float64_to_uint16_scalbn(float64, int, int, float_status *status); -uint32_t float64_to_uint32_scalbn(float64, int, int, float_status *status); -uint64_t float64_to_uint64_scalbn(float64, int, int, float_status *status); +uint16_t float64_to_uint16_scalbn(float64, FloatRoundMode, int, float_status *status); +uint32_t float64_to_uint32_scalbn(float64, FloatRoundMode, int, float_status *status); +uint64_t float64_to_uint64_scalbn(float64, FloatRoundMode, int, float_status *status); uint16_t float64_to_uint16(float64, float_status *status); uint32_t float64_to_uint32(float64, float_status *status); @@ -495,24 +528,16 @@ float64 float64_rem(float64, float64, float_status *status); float64 float64_muladd(float64, float64, float64, int, float_status *status); float64 float64_sqrt(float64, float_status *status); float64 float64_log2(float64, float_status *status); -int float64_eq(float64, float64, float_status *status); -int float64_le(float64, float64, float_status *status); -int float64_lt(float64, float64, float_status *status); -int float64_unordered(float64, float64, float_status *status); -int float64_eq_quiet(float64, float64, float_status *status); -int float64_le_quiet(float64, float64, float_status *status); -int float64_lt_quiet(float64, float64, float_status *status); -int float64_unordered_quiet(float64, float64, float_status *status); -int float64_compare(float64, float64, float_status *status); -int float64_compare_quiet(float64, float64, float_status *status); +FloatRelation float64_compare(float64, float64, float_status *status); +FloatRelation float64_compare_quiet(float64, float64, float_status *status); float64 float64_min(float64, float64, float_status *status); float64 float64_max(float64, float64, float_status *status); float64 float64_minnum(float64, float64, float_status *status); float64 float64_maxnum(float64, float64, float_status *status); float64 float64_minnummag(float64, float64, float_status *status); float64 float64_maxnummag(float64, float64, float_status *status); -int float64_is_quiet_nan(float64 a, float_status *status); -int float64_is_signaling_nan(float64, float_status *status); +bool float64_is_quiet_nan(float64 a, float_status *status); +bool float64_is_signaling_nan(float64, float_status *status); float64 float64_silence_nan(float64, float_status *status); float64 float64_scalbn(float64, int, float_status *status); @@ -532,27 +557,27 @@ static inline float64 float64_chs(float64 a) return make_float64(float64_val(a) ^ 0x8000000000000000LL); } -static inline int float64_is_infinity(float64 a) +static inline bool float64_is_infinity(float64 a) { return 
(float64_val(a) & 0x7fffffffffffffffLL ) == 0x7ff0000000000000LL; } -static inline int float64_is_neg(float64 a) +static inline bool float64_is_neg(float64 a) { return float64_val(a) >> 63; } -static inline int float64_is_zero(float64 a) +static inline bool float64_is_zero(float64 a) { return (float64_val(a) & 0x7fffffffffffffffLL) == 0; } -static inline int float64_is_any_nan(float64 a) +static inline bool float64_is_any_nan(float64 a) { return ((float64_val(a) & ~(1ULL << 63)) > 0x7ff0000000000000ULL); } -static inline int float64_is_zero_or_denormal(float64 a) +static inline bool float64_is_zero_or_denormal(float64 a) { return (float64_val(a) & 0x7ff0000000000000LL) == 0; } @@ -578,6 +603,47 @@ static inline float64 float64_set_sign(float64 a, int sign) | ((int64_t)sign << 63)); } +static inline bool float64_eq(float64 a, float64 b, float_status *s) +{ + return float64_compare(a, b, s) == float_relation_equal; +} + +static inline bool float64_le(float64 a, float64 b, float_status *s) +{ + return float64_compare(a, b, s) <= float_relation_equal; +} + +static inline bool float64_lt(float64 a, float64 b, float_status *s) +{ + return float64_compare(a, b, s) < float_relation_equal; +} + +static inline bool float64_unordered(float64 a, float64 b, float_status *s) +{ + return float64_compare(a, b, s) == float_relation_unordered; +} + +static inline bool float64_eq_quiet(float64 a, float64 b, float_status *s) +{ + return float64_compare_quiet(a, b, s) == float_relation_equal; +} + +static inline bool float64_le_quiet(float64 a, float64 b, float_status *s) +{ + return float64_compare_quiet(a, b, s) <= float_relation_equal; +} + +static inline bool float64_lt_quiet(float64 a, float64 b, float_status *s) +{ + return float64_compare_quiet(a, b, s) < float_relation_equal; +} + +static inline bool float64_unordered_quiet(float64 a, float64 b, + float_status *s) +{ + return float64_compare_quiet(a, b, s) == float_relation_unordered; +} + #define float64_zero make_float64(0) #define float64_half make_float64(0x3fe0000000000000LL) #define float64_one make_float64(0x3ff0000000000000LL) @@ -617,18 +683,12 @@ floatx80 floatx80_add(floatx80, floatx80, float_status *status); floatx80 floatx80_sub(floatx80, floatx80, float_status *status); floatx80 floatx80_mul(floatx80, floatx80, float_status *status); floatx80 floatx80_div(floatx80, floatx80, float_status *status); +floatx80 floatx80_modrem(floatx80, floatx80, bool, uint64_t *, float_status *status); +floatx80 floatx80_mod(floatx80, floatx80, float_status *status); floatx80 floatx80_rem(floatx80, floatx80, float_status *status); floatx80 floatx80_sqrt(floatx80, float_status *status); -int floatx80_eq(floatx80, floatx80, float_status *status); -int floatx80_le(floatx80, floatx80, float_status *status); -int floatx80_lt(floatx80, floatx80, float_status *status); -int floatx80_unordered(floatx80, floatx80, float_status *status); -int floatx80_eq_quiet(floatx80, floatx80, float_status *status); -int floatx80_le_quiet(floatx80, floatx80, float_status *status); -int floatx80_lt_quiet(floatx80, floatx80, float_status *status); -int floatx80_unordered_quiet(floatx80, floatx80, float_status *status); -int floatx80_compare(floatx80, floatx80, float_status *status); -int floatx80_compare_quiet(floatx80, floatx80, float_status *status); +FloatRelation floatx80_compare(floatx80, floatx80, float_status *status); +FloatRelation floatx80_compare_quiet(floatx80, floatx80, float_status *status); int floatx80_is_quiet_nan(floatx80, float_status *status); int 
floatx80_is_signaling_nan(floatx80, float_status *status); floatx80 floatx80_silence_nan(floatx80, float_status *status); @@ -646,7 +706,7 @@ static inline floatx80 floatx80_chs(floatx80 a) return a; } -static inline int floatx80_is_infinity(floatx80 a) +static inline bool floatx80_is_infinity(floatx80 a) { #if defined(TARGET_M68K) return (a.high & 0x7fff) == floatx80_infinity.high && !(a.low << 1); @@ -656,26 +716,67 @@ static inline int floatx80_is_infinity(floatx80 a) #endif } -static inline int floatx80_is_neg(floatx80 a) +static inline bool floatx80_is_neg(floatx80 a) { return a.high >> 15; } -static inline int floatx80_is_zero(floatx80 a) +static inline bool floatx80_is_zero(floatx80 a) { return (a.high & 0x7fff) == 0 && a.low == 0; } -static inline int floatx80_is_zero_or_denormal(floatx80 a) +static inline bool floatx80_is_zero_or_denormal(floatx80 a) { return (a.high & 0x7fff) == 0; } -static inline int floatx80_is_any_nan(floatx80 a) +static inline bool floatx80_is_any_nan(floatx80 a) { return ((a.high & 0x7fff) == 0x7fff) && (a.low<<1); } +static inline bool floatx80_eq(floatx80 a, floatx80 b, float_status *s) +{ + return floatx80_compare(a, b, s) == float_relation_equal; +} + +static inline bool floatx80_le(floatx80 a, floatx80 b, float_status *s) +{ + return floatx80_compare(a, b, s) <= float_relation_equal; +} + +static inline bool floatx80_lt(floatx80 a, floatx80 b, float_status *s) +{ + return floatx80_compare(a, b, s) < float_relation_equal; +} + +static inline bool floatx80_unordered(floatx80 a, floatx80 b, float_status *s) +{ + return floatx80_compare(a, b, s) == float_relation_unordered; +} + +static inline bool floatx80_eq_quiet(floatx80 a, floatx80 b, float_status *s) +{ + return floatx80_compare_quiet(a, b, s) == float_relation_equal; +} + +static inline bool floatx80_le_quiet(floatx80 a, floatx80 b, float_status *s) +{ + return floatx80_compare_quiet(a, b, s) <= float_relation_equal; +} + +static inline bool floatx80_lt_quiet(floatx80 a, floatx80 b, float_status *s) +{ + return floatx80_compare_quiet(a, b, s) < float_relation_equal; +} + +static inline bool floatx80_unordered_quiet(floatx80 a, floatx80 b, + float_status *s) +{ + return floatx80_compare_quiet(a, b, s) == float_relation_unordered; +} + /*---------------------------------------------------------------------------- | Return whether the given value is an invalid floatx80 encoding. | Invalid floatx80 encodings arise when the integer bit is not set, but @@ -688,10 +789,35 @@ static inline int floatx80_is_any_nan(floatx80 a) *----------------------------------------------------------------------------*/ static inline bool floatx80_invalid_encoding(floatx80 a) { +#if defined(TARGET_M68K) + /*------------------------------------------------------------------------- + | With m68k, the explicit integer bit can be zero in the case of: + | - zeros (exp == 0, mantissa == 0) + | - denormalized numbers (exp == 0, mantissa != 0) + | - unnormalized numbers (exp != 0, exp < 0x7FFF) + | - infinities (exp == 0x7FFF, mantissa == 0) + | - not-a-numbers (exp == 0x7FFF, mantissa != 0) + | + | For infinities and NaNs, the explicit integer bit can be either one or + | zero. + | + | The IEEE 754 standard does not define a zero integer bit. Such a number + | is an unnormalized number. Hardware does not directly support + | denormalized and unnormalized numbers, but implicitly supports them by + | trapping them as unimplemented data types, allowing efficient conversion + | in software. 
+ | + | See "M68000 FAMILY PROGRAMMER’S REFERENCE MANUAL", + | "1.6 FLOATING-POINT DATA TYPES" + *------------------------------------------------------------------------*/ + return false; +#else return (a.low & (1ULL << 63)) == 0 && (a.high & 0x7FFF) != 0; +#endif } #define floatx80_zero make_floatx80(0x0000, 0x0000000000000000LL) +#define floatx80_zero_init make_floatx80_init(0x0000, 0x0000000000000000LL) #define floatx80_one make_floatx80(0x3fff, 0x8000000000000000LL) #define floatx80_ln2 make_floatx80(0x3ffe, 0xb17217f7d1cf79acLL) #define floatx80_pi make_floatx80(0x4000, 0xc90fdaa22168c235LL) @@ -722,7 +848,7 @@ static inline int32_t extractFloatx80Exp(floatx80 a) | `a'. *----------------------------------------------------------------------------*/ -static inline flag extractFloatx80Sign(floatx80 a) +static inline bool extractFloatx80Sign(floatx80 a) { return a.high >> 15; } @@ -732,7 +858,7 @@ static inline flag extractFloatx80Sign(floatx80 a) | extended double-precision floating-point value, returning the result. *----------------------------------------------------------------------------*/ -static inline floatx80 packFloatx80(flag zSign, int32_t zExp, uint64_t zSig) +static inline floatx80 packFloatx80(bool zSign, int32_t zExp, uint64_t zSig) { floatx80 z; @@ -783,7 +909,7 @@ floatx80 propagateFloatx80NaN(floatx80 a, floatx80 b, float_status *status); | Floating-Point Arithmetic. *----------------------------------------------------------------------------*/ -floatx80 roundAndPackFloatx80(int8_t roundingPrecision, flag zSign, +floatx80 roundAndPackFloatx80(int8_t roundingPrecision, bool zSign, int32_t zExp, uint64_t zSig0, uint64_t zSig1, float_status *status); @@ -797,7 +923,7 @@ floatx80 roundAndPackFloatx80(int8_t roundingPrecision, flag zSign, *----------------------------------------------------------------------------*/ floatx80 normalizeRoundAndPackFloatx80(int8_t roundingPrecision, - flag zSign, int32_t zExp, + bool zSign, int32_t zExp, uint64_t zSig0, uint64_t zSig1, float_status *status); @@ -831,18 +957,10 @@ float128 float128_mul(float128, float128, float_status *status); float128 float128_div(float128, float128, float_status *status); float128 float128_rem(float128, float128, float_status *status); float128 float128_sqrt(float128, float_status *status); -int float128_eq(float128, float128, float_status *status); -int float128_le(float128, float128, float_status *status); -int float128_lt(float128, float128, float_status *status); -int float128_unordered(float128, float128, float_status *status); -int float128_eq_quiet(float128, float128, float_status *status); -int float128_le_quiet(float128, float128, float_status *status); -int float128_lt_quiet(float128, float128, float_status *status); -int float128_unordered_quiet(float128, float128, float_status *status); -int float128_compare(float128, float128, float_status *status); -int float128_compare_quiet(float128, float128, float_status *status); -int float128_is_quiet_nan(float128, float_status *status); -int float128_is_signaling_nan(float128, float_status *status); +FloatRelation float128_compare(float128, float128, float_status *status); +FloatRelation float128_compare_quiet(float128, float128, float_status *status); +bool float128_is_quiet_nan(float128, float_status *status); +bool float128_is_signaling_nan(float128, float_status *status); float128 float128_silence_nan(float128, float_status *status); float128 float128_scalbn(float128, int, float_status *status); @@ -858,22 +976,22 @@ static inline float128 
float128_chs(float128 a) return a; } -static inline int float128_is_infinity(float128 a) +static inline bool float128_is_infinity(float128 a) { return (a.high & 0x7fffffffffffffffLL) == 0x7fff000000000000LL && a.low == 0; } -static inline int float128_is_neg(float128 a) +static inline bool float128_is_neg(float128 a) { return a.high >> 63; } -static inline int float128_is_zero(float128 a) +static inline bool float128_is_zero(float128 a) { return (a.high & 0x7fffffffffffffffLL) == 0 && a.low == 0; } -static inline int float128_is_zero_or_denormal(float128 a) +static inline bool float128_is_zero_or_denormal(float128 a) { return (a.high & 0x7fff000000000000LL) == 0; } @@ -888,12 +1006,53 @@ static inline bool float128_is_denormal(float128 a) return float128_is_zero_or_denormal(a) && !float128_is_zero(a); } -static inline int float128_is_any_nan(float128 a) +static inline bool float128_is_any_nan(float128 a) { return ((a.high >> 48) & 0x7fff) == 0x7fff && ((a.low != 0) || ((a.high & 0xffffffffffffLL) != 0)); } +static inline bool float128_eq(float128 a, float128 b, float_status *s) +{ + return float128_compare(a, b, s) == float_relation_equal; +} + +static inline bool float128_le(float128 a, float128 b, float_status *s) +{ + return float128_compare(a, b, s) <= float_relation_equal; +} + +static inline bool float128_lt(float128 a, float128 b, float_status *s) +{ + return float128_compare(a, b, s) < float_relation_equal; +} + +static inline bool float128_unordered(float128 a, float128 b, float_status *s) +{ + return float128_compare(a, b, s) == float_relation_unordered; +} + +static inline bool float128_eq_quiet(float128 a, float128 b, float_status *s) +{ + return float128_compare_quiet(a, b, s) == float_relation_equal; +} + +static inline bool float128_le_quiet(float128 a, float128 b, float_status *s) +{ + return float128_compare_quiet(a, b, s) <= float_relation_equal; +} + +static inline bool float128_lt_quiet(float128 a, float128 b, float_status *s) +{ + return float128_compare_quiet(a, b, s) < float_relation_equal; +} + +static inline bool float128_unordered_quiet(float128 a, float128 b, + float_status *s) +{ + return float128_compare_quiet(a, b, s) == float_relation_unordered; +} + #define float128_zero make_float128(0, 0) /*---------------------------------------------------------------------------- diff --git a/qemu/include/hw/registerfields.h b/qemu/include/hw/registerfields.h index 686aca1225..972876f98c 100644 --- a/qemu/include/hw/registerfields.h +++ b/qemu/include/hw/registerfields.h @@ -66,30 +66,30 @@ #define FIELD_DP8(storage, reg, field, val, d) { \ struct { \ unsigned int v:R_ ## reg ## _ ## field ## _LENGTH; \ - } v = { .v = val }; \ + } _v = { .v = val }; \ d = deposit32((storage), R_ ## reg ## _ ## field ## _SHIFT, \ - R_ ## reg ## _ ## field ## _LENGTH, v.v); \ + R_ ## reg ## _ ## field ## _LENGTH, _v.v); \ } #define FIELD_DP16(storage, reg, field, val, d) { \ struct { \ unsigned int v:R_ ## reg ## _ ## field ## _LENGTH; \ - } v = { .v = val }; \ + } _v = { .v = val }; \ d = deposit32((storage), R_ ## reg ## _ ## field ## _SHIFT, \ - R_ ## reg ## _ ## field ## _LENGTH, v.v); \ + R_ ## reg ## _ ## field ## _LENGTH, _v.v); \ } -#define FIELD_DP32(storage, reg, field, val, d) { \ - struct { \ - unsigned int v:R_ ## reg ## _ ## field ## _LENGTH; \ - } v = { .v = val }; \ - d = deposit32((storage), R_ ## reg ## _ ## field ## _SHIFT, \ - R_ ## reg ## _ ## field ## _LENGTH, v.v); \ +#define FIELD_DP32(storage, reg, field, val, d) { \ + struct { \ + unsigned int v:R_ ## reg ## _ ## 
field ## _LENGTH; \ + } v = { .v = val }; \ + d = deposit32((storage), R_ ## reg ## _ ## field ## _SHIFT, \ + R_ ## reg ## _ ## field ## _LENGTH, v.v); \ } -#define FIELD_DP64(storage, reg, field, val, d) { \ - struct { \ - unsigned int v:R_ ## reg ## _ ## field ## _LENGTH; \ - } v = { .v = val }; \ - d = deposit64((storage), R_ ## reg ## _ ## field ## _SHIFT, \ - R_ ## reg ## _ ## field ## _LENGTH, v.v); \ +#define FIELD_DP64(storage, reg, field, val, d) { \ + struct { \ + unsigned int v:R_ ## reg ## _ ## field ## _LENGTH; \ + } v = { .v = val }; \ + d = deposit64((storage), R_ ## reg ## _ ## field ## _SHIFT, \ + R_ ## reg ## _ ## field ## _LENGTH, v.v); \ } /* Deposit a field to array of registers. */ diff --git a/qemu/include/qemu/bswap.h b/qemu/include/qemu/bswap.h index 7591f6c88e..5afcf853f0 100644 --- a/qemu/include/qemu/bswap.h +++ b/qemu/include/qemu/bswap.h @@ -9,6 +9,8 @@ # include #elif defined(__FreeBSD__) # include +#elif defined(__HAIKU__) +# include #elif defined(CONFIG_BYTESWAP_H) # include diff --git a/qemu/include/qemu/compiler.h b/qemu/include/qemu/compiler.h index 971aa12721..e0cb4b3dd1 100644 --- a/qemu/include/qemu/compiler.h +++ b/qemu/include/qemu/compiler.h @@ -89,6 +89,8 @@ static union MSVC_FLOAT_HACK __NAN = {{0x00, 0x00, 0xC0, 0x7F}}; #define QEMU_FLATTEN #define QEMU_ALWAYS_INLINE __declspec(inline) +#define qemu_build_not_reached() __assume(0) + #else // Unix compilers #ifndef NAN diff --git a/qemu/include/qemu/host-utils.h b/qemu/include/qemu/host-utils.h index 0c5b30ff67..50063ffffe 100644 --- a/qemu/include/qemu/host-utils.h +++ b/qemu/include/qemu/host-utils.h @@ -100,8 +100,8 @@ static inline int divs128(int64_t *plow, int64_t *phigh, int64_t divisor) } } #else -void muls64(uint64_t *phigh, uint64_t *plow, int64_t a, int64_t b); -void mulu64(uint64_t *phigh, uint64_t *plow, uint64_t a, uint64_t b); +void muls64(uint64_t *plow, uint64_t *phigh, int64_t a, int64_t b); +void mulu64(uint64_t *plow, uint64_t *phigh, uint64_t a, uint64_t b); int divu128(uint64_t *plow, uint64_t *phigh, uint64_t divisor); int divs128(int64_t *plow, int64_t *phigh, int64_t divisor); diff --git a/qemu/include/qemu/osdep.h b/qemu/include/qemu/osdep.h index ad18b8ddd6..80d0869acd 100644 --- a/qemu/include/qemu/osdep.h +++ b/qemu/include/qemu/osdep.h @@ -96,7 +96,7 @@ struct uc_struct; #include #include -#ifdef __OpenBSD__ +#ifdef HAVE_SYS_SIGNAL_H #include #endif @@ -189,6 +189,9 @@ struct uc_struct; #ifndef MAP_ANONYMOUS #define MAP_ANONYMOUS MAP_ANON #endif +#ifndef MAP_FIXED_NOREPLACE +#define MAP_FIXED_NOREPLACE 0 +#endif #ifndef ENOMEDIUM #define ENOMEDIUM ENODEV #endif @@ -252,18 +255,72 @@ struct uc_struct; #define SIZE_MAX ((size_t)-1) #endif -#ifndef MIN -#define MIN(a, b) (((a) < (b)) ? (a) : (b)) +/* + * Two variations of MIN/MAX macros. The first is for runtime use, and + * evaluates arguments only once (so it is safe even with side + * effects), but will not work in constant contexts (such as array + * size declarations) because of the '{}'. The second is for constant + * expression use, where evaluating arguments twice is safe because + * the result is going to be constant anyway, but will not work in a + * runtime context because of a void expression where a value is + * expected. Thus, both gcc and clang will fail to compile if you use + * the wrong macro (even if the error may seem a bit cryptic). 
+ * + * Note that neither form is usable as an #if condition; if you truly + * need to write conditional code that depends on a minimum or maximum + * determined by the pre-processor instead of the compiler, you'll + * have to open-code it. Sadly, Coverity is severely confused by the + * constant variants, so we have to dumb things down there. + */ +#undef MIN +#ifdef _MSC_VER +#define MIN(a, b) ((a) < (b) ? (a) : (b)) +#else +#define MIN(a, b) \ + ({ \ + typeof(1 ? (a) : (b)) _a = (a), _b = (b); \ + _a < _b ? _a : _b; \ + }) +#endif + +#undef MAX +#ifdef _MSC_VER + // MSVC version + #define MAX(a, b) ((a) > (b) ? (a) : (b)) +#else + // GCC/Clang version with statement expression + #define MAX(a, b) ({ \ + typeof(1 ? (a) : (b)) _a = (a), _b = (b); \ + _a > _b ? _a : _b; \ + }) #endif -#ifndef MAX -#define MAX(a, b) (((a) > (b)) ? (a) : (b)) + +#if defined(__COVERITY__) || defined(_MSC_VER) +# define MIN_CONST(a, b) ((a) < (b) ? (a) : (b)) +# define MAX_CONST(a, b) ((a) > (b) ? (a) : (b)) +#else +# define MIN_CONST(a, b) \ + __builtin_choose_expr( \ + __builtin_constant_p(a) && __builtin_constant_p(b), \ + (a) < (b) ? (a) : (b), \ + ((void)0)) +# define MAX_CONST(a, b) \ + __builtin_choose_expr( \ + __builtin_constant_p(a) && __builtin_constant_p(b), \ + (a) > (b) ? (a) : (b), \ + ((void)0)) #endif -/* Minimum function that returns zero only iff both values are zero. - * Intended for use with unsigned values only. */ +/* + * Minimum function that returns zero only if both values are zero. + * Intended for use with unsigned values only. + */ #ifndef MIN_NON_ZERO -#define MIN_NON_ZERO(a, b) ((a) == 0 ? (b) : \ - ((b) == 0 ? (a) : (MIN(a, b)))) +#define MIN_NON_ZERO(a, b) \ + ({ \ + typeof(1 ? (a) : (b)) _a = (a), _b = (b); \ + _a == 0 ? _b : (_b == 0 || _b > _a) ? _a : _b; \ + }) #endif /* Round number down to multiple */ @@ -408,7 +465,7 @@ void qemu_anon_ram_free(struct uc_struct *uc, void *ptr, size_t size); #define HAVE_CHARDEV_SERIAL 1 #elif defined(__linux__) || defined(__sun__) || defined(__FreeBSD__) \ || defined(__NetBSD__) || defined(__OpenBSD__) || defined(__DragonFly__) \ - || defined(__GLIBC__) + || defined(__GLIBC__) || defined(__APPLE__) #define HAVE_CHARDEV_SERIAL 1 #endif @@ -417,6 +474,10 @@ void qemu_anon_ram_free(struct uc_struct *uc, void *ptr, size_t size); #define HAVE_CHARDEV_PARPORT 1 #endif +#if defined(__HAIKU__) +#define SIGIO SIGPOLL +#endif + #if defined(CONFIG_LINUX) #ifndef BUS_MCEERR_AR #define BUS_MCEERR_AR 4 diff --git a/qemu/include/tcg/tcg-op-gvec.h b/qemu/include/tcg/tcg-op-gvec.h index dd414fc768..5610e89f99 100644 --- a/qemu/include/tcg/tcg-op-gvec.h +++ b/qemu/include/tcg/tcg-op-gvec.h @@ -39,56 +39,61 @@ void tcg_gen_gvec_2_ool(TCGContext *tcg_ctx, uint32_t dofs, uint32_t aofs, gen_helper_gvec_2 *fn); /* Similarly, passing an extra data value. */ -typedef void gen_helper_gvec_2i(TCGContext *, TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv_i32); -void tcg_gen_gvec_2i_ool(TCGContext *tcg_ctx, uint32_t dofs, uint32_t aofs, TCGv_i64 c, - uint32_t oprsz, uint32_t maxsz, int32_t data, - gen_helper_gvec_2i *fn); +typedef void gen_helper_gvec_2i(TCGContext *, TCGv_ptr, TCGv_ptr, TCGv_i64, + TCGv_i32); +void tcg_gen_gvec_2i_ool(TCGContext *tcg_ctx, uint32_t dofs, uint32_t aofs, + TCGv_i64 c, uint32_t oprsz, uint32_t maxsz, + int32_t data, gen_helper_gvec_2i *fn); /* Similarly, passing an extra pointer (e.g. env or float_status). 
*/ -typedef void gen_helper_gvec_2_ptr(TCGContext *, TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32); +typedef void gen_helper_gvec_2_ptr(TCGContext *, TCGv_ptr, TCGv_ptr, TCGv_ptr, + TCGv_i32); void tcg_gen_gvec_2_ptr(TCGContext *tcg_ctx, uint32_t dofs, uint32_t aofs, TCGv_ptr ptr, uint32_t oprsz, uint32_t maxsz, int32_t data, gen_helper_gvec_2_ptr *fn); /* Similarly, with three vector operands. */ -typedef void gen_helper_gvec_3(TCGContext *, TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32); -void tcg_gen_gvec_3_ool(TCGContext *tcg_ctx, uint32_t dofs, uint32_t aofs, uint32_t bofs, - uint32_t oprsz, uint32_t maxsz, int32_t data, - gen_helper_gvec_3 *fn); +typedef void gen_helper_gvec_3(TCGContext *, TCGv_ptr, TCGv_ptr, TCGv_ptr, + TCGv_i32); +void tcg_gen_gvec_3_ool(TCGContext *tcg_ctx, uint32_t dofs, uint32_t aofs, + uint32_t bofs, uint32_t oprsz, uint32_t maxsz, + int32_t data, gen_helper_gvec_3 *fn); /* Similarly, with four vector operands. */ typedef void gen_helper_gvec_4(TCGContext *, TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32); -void tcg_gen_gvec_4_ool(TCGContext *tcg_ctx, uint32_t dofs, uint32_t aofs, uint32_t bofs, - uint32_t cofs, uint32_t oprsz, uint32_t maxsz, - int32_t data, gen_helper_gvec_4 *fn); +void tcg_gen_gvec_4_ool(TCGContext *tcg_ctx, uint32_t dofs, uint32_t aofs, + uint32_t bofs, uint32_t cofs, uint32_t oprsz, + uint32_t maxsz, int32_t data, gen_helper_gvec_4 *fn); /* Similarly, with five vector operands. */ -typedef void gen_helper_gvec_5(TCGContext *, TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_ptr, - TCGv_ptr, TCGv_i32); -void tcg_gen_gvec_5_ool(TCGContext *tcg_ctx, uint32_t dofs, uint32_t aofs, uint32_t bofs, - uint32_t cofs, uint32_t xofs, uint32_t oprsz, - uint32_t maxsz, int32_t data, gen_helper_gvec_5 *fn); +typedef void gen_helper_gvec_5(TCGContext *, TCGv_ptr, TCGv_ptr, TCGv_ptr, + TCGv_ptr, TCGv_ptr, TCGv_i32); +void tcg_gen_gvec_5_ool(TCGContext *tcg_ctx, uint32_t dofs, uint32_t aofs, + uint32_t bofs, uint32_t cofs, uint32_t xofs, + uint32_t oprsz, uint32_t maxsz, int32_t data, + gen_helper_gvec_5 *fn); typedef void gen_helper_gvec_3_ptr(TCGContext *, TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32); -void tcg_gen_gvec_3_ptr(TCGContext *tcg_ctx, uint32_t dofs, uint32_t aofs, uint32_t bofs, - TCGv_ptr ptr, uint32_t oprsz, uint32_t maxsz, - int32_t data, gen_helper_gvec_3_ptr *fn); +void tcg_gen_gvec_3_ptr(TCGContext *tcg_ctx, uint32_t dofs, uint32_t aofs, + uint32_t bofs, TCGv_ptr ptr, uint32_t oprsz, + uint32_t maxsz, int32_t data, + gen_helper_gvec_3_ptr *fn); typedef void gen_helper_gvec_4_ptr(TCGContext *, TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32); -void tcg_gen_gvec_4_ptr(TCGContext *tcg_ctx, uint32_t dofs, uint32_t aofs, uint32_t bofs, - uint32_t cofs, TCGv_ptr ptr, uint32_t oprsz, - uint32_t maxsz, int32_t data, +void tcg_gen_gvec_4_ptr(TCGContext *tcg_ctx, uint32_t dofs, uint32_t aofs, + uint32_t bofs, uint32_t cofs, TCGv_ptr ptr, + uint32_t oprsz, uint32_t maxsz, int32_t data, gen_helper_gvec_4_ptr *fn); -typedef void gen_helper_gvec_5_ptr(TCGContext *, TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_ptr, - TCGv_ptr, TCGv_ptr, TCGv_i32); -void tcg_gen_gvec_5_ptr(TCGContext *tcg_ctx, uint32_t dofs, uint32_t aofs, uint32_t bofs, - uint32_t cofs, uint32_t eofs, TCGv_ptr ptr, - uint32_t oprsz, uint32_t maxsz, int32_t data, - gen_helper_gvec_5_ptr *fn); +typedef void gen_helper_gvec_5_ptr(TCGContext *, TCGv_ptr, TCGv_ptr, TCGv_ptr, + TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32); +void tcg_gen_gvec_5_ptr(TCGContext *tcg_ctx, uint32_t dofs, uint32_t aofs, + uint32_t bofs, 
uint32_t cofs, uint32_t eofs, + TCGv_ptr ptr, uint32_t oprsz, uint32_t maxsz, + int32_t data, gen_helper_gvec_5_ptr *fn); /* Expand a gvec operation. Either inline or out-of-line depending on the actual vector size and the operations supported by the host. */ @@ -109,6 +114,8 @@ typedef struct { uint8_t vece; /* Prefer i64 to v64. */ bool prefer_i64; + /* Load dest as a 2nd source operand. */ + bool load_dest; } GVecGen2; typedef struct { @@ -201,7 +208,8 @@ typedef struct { void (*fni8)(TCGContext *, TCGv_i64, TCGv_i64, TCGv_i64, TCGv_i64); void (*fni4)(TCGContext *, TCGv_i32, TCGv_i32, TCGv_i32, TCGv_i32); /* Expand inline with a host vector type. */ - void (*fniv)(TCGContext *, unsigned, TCGv_vec, TCGv_vec, TCGv_vec, TCGv_vec); + void (*fniv)(TCGContext *, unsigned, TCGv_vec, TCGv_vec, TCGv_vec, + TCGv_vec); /* Expand out-of-line helper w/descriptor. */ gen_helper_gvec_4 *fno; /* The optional opcodes, if any, utilized by .fniv. */ @@ -218,110 +226,146 @@ typedef struct { void tcg_gen_gvec_2(TCGContext *tcg_ctx, uint32_t dofs, uint32_t aofs, uint32_t oprsz, uint32_t maxsz, const GVecGen2 *); -void tcg_gen_gvec_2i(TCGContext *tcg_ctx, uint32_t dofs, uint32_t aofs, uint32_t oprsz, - uint32_t maxsz, int64_t c, const GVecGen2i *); -void tcg_gen_gvec_2s(TCGContext *tcg_ctx, uint32_t dofs, uint32_t aofs, uint32_t oprsz, - uint32_t maxsz, TCGv_i64 c, const GVecGen2s *); -void tcg_gen_gvec_3(TCGContext *tcg_ctx, uint32_t dofs, uint32_t aofs, uint32_t bofs, - uint32_t oprsz, uint32_t maxsz, const GVecGen3 *); -void tcg_gen_gvec_3i(TCGContext *tcg_ctx, uint32_t dofs, uint32_t aofs, uint32_t bofs, +void tcg_gen_gvec_2i(TCGContext *tcg_ctx, uint32_t dofs, uint32_t aofs, uint32_t oprsz, uint32_t maxsz, int64_t c, + const GVecGen2i *); +void tcg_gen_gvec_2s(TCGContext *tcg_ctx, uint32_t dofs, uint32_t aofs, + uint32_t oprsz, uint32_t maxsz, TCGv_i64 c, + const GVecGen2s *); +void tcg_gen_gvec_3(TCGContext *tcg_ctx, uint32_t dofs, uint32_t aofs, + uint32_t bofs, uint32_t oprsz, uint32_t maxsz, + const GVecGen3 *); +void tcg_gen_gvec_3i(TCGContext *tcg_ctx, uint32_t dofs, uint32_t aofs, + uint32_t bofs, uint32_t oprsz, uint32_t maxsz, int64_t c, const GVecGen3i *); -void tcg_gen_gvec_4(TCGContext *tcg_ctx, uint32_t dofs, uint32_t aofs, uint32_t bofs, uint32_t cofs, - uint32_t oprsz, uint32_t maxsz, const GVecGen4 *); +void tcg_gen_gvec_4(TCGContext *tcg_ctx, uint32_t dofs, uint32_t aofs, + uint32_t bofs, uint32_t cofs, uint32_t oprsz, + uint32_t maxsz, const GVecGen4 *); /* Expand a specific vector operation. 
*/ -void tcg_gen_gvec_mov(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, uint32_t aofs, - uint32_t oprsz, uint32_t maxsz); -void tcg_gen_gvec_not(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, uint32_t aofs, - uint32_t oprsz, uint32_t maxsz); -void tcg_gen_gvec_neg(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, uint32_t aofs, - uint32_t oprsz, uint32_t maxsz); -void tcg_gen_gvec_abs(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, uint32_t aofs, - uint32_t oprsz, uint32_t maxsz); - -void tcg_gen_gvec_add(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, uint32_t aofs, - uint32_t bofs, uint32_t oprsz, uint32_t maxsz); -void tcg_gen_gvec_sub(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, uint32_t aofs, - uint32_t bofs, uint32_t oprsz, uint32_t maxsz); -void tcg_gen_gvec_mul(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, uint32_t aofs, - uint32_t bofs, uint32_t oprsz, uint32_t maxsz); - -void tcg_gen_gvec_addi(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, uint32_t aofs, - int64_t c, uint32_t oprsz, uint32_t maxsz); -void tcg_gen_gvec_muli(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, uint32_t aofs, - int64_t c, uint32_t oprsz, uint32_t maxsz); - -void tcg_gen_gvec_adds(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, uint32_t aofs, - TCGv_i64 c, uint32_t oprsz, uint32_t maxsz); -void tcg_gen_gvec_subs(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, uint32_t aofs, - TCGv_i64 c, uint32_t oprsz, uint32_t maxsz); -void tcg_gen_gvec_muls(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, uint32_t aofs, - TCGv_i64 c, uint32_t oprsz, uint32_t maxsz); +void tcg_gen_gvec_mov(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, + uint32_t aofs, uint32_t oprsz, uint32_t maxsz); +void tcg_gen_gvec_not(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, + uint32_t aofs, uint32_t oprsz, uint32_t maxsz); +void tcg_gen_gvec_neg(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, + uint32_t aofs, uint32_t oprsz, uint32_t maxsz); +void tcg_gen_gvec_abs(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, + uint32_t aofs, uint32_t oprsz, uint32_t maxsz); + +void tcg_gen_gvec_add(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, + uint32_t aofs, uint32_t bofs, uint32_t oprsz, + uint32_t maxsz); +void tcg_gen_gvec_sub(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, + uint32_t aofs, uint32_t bofs, uint32_t oprsz, + uint32_t maxsz); +void tcg_gen_gvec_mul(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, + uint32_t aofs, uint32_t bofs, uint32_t oprsz, + uint32_t maxsz); + +void tcg_gen_gvec_addi(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, + uint32_t aofs, int64_t c, uint32_t oprsz, + uint32_t maxsz); +void tcg_gen_gvec_muli(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, + uint32_t aofs, int64_t c, uint32_t oprsz, + uint32_t maxsz); + +void tcg_gen_gvec_adds(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, + uint32_t aofs, TCGv_i64 c, uint32_t oprsz, + uint32_t maxsz); +void tcg_gen_gvec_subs(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, + uint32_t aofs, TCGv_i64 c, uint32_t oprsz, + uint32_t maxsz); +void tcg_gen_gvec_muls(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, + uint32_t aofs, TCGv_i64 c, uint32_t oprsz, + uint32_t maxsz); /* Saturated arithmetic. 
*/ -void tcg_gen_gvec_ssadd(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, uint32_t aofs, - uint32_t bofs, uint32_t oprsz, uint32_t maxsz); -void tcg_gen_gvec_sssub(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, uint32_t aofs, - uint32_t bofs, uint32_t oprsz, uint32_t maxsz); -void tcg_gen_gvec_usadd(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, uint32_t aofs, - uint32_t bofs, uint32_t oprsz, uint32_t maxsz); -void tcg_gen_gvec_ussub(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, uint32_t aofs, - uint32_t bofs, uint32_t oprsz, uint32_t maxsz); +void tcg_gen_gvec_ssadd(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, + uint32_t aofs, uint32_t bofs, uint32_t oprsz, + uint32_t maxsz); +void tcg_gen_gvec_sssub(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, + uint32_t aofs, uint32_t bofs, uint32_t oprsz, + uint32_t maxsz); +void tcg_gen_gvec_usadd(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, + uint32_t aofs, uint32_t bofs, uint32_t oprsz, + uint32_t maxsz); +void tcg_gen_gvec_ussub(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, + uint32_t aofs, uint32_t bofs, uint32_t oprsz, + uint32_t maxsz); /* Min/max. */ -void tcg_gen_gvec_smin(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, uint32_t aofs, - uint32_t bofs, uint32_t oprsz, uint32_t maxsz); -void tcg_gen_gvec_umin(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, uint32_t aofs, - uint32_t bofs, uint32_t oprsz, uint32_t maxsz); -void tcg_gen_gvec_smax(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, uint32_t aofs, - uint32_t bofs, uint32_t oprsz, uint32_t maxsz); -void tcg_gen_gvec_umax(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, uint32_t aofs, - uint32_t bofs, uint32_t oprsz, uint32_t maxsz); - -void tcg_gen_gvec_and(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, uint32_t aofs, - uint32_t bofs, uint32_t oprsz, uint32_t maxsz); -void tcg_gen_gvec_or(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, uint32_t aofs, - uint32_t bofs, uint32_t oprsz, uint32_t maxsz); -void tcg_gen_gvec_xor(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, uint32_t aofs, - uint32_t bofs, uint32_t oprsz, uint32_t maxsz); -void tcg_gen_gvec_andc(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, uint32_t aofs, - uint32_t bofs, uint32_t oprsz, uint32_t maxsz); -void tcg_gen_gvec_orc(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, uint32_t aofs, - uint32_t bofs, uint32_t oprsz, uint32_t maxsz); -void tcg_gen_gvec_nand(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, uint32_t aofs, - uint32_t bofs, uint32_t oprsz, uint32_t maxsz); -void tcg_gen_gvec_nor(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, uint32_t aofs, - uint32_t bofs, uint32_t oprsz, uint32_t maxsz); -void tcg_gen_gvec_eqv(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, uint32_t aofs, - uint32_t bofs, uint32_t oprsz, uint32_t maxsz); - -void tcg_gen_gvec_andi(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, uint32_t aofs, - int64_t c, uint32_t oprsz, uint32_t maxsz); -void tcg_gen_gvec_xori(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, uint32_t aofs, - int64_t c, uint32_t oprsz, uint32_t maxsz); -void tcg_gen_gvec_ori(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, uint32_t aofs, - int64_t c, uint32_t oprsz, uint32_t maxsz); - -void tcg_gen_gvec_ands(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, uint32_t aofs, - TCGv_i64 c, uint32_t oprsz, uint32_t maxsz); -void tcg_gen_gvec_xors(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, uint32_t aofs, - TCGv_i64 c, uint32_t oprsz, uint32_t maxsz); -void tcg_gen_gvec_ors(TCGContext 
*tcg_ctx, unsigned vece, uint32_t dofs, uint32_t aofs, - TCGv_i64 c, uint32_t oprsz, uint32_t maxsz); - -void tcg_gen_gvec_dup_mem(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, uint32_t aofs, - uint32_t s, uint32_t m); +void tcg_gen_gvec_smin(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, + uint32_t aofs, uint32_t bofs, uint32_t oprsz, + uint32_t maxsz); +void tcg_gen_gvec_umin(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, + uint32_t aofs, uint32_t bofs, uint32_t oprsz, + uint32_t maxsz); +void tcg_gen_gvec_smax(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, + uint32_t aofs, uint32_t bofs, uint32_t oprsz, + uint32_t maxsz); +void tcg_gen_gvec_umax(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, + uint32_t aofs, uint32_t bofs, uint32_t oprsz, + uint32_t maxsz); + +void tcg_gen_gvec_and(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, + uint32_t aofs, uint32_t bofs, uint32_t oprsz, + uint32_t maxsz); +void tcg_gen_gvec_or(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, + uint32_t aofs, uint32_t bofs, uint32_t oprsz, + uint32_t maxsz); +void tcg_gen_gvec_xor(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, + uint32_t aofs, uint32_t bofs, uint32_t oprsz, + uint32_t maxsz); +void tcg_gen_gvec_andc(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, + uint32_t aofs, uint32_t bofs, uint32_t oprsz, + uint32_t maxsz); +void tcg_gen_gvec_orc(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, + uint32_t aofs, uint32_t bofs, uint32_t oprsz, + uint32_t maxsz); +void tcg_gen_gvec_nand(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, + uint32_t aofs, uint32_t bofs, uint32_t oprsz, + uint32_t maxsz); +void tcg_gen_gvec_nor(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, + uint32_t aofs, uint32_t bofs, uint32_t oprsz, + uint32_t maxsz); +void tcg_gen_gvec_eqv(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, + uint32_t aofs, uint32_t bofs, uint32_t oprsz, + uint32_t maxsz); + +void tcg_gen_gvec_andi(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, + uint32_t aofs, int64_t c, uint32_t oprsz, + uint32_t maxsz); +void tcg_gen_gvec_xori(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, + uint32_t aofs, int64_t c, uint32_t oprsz, + uint32_t maxsz); +void tcg_gen_gvec_ori(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, + uint32_t aofs, int64_t c, uint32_t oprsz, uint32_t maxsz); + +void tcg_gen_gvec_ands(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, + uint32_t aofs, TCGv_i64 c, uint32_t oprsz, + uint32_t maxsz); +void tcg_gen_gvec_xors(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, + uint32_t aofs, TCGv_i64 c, uint32_t oprsz, + uint32_t maxsz); +void tcg_gen_gvec_ors(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, + uint32_t aofs, TCGv_i64 c, uint32_t oprsz, + uint32_t maxsz); + +void tcg_gen_gvec_dup_mem(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, + uint32_t aofs, uint32_t s, uint32_t m); +void tcg_gen_gvec_dup_imm(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, uint32_t s, + uint32_t m, uint64_t imm); void tcg_gen_gvec_dup_i32(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, uint32_t s, uint32_t m, TCGv_i32); void tcg_gen_gvec_dup_i64(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, uint32_t s, uint32_t m, TCGv_i64); -void tcg_gen_gvec_dup8i(TCGContext *tcg_ctx, uint32_t dofs, uint32_t s, uint32_t m, uint8_t x); -void tcg_gen_gvec_dup16i(TCGContext *tcg_ctx, uint32_t dofs, uint32_t s, uint32_t m, uint16_t x); -void tcg_gen_gvec_dup32i(TCGContext *tcg_ctx, uint32_t dofs, uint32_t s, uint32_t m, uint32_t x); -void tcg_gen_gvec_dup64i(TCGContext 
*tcg_ctx, uint32_t dofs, uint32_t s, uint32_t m, uint64_t x); +#if TARGET_LONG_BITS == 64 +#define tcg_gen_gvec_dup_tl tcg_gen_gvec_dup_i64 +#else +#define tcg_gen_gvec_dup_tl tcg_gen_gvec_dup_i32 +#endif void tcg_gen_gvec_shli(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, uint32_t aofs, int64_t shift, uint32_t oprsz, uint32_t maxsz); @@ -329,6 +373,10 @@ void tcg_gen_gvec_shri(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, uint32 int64_t shift, uint32_t oprsz, uint32_t maxsz); void tcg_gen_gvec_sari(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, uint32_t aofs, int64_t shift, uint32_t oprsz, uint32_t maxsz); +void tcg_gen_gvec_rotli(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, uint32_t aofs, + int64_t shift, uint32_t oprsz, uint32_t maxsz); +void tcg_gen_gvec_rotri(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, uint32_t aofs, + int64_t shift, uint32_t oprsz, uint32_t maxsz); void tcg_gen_gvec_shls(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, uint32_t aofs, TCGv_i32 shift, uint32_t oprsz, uint32_t maxsz); @@ -336,6 +384,8 @@ void tcg_gen_gvec_shrs(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, uint32 TCGv_i32 shift, uint32_t oprsz, uint32_t maxsz); void tcg_gen_gvec_sars(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, uint32_t aofs, TCGv_i32 shift, uint32_t oprsz, uint32_t maxsz); +void tcg_gen_gvec_rotls(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, uint32_t aofs, + TCGv_i32 shift, uint32_t oprsz, uint32_t maxsz); /* * Perform vector shift by vector element, modulo the element size. @@ -347,6 +397,10 @@ void tcg_gen_gvec_shrv(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, uint32 uint32_t bofs, uint32_t oprsz, uint32_t maxsz); void tcg_gen_gvec_sarv(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, uint32_t aofs, uint32_t bofs, uint32_t oprsz, uint32_t maxsz); +void tcg_gen_gvec_rotlv(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, uint32_t aofs, + uint32_t bofs, uint32_t oprsz, uint32_t maxsz); +void tcg_gen_gvec_rotrv(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, uint32_t aofs, + uint32_t bofs, uint32_t oprsz, uint32_t maxsz); void tcg_gen_gvec_cmp(TCGContext *tcg_ctx, TCGCond cond, unsigned vece, uint32_t dofs, uint32_t aofs, uint32_t bofs, @@ -383,5 +437,7 @@ void tcg_gen_vec_shr8i_i64(TCGContext *tcg_ctx, TCGv_i64 d, TCGv_i64 a, int64_t) void tcg_gen_vec_shr16i_i64(TCGContext *tcg_ctx, TCGv_i64 d, TCGv_i64 a, int64_t); void tcg_gen_vec_sar8i_i64(TCGContext *tcg_ctx, TCGv_i64 d, TCGv_i64 a, int64_t); void tcg_gen_vec_sar16i_i64(TCGContext *tcg_ctx, TCGv_i64 d, TCGv_i64 a, int64_t); +void tcg_gen_vec_rotl8i_i64(TCGContext *tcg_ctx, TCGv_i64 d, TCGv_i64 a, int64_t c); +void tcg_gen_vec_rotl16i_i64(TCGContext *tcg_ctx, TCGv_i64 d, TCGv_i64 a, int64_t c); #endif diff --git a/qemu/include/tcg/tcg-op.h b/qemu/include/tcg/tcg-op.h index 93026d1d51..5b9685da03 100644 --- a/qemu/include/tcg/tcg-op.h +++ b/qemu/include/tcg/tcg-op.h @@ -359,9 +359,9 @@ void tcg_gen_ctzi_i32(TCGContext *tcg_ctx, TCGv_i32 ret, TCGv_i32 arg1, uint32_t void tcg_gen_clrsb_i32(TCGContext *tcg_ctx, TCGv_i32 ret, TCGv_i32 arg); void tcg_gen_ctpop_i32(TCGContext *tcg_ctx, TCGv_i32 a1, TCGv_i32 a2); void tcg_gen_rotl_i32(TCGContext *tcg_ctx, TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2); -void tcg_gen_rotli_i32(TCGContext *tcg_ctx, TCGv_i32 ret, TCGv_i32 arg1, unsigned arg2); +void tcg_gen_rotli_i32(TCGContext *tcg_ctx, TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2); void tcg_gen_rotr_i32(TCGContext *tcg_ctx, TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2); -void 
tcg_gen_rotri_i32(TCGContext *tcg_ctx, TCGv_i32 ret, TCGv_i32 arg1, unsigned arg2); +void tcg_gen_rotri_i32(TCGContext *tcg_ctx, TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2); void tcg_gen_deposit_i32(TCGContext *tcg_ctx, TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2, unsigned int ofs, unsigned int len); void tcg_gen_deposit_z_i32(TCGContext *tcg_ctx, TCGv_i32 ret, TCGv_i32 arg, @@ -569,9 +569,9 @@ void tcg_gen_ctzi_i64(TCGContext *tcg_ctx, TCGv_i64 ret, TCGv_i64 arg1, uint64_t void tcg_gen_clrsb_i64(TCGContext *tcg_ctx, TCGv_i64 ret, TCGv_i64 arg); void tcg_gen_ctpop_i64(TCGContext *tcg_ctx, TCGv_i64 a1, TCGv_i64 a2); void tcg_gen_rotl_i64(TCGContext *tcg_ctx, TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2); -void tcg_gen_rotli_i64(TCGContext *tcg_ctx, TCGv_i64 ret, TCGv_i64 arg1, unsigned arg2); +void tcg_gen_rotli_i64(TCGContext *tcg_ctx, TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2); void tcg_gen_rotr_i64(TCGContext *tcg_ctx, TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2); -void tcg_gen_rotri_i64(TCGContext *tcg_ctx, TCGv_i64 ret, TCGv_i64 arg1, unsigned arg2); +void tcg_gen_rotri_i64(TCGContext *tcg_ctx, TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2); void tcg_gen_deposit_i64(TCGContext *tcg_ctx, TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2, unsigned int ofs, unsigned int len); void tcg_gen_deposit_z_i64(TCGContext *tcg_ctx, TCGv_i64 ret, TCGv_i64 arg, @@ -1078,14 +1078,19 @@ void tcg_gen_umax_vec(TCGContext *tcg_ctx, unsigned vece, TCGv_vec r, TCGv_vec a void tcg_gen_shli_vec(TCGContext *tcg_ctx, unsigned vece, TCGv_vec r, TCGv_vec a, int64_t i); void tcg_gen_shri_vec(TCGContext *tcg_ctx, unsigned vece, TCGv_vec r, TCGv_vec a, int64_t i); void tcg_gen_sari_vec(TCGContext *tcg_ctx, unsigned vece, TCGv_vec r, TCGv_vec a, int64_t i); +void tcg_gen_rotli_vec(TCGContext *tcg_ctx, unsigned vece, TCGv_vec r, TCGv_vec a, int64_t i); +void tcg_gen_rotri_vec(TCGContext *tcg_ctx, unsigned vece, TCGv_vec r, TCGv_vec a, int64_t i); void tcg_gen_shls_vec(TCGContext *tcg_ctx, unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_i32 s); void tcg_gen_shrs_vec(TCGContext *tcg_ctx, unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_i32 s); void tcg_gen_sars_vec(TCGContext *tcg_ctx, unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_i32 s); +void tcg_gen_rotls_vec(TCGContext *tcg_ctx, unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_i32 s); void tcg_gen_shlv_vec(TCGContext *tcg_ctx, unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec s); void tcg_gen_shrv_vec(TCGContext *tcg_ctx, unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec s); void tcg_gen_sarv_vec(TCGContext *tcg_ctx, unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec s); +void tcg_gen_rotlv_vec(TCGContext *tcg_ctx, unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec s); +void tcg_gen_rotrv_vec(TCGContext *tcg_ctx, unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec s); void tcg_gen_cmp_vec(TCGContext *tcg_ctx, TCGCond cond, unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b); diff --git a/qemu/include/tcg/tcg-opc.h b/qemu/include/tcg/tcg-opc.h index 22033870bf..a583ca4900 100644 --- a/qemu/include/tcg/tcg-opc.h +++ b/qemu/include/tcg/tcg-opc.h @@ -270,19 +270,28 @@ DEF(not_vec, 1, 1, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_not_vec)) DEF(shli_vec, 1, 1, 1, IMPLVEC | IMPL(TCG_TARGET_HAS_shi_vec)) DEF(shri_vec, 1, 1, 1, IMPLVEC | IMPL(TCG_TARGET_HAS_shi_vec)) DEF(sari_vec, 1, 1, 1, IMPLVEC | IMPL(TCG_TARGET_HAS_shi_vec)) +DEF(rotli_vec, 1, 1, 1, IMPLVEC | IMPL(TCG_TARGET_HAS_roti_vec)) DEF(shls_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_shs_vec)) DEF(shrs_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_shs_vec)) 
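The rotli_vec/rotls_vec/rotlv_vec/rotrv_vec opcodes added in this hunk are optional: the new TCG_TARGET_HAS_roti_vec/_rots_vec/_rotv_vec flags default to 0 in tcg.h, so a backend that provides nothing falls back to an out-of-line gvec helper. A rough sketch of what such a fallback does, assuming only the standard descriptor accessors simd_oprsz()/simd_data(); the function name is invented, this is not the body of tcg-runtime-gvec.c, and the maxsz tail-clearing a real helper performs is omitted:

/* Illustration only: immediate left-rotate of every byte in a gvec buffer. */
void helper_gvec_rotl8i_sketch(void *d, void *a, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);   /* bytes covered by the operation */
    int shift = simd_data(desc) & 7;     /* rotate count, masked to the element width */
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
        uint8_t x = *(uint8_t *)((char *)a + i);
        *(uint8_t *)((char *)d + i) = (uint8_t)((x << shift) | (x >> (-shift & 7)));
    }
}

The same identity, rotl(x, c) == (x << c) | (x >> (w - c)) for element width w, is what allows a shift-plus-OR lowering on the inline path when a host only implements the plain vector shifts.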
DEF(sars_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_shs_vec)) +DEF(rotls_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_rots_vec)) #ifdef _MSC_VER -DEF(shlv_vec, 1, 2, 0, IMPLVEC) -DEF(shrv_vec, 1, 2, 0, IMPLVEC) -DEF(sarv_vec, 1, 2, 0, IMPLVEC) +// For MSVC, pre-compute the flags since it can't evaluate the OR at compile time +#define VEC_FLAGS (TCG_OPF_VECTOR | TCG_OPF_NOT_PRESENT) +DEF(shlv_vec, 1, 2, 0, VEC_FLAGS) +DEF(shrv_vec, 1, 2, 0, VEC_FLAGS) +DEF(sarv_vec, 1, 2, 0, VEC_FLAGS) +DEF(rotlv_vec, 1, 2, 0, VEC_FLAGS) +DEF(rotrv_vec, 1, 2, 0, VEC_FLAGS) +#undef VEC_FLAGS #else -DEF(shlv_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_shv_vec)) -DEF(shrv_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_shv_vec)) -DEF(sarv_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_shv_vec)) +DEF(shlv_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_shv_vec)) +DEF(shrv_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_shv_vec)) +DEF(sarv_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_shv_vec)) +DEF(rotlv_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_rotv_vec)) +DEF(rotrv_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_rotv_vec)) #endif DEF(cmp_vec, 1, 2, 1, IMPLVEC) diff --git a/qemu/include/tcg/tcg.h b/qemu/include/tcg/tcg.h index ade583e43f..966103e25d 100644 --- a/qemu/include/tcg/tcg.h +++ b/qemu/include/tcg/tcg.h @@ -182,6 +182,9 @@ typedef uint64_t TCGRegSet; #define TCG_TARGET_HAS_not_vec 0 #define TCG_TARGET_HAS_andc_vec 0 #define TCG_TARGET_HAS_orc_vec 0 +#define TCG_TARGET_HAS_roti_vec 0 +#define TCG_TARGET_HAS_rots_vec 0 +#define TCG_TARGET_HAS_rotv_vec 0 #define TCG_TARGET_HAS_shi_vec 0 #define TCG_TARGET_HAS_shs_vec 0 #define TCG_TARGET_HAS_shv_vec 0 @@ -721,7 +724,7 @@ struct TCGContext { void *tb_ret_addr; /* target/riscv/translate.c */ - TCGv cpu_gpr[32], cpu_pc; // also target/mips/translate.c + TCGv cpu_gpr[32], cpu_pc, cpu_vl; // also target/mips/translate.c TCGv_i64 cpu_fpr[32]; /* assume F and D extensions */ TCGv load_res; TCGv load_val; diff --git a/qemu/m68k.h b/qemu/m68k.h index 1b1703d19c..065357bbe1 100644 --- a/qemu/m68k.h +++ b/qemu/m68k.h @@ -42,7 +42,10 @@ #define tcg_gen_shl_i64 tcg_gen_shl_i64_m68k #define tcg_gen_shr_i64 tcg_gen_shr_i64_m68k #define tcg_gen_st_i64 tcg_gen_st_i64_m68k +#define tcg_gen_add_i64 tcg_gen_add_i64_m68k +#define tcg_gen_sub_i64 tcg_gen_sub_i64_m68k #define tcg_gen_xor_i64 tcg_gen_xor_i64_m68k +#define tcg_gen_neg_i64 tcg_gen_neg_i64_m68k #define cpu_icount_to_ns cpu_icount_to_ns_m68k #define cpu_is_stopped cpu_is_stopped_m68k #define cpu_get_ticks cpu_get_ticks_m68k @@ -374,6 +377,8 @@ #define floatx80_sub floatx80_sub_m68k #define floatx80_mul floatx80_mul_m68k #define floatx80_div floatx80_div_m68k +#define floatx80_modrem floatx80_modrem_m68k +#define floatx80_mod floatx80_mod_m68k #define floatx80_rem floatx80_rem_m68k #define floatx80_sqrt floatx80_sqrt_m68k #define floatx80_eq floatx80_eq_m68k @@ -648,6 +653,7 @@ #define tcg_gen_gvec_dup_i32 tcg_gen_gvec_dup_i32_m68k #define tcg_gen_gvec_dup_i64 tcg_gen_gvec_dup_i64_m68k #define tcg_gen_gvec_dup_mem tcg_gen_gvec_dup_mem_m68k +#define tcg_gen_gvec_dup_imm tcg_gen_gvec_dup_imm_m68k #define tcg_gen_gvec_dup64i tcg_gen_gvec_dup64i_m68k #define tcg_gen_gvec_dup32i tcg_gen_gvec_dup32i_m68k #define tcg_gen_gvec_dup16i tcg_gen_gvec_dup16i_m68k @@ -702,13 +708,20 @@ #define tcg_gen_gvec_shri tcg_gen_gvec_shri_m68k #define tcg_gen_vec_sar8i_i64 tcg_gen_vec_sar8i_i64_m68k #define tcg_gen_vec_sar16i_i64 tcg_gen_vec_sar16i_i64_m68k +#define tcg_gen_vec_rotl8i_i64 tcg_gen_vec_rotl8i_i64_m68k +#define tcg_gen_vec_rotl16i_i64 
tcg_gen_vec_rotl16i_i64_m68k #define tcg_gen_gvec_sari tcg_gen_gvec_sari_m68k +#define tcg_gen_gvec_rotli tcg_gen_gvec_rotli_m68k +#define tcg_gen_gvec_rotri tcg_gen_gvec_rotri_m68k #define tcg_gen_gvec_shls tcg_gen_gvec_shls_m68k #define tcg_gen_gvec_shrs tcg_gen_gvec_shrs_m68k #define tcg_gen_gvec_sars tcg_gen_gvec_sars_m68k +#define tcg_gen_gvec_rotls tcg_gen_gvec_rotls_m68k #define tcg_gen_gvec_shlv tcg_gen_gvec_shlv_m68k #define tcg_gen_gvec_shrv tcg_gen_gvec_shrv_m68k #define tcg_gen_gvec_sarv tcg_gen_gvec_sarv_m68k +#define tcg_gen_gvec_rotlv tcg_gen_gvec_rotlv_m68k +#define tcg_gen_gvec_rotrv tcg_gen_gvec_rotrv_m68k #define tcg_gen_gvec_cmp tcg_gen_gvec_cmp_m68k #define tcg_gen_gvec_bitsel tcg_gen_gvec_bitsel_m68k #define tcg_can_emit_vecop_list tcg_can_emit_vecop_list_m68k @@ -745,6 +758,8 @@ #define tcg_gen_shli_vec tcg_gen_shli_vec_m68k #define tcg_gen_shri_vec tcg_gen_shri_vec_m68k #define tcg_gen_sari_vec tcg_gen_sari_vec_m68k +#define tcg_gen_rotli_vec tcg_gen_rotli_vec_m68k +#define tcg_gen_rotri_vec tcg_gen_rotri_vec_m68k #define tcg_gen_cmp_vec tcg_gen_cmp_vec_m68k #define tcg_gen_add_vec tcg_gen_add_vec_m68k #define tcg_gen_sub_vec tcg_gen_sub_vec_m68k @@ -760,9 +775,12 @@ #define tcg_gen_shlv_vec tcg_gen_shlv_vec_m68k #define tcg_gen_shrv_vec tcg_gen_shrv_vec_m68k #define tcg_gen_sarv_vec tcg_gen_sarv_vec_m68k +#define tcg_gen_rotlv_vec tcg_gen_rotlv_vec_m68k +#define tcg_gen_rotrv_vec tcg_gen_rotrv_vec_m68k #define tcg_gen_shls_vec tcg_gen_shls_vec_m68k #define tcg_gen_shrs_vec tcg_gen_shrs_vec_m68k #define tcg_gen_sars_vec tcg_gen_sars_vec_m68k +#define tcg_gen_rotls_vec tcg_gen_rotls_vec_m68k #define tcg_gen_bitsel_vec tcg_gen_bitsel_vec_m68k #define tcg_gen_cmpsel_vec tcg_gen_cmpsel_vec_m68k #define tb_htable_lookup tb_htable_lookup_m68k @@ -774,6 +792,7 @@ #define cpu_loop_exit_restore cpu_loop_exit_restore_m68k #define cpu_loop_exit_atomic cpu_loop_exit_atomic_m68k #define tlb_init tlb_init_m68k +#define tlb_destroy tlb_destroy_m68k #define tlb_flush_by_mmuidx tlb_flush_by_mmuidx_m68k #define tlb_flush tlb_flush_m68k #define tlb_flush_by_mmuidx_all_cpus tlb_flush_by_mmuidx_all_cpus_m68k @@ -794,6 +813,7 @@ #define tlb_set_page tlb_set_page_m68k #define get_page_addr_code_hostp get_page_addr_code_hostp_m68k #define get_page_addr_code get_page_addr_code_m68k +#define probe_access_flags probe_access_flags_m68k #define probe_access probe_access_m68k #define tlb_vaddr_to_host tlb_vaddr_to_host_m68k #define helper_ret_ldub_mmu helper_ret_ldub_mmu_m68k @@ -810,22 +830,34 @@ #define helper_be_ldsl_mmu helper_be_ldsl_mmu_m68k #define cpu_ldub_mmuidx_ra cpu_ldub_mmuidx_ra_m68k #define cpu_ldsb_mmuidx_ra cpu_ldsb_mmuidx_ra_m68k -#define cpu_lduw_mmuidx_ra cpu_lduw_mmuidx_ra_m68k -#define cpu_ldsw_mmuidx_ra cpu_ldsw_mmuidx_ra_m68k -#define cpu_ldl_mmuidx_ra cpu_ldl_mmuidx_ra_m68k -#define cpu_ldq_mmuidx_ra cpu_ldq_mmuidx_ra_m68k +#define cpu_lduw_be_mmuidx_ra cpu_lduw_be_mmuidx_ra_m68k +#define cpu_lduw_le_mmuidx_ra cpu_lduw_le_mmuidx_ra_m68k +#define cpu_ldsw_be_mmuidx_ra cpu_ldsw_be_mmuidx_ra_m68k +#define cpu_ldsw_le_mmuidx_ra cpu_ldsw_le_mmuidx_ra_m68k +#define cpu_ldl_be_mmuidx_ra cpu_ldl_be_mmuidx_ra_m68k +#define cpu_ldl_le_mmuidx_ra cpu_ldl_le_mmuidx_ra_m68k +#define cpu_ldq_be_mmuidx_ra cpu_ldq_be_mmuidx_ra_m68k +#define cpu_ldq_le_mmuidx_ra cpu_ldq_le_mmuidx_ra_m68k #define cpu_ldub_data_ra cpu_ldub_data_ra_m68k #define cpu_ldsb_data_ra cpu_ldsb_data_ra_m68k -#define cpu_lduw_data_ra cpu_lduw_data_ra_m68k -#define cpu_ldsw_data_ra cpu_ldsw_data_ra_m68k -#define 
cpu_ldl_data_ra cpu_ldl_data_ra_m68k -#define cpu_ldq_data_ra cpu_ldq_data_ra_m68k +#define cpu_lduw_be_data_ra cpu_lduw_be_data_ra_m68k +#define cpu_lduw_le_data_ra cpu_lduw_le_data_ra_m68k +#define cpu_ldsw_be_data_ra cpu_ldsw_be_data_ra_m68k +#define cpu_ldsw_le_data_ra cpu_ldsw_le_data_ra_m68k +#define cpu_ldl_be_data_ra cpu_ldl_be_data_ra_m68k +#define cpu_ldl_le_data_ra cpu_ldl_le_data_ra_m68k +#define cpu_ldq_be_data_ra cpu_ldq_be_data_ra_m68k +#define cpu_ldq_le_data_ra cpu_ldq_le_data_ra_m68k #define cpu_ldub_data cpu_ldub_data_m68k #define cpu_ldsb_data cpu_ldsb_data_m68k -#define cpu_lduw_data cpu_lduw_data_m68k -#define cpu_ldsw_data cpu_ldsw_data_m68k -#define cpu_ldl_data cpu_ldl_data_m68k -#define cpu_ldq_data cpu_ldq_data_m68k +#define cpu_lduw_be_data cpu_lduw_be_data_m68k +#define cpu_lduw_le_data cpu_lduw_le_data_m68k +#define cpu_ldsw_be_data cpu_ldsw_be_data_m68k +#define cpu_ldsw_le_data cpu_ldsw_le_data_m68k +#define cpu_ldl_be_data cpu_ldl_be_data_m68k +#define cpu_ldl_le_data cpu_ldl_le_data_m68k +#define cpu_ldq_le_data cpu_ldq_le_data_m68k +#define cpu_ldq_be_data cpu_ldq_be_data_m68k #define helper_ret_stb_mmu helper_ret_stb_mmu_m68k #define helper_le_stw_mmu helper_le_stw_mmu_m68k #define helper_be_stw_mmu helper_be_stw_mmu_m68k @@ -834,17 +866,26 @@ #define helper_le_stq_mmu helper_le_stq_mmu_m68k #define helper_be_stq_mmu helper_be_stq_mmu_m68k #define cpu_stb_mmuidx_ra cpu_stb_mmuidx_ra_m68k -#define cpu_stw_mmuidx_ra cpu_stw_mmuidx_ra_m68k -#define cpu_stl_mmuidx_ra cpu_stl_mmuidx_ra_m68k -#define cpu_stq_mmuidx_ra cpu_stq_mmuidx_ra_m68k +#define cpu_stw_be_mmuidx_ra cpu_stw_be_mmuidx_ra_m68k +#define cpu_stw_le_mmuidx_ra cpu_stw_le_mmuidx_ra_m68k +#define cpu_stl_be_mmuidx_ra cpu_stl_be_mmuidx_ra_m68k +#define cpu_stl_le_mmuidx_ra cpu_stl_le_mmuidx_ra_m68k +#define cpu_stq_be_mmuidx_ra cpu_stq_be_mmuidx_ra_m68k +#define cpu_stq_le_mmuidx_ra cpu_stq_le_mmuidx_ra_m68k #define cpu_stb_data_ra cpu_stb_data_ra_m68k -#define cpu_stw_data_ra cpu_stw_data_ra_m68k -#define cpu_stl_data_ra cpu_stl_data_ra_m68k -#define cpu_stq_data_ra cpu_stq_data_ra_m68k +#define cpu_stw_be_data_ra cpu_stw_be_data_ra_m68k +#define cpu_stw_le_data_ra cpu_stw_le_data_ra_m68k +#define cpu_stl_be_data_ra cpu_stl_be_data_ra_m68k +#define cpu_stl_le_data_ra cpu_stl_le_data_ra_m68k +#define cpu_stq_be_data_ra cpu_stq_be_data_ra_m68k +#define cpu_stq_le_data_ra cpu_stq_le_data_ra_m68k #define cpu_stb_data cpu_stb_data_m68k -#define cpu_stw_data cpu_stw_data_m68k -#define cpu_stl_data cpu_stl_data_m68k -#define cpu_stq_data cpu_stq_data_m68k +#define cpu_stw_be_data cpu_stw_be_data_m68k +#define cpu_stw_le_data cpu_stw_le_data_m68k +#define cpu_stl_be_data cpu_stl_be_data_m68k +#define cpu_stl_le_data cpu_stl_le_data_m68k +#define cpu_stq_be_data cpu_stq_be_data_m68k +#define cpu_stq_le_data cpu_stq_le_data_m68k #define helper_atomic_cmpxchgb_mmu helper_atomic_cmpxchgb_mmu_m68k #define helper_atomic_xchgb_mmu helper_atomic_xchgb_mmu_m68k #define helper_atomic_fetch_addb_mmu helper_atomic_fetch_addb_mmu_m68k @@ -1101,6 +1142,7 @@ #define cpu_lduw_code cpu_lduw_code_m68k #define cpu_ldl_code cpu_ldl_code_m68k #define cpu_ldq_code cpu_ldq_code_m68k +#define cpu_interrupt_handler cpu_interrupt_handler_m68k #define helper_div_i32 helper_div_i32_m68k #define helper_rem_i32 helper_rem_i32_m68k #define helper_divu_i32 helper_divu_i32_m68k @@ -1185,6 +1227,10 @@ #define helper_gvec_sar16i helper_gvec_sar16i_m68k #define helper_gvec_sar32i helper_gvec_sar32i_m68k #define helper_gvec_sar64i 
helper_gvec_sar64i_m68k +#define helper_gvec_rotl8i helper_gvec_rotl8i_m68k +#define helper_gvec_rotl16i helper_gvec_rotl16i_m68k +#define helper_gvec_rotl32i helper_gvec_rotl32i_m68k +#define helper_gvec_rotl64i helper_gvec_rotl64i_m68k #define helper_gvec_shl8v helper_gvec_shl8v_m68k #define helper_gvec_shl16v helper_gvec_shl16v_m68k #define helper_gvec_shl32v helper_gvec_shl32v_m68k @@ -1197,6 +1243,14 @@ #define helper_gvec_sar16v helper_gvec_sar16v_m68k #define helper_gvec_sar32v helper_gvec_sar32v_m68k #define helper_gvec_sar64v helper_gvec_sar64v_m68k +#define helper_gvec_rotl8v helper_gvec_rotl8v_m68k +#define helper_gvec_rotl16v helper_gvec_rotl16v_m68k +#define helper_gvec_rotl32v helper_gvec_rotl32v_m68k +#define helper_gvec_rotl64v helper_gvec_rotl64v_m68k +#define helper_gvec_rotr8v helper_gvec_rotr8v_m68k +#define helper_gvec_rotr16v helper_gvec_rotr16v_m68k +#define helper_gvec_rotr32v helper_gvec_rotr32v_m68k +#define helper_gvec_rotr64v helper_gvec_rotr64v_m68k #define helper_gvec_eq8 helper_gvec_eq8_m68k #define helper_gvec_ne8 helper_gvec_ne8_m68k #define helper_gvec_lt8 helper_gvec_lt8_m68k @@ -1420,7 +1474,6 @@ #define helper_bfffo_mem helper_bfffo_mem_m68k #define helper_chk helper_chk_m68k #define helper_chk2 helper_chk2_m68k -#define floatx80_mod floatx80_mod_m68k #define floatx80_getman floatx80_getman_m68k #define floatx80_getexp floatx80_getexp_m68k #define floatx80_scale floatx80_scale_m68k diff --git a/qemu/mips.h b/qemu/mips.h index 3a005710c7..b55e68792d 100644 --- a/qemu/mips.h +++ b/qemu/mips.h @@ -42,7 +42,10 @@ #define tcg_gen_shl_i64 tcg_gen_shl_i64_mips #define tcg_gen_shr_i64 tcg_gen_shr_i64_mips #define tcg_gen_st_i64 tcg_gen_st_i64_mips +#define tcg_gen_add_i64 tcg_gen_add_i64_mips +#define tcg_gen_sub_i64 tcg_gen_sub_i64_mips #define tcg_gen_xor_i64 tcg_gen_xor_i64_mips +#define tcg_gen_neg_i64 tcg_gen_neg_i64_mips #define cpu_icount_to_ns cpu_icount_to_ns_mips #define cpu_is_stopped cpu_is_stopped_mips #define cpu_get_ticks cpu_get_ticks_mips @@ -374,6 +377,8 @@ #define floatx80_sub floatx80_sub_mips #define floatx80_mul floatx80_mul_mips #define floatx80_div floatx80_div_mips +#define floatx80_modrem floatx80_modrem_mips +#define floatx80_mod floatx80_mod_mips #define floatx80_rem floatx80_rem_mips #define floatx80_sqrt floatx80_sqrt_mips #define floatx80_eq floatx80_eq_mips @@ -648,6 +653,7 @@ #define tcg_gen_gvec_dup_i32 tcg_gen_gvec_dup_i32_mips #define tcg_gen_gvec_dup_i64 tcg_gen_gvec_dup_i64_mips #define tcg_gen_gvec_dup_mem tcg_gen_gvec_dup_mem_mips +#define tcg_gen_gvec_dup_imm tcg_gen_gvec_dup_imm_mips #define tcg_gen_gvec_dup64i tcg_gen_gvec_dup64i_mips #define tcg_gen_gvec_dup32i tcg_gen_gvec_dup32i_mips #define tcg_gen_gvec_dup16i tcg_gen_gvec_dup16i_mips @@ -702,13 +708,20 @@ #define tcg_gen_gvec_shri tcg_gen_gvec_shri_mips #define tcg_gen_vec_sar8i_i64 tcg_gen_vec_sar8i_i64_mips #define tcg_gen_vec_sar16i_i64 tcg_gen_vec_sar16i_i64_mips +#define tcg_gen_vec_rotl8i_i64 tcg_gen_vec_rotl8i_i64_mips +#define tcg_gen_vec_rotl16i_i64 tcg_gen_vec_rotl16i_i64_mips #define tcg_gen_gvec_sari tcg_gen_gvec_sari_mips +#define tcg_gen_gvec_rotli tcg_gen_gvec_rotli_mips +#define tcg_gen_gvec_rotri tcg_gen_gvec_rotri_mips #define tcg_gen_gvec_shls tcg_gen_gvec_shls_mips #define tcg_gen_gvec_shrs tcg_gen_gvec_shrs_mips #define tcg_gen_gvec_sars tcg_gen_gvec_sars_mips +#define tcg_gen_gvec_rotls tcg_gen_gvec_rotls_mips #define tcg_gen_gvec_shlv tcg_gen_gvec_shlv_mips #define tcg_gen_gvec_shrv tcg_gen_gvec_shrv_mips #define tcg_gen_gvec_sarv 
tcg_gen_gvec_sarv_mips +#define tcg_gen_gvec_rotlv tcg_gen_gvec_rotlv_mips +#define tcg_gen_gvec_rotrv tcg_gen_gvec_rotrv_mips #define tcg_gen_gvec_cmp tcg_gen_gvec_cmp_mips #define tcg_gen_gvec_bitsel tcg_gen_gvec_bitsel_mips #define tcg_can_emit_vecop_list tcg_can_emit_vecop_list_mips @@ -745,6 +758,8 @@ #define tcg_gen_shli_vec tcg_gen_shli_vec_mips #define tcg_gen_shri_vec tcg_gen_shri_vec_mips #define tcg_gen_sari_vec tcg_gen_sari_vec_mips +#define tcg_gen_rotli_vec tcg_gen_rotli_vec_mips +#define tcg_gen_rotri_vec tcg_gen_rotri_vec_mips #define tcg_gen_cmp_vec tcg_gen_cmp_vec_mips #define tcg_gen_add_vec tcg_gen_add_vec_mips #define tcg_gen_sub_vec tcg_gen_sub_vec_mips @@ -760,9 +775,12 @@ #define tcg_gen_shlv_vec tcg_gen_shlv_vec_mips #define tcg_gen_shrv_vec tcg_gen_shrv_vec_mips #define tcg_gen_sarv_vec tcg_gen_sarv_vec_mips +#define tcg_gen_rotlv_vec tcg_gen_rotlv_vec_mips +#define tcg_gen_rotrv_vec tcg_gen_rotrv_vec_mips #define tcg_gen_shls_vec tcg_gen_shls_vec_mips #define tcg_gen_shrs_vec tcg_gen_shrs_vec_mips #define tcg_gen_sars_vec tcg_gen_sars_vec_mips +#define tcg_gen_rotls_vec tcg_gen_rotls_vec_mips #define tcg_gen_bitsel_vec tcg_gen_bitsel_vec_mips #define tcg_gen_cmpsel_vec tcg_gen_cmpsel_vec_mips #define tb_htable_lookup tb_htable_lookup_mips @@ -774,6 +792,7 @@ #define cpu_loop_exit_restore cpu_loop_exit_restore_mips #define cpu_loop_exit_atomic cpu_loop_exit_atomic_mips #define tlb_init tlb_init_mips +#define tlb_destroy tlb_destroy_mips #define tlb_flush_by_mmuidx tlb_flush_by_mmuidx_mips #define tlb_flush tlb_flush_mips #define tlb_flush_by_mmuidx_all_cpus tlb_flush_by_mmuidx_all_cpus_mips @@ -794,6 +813,7 @@ #define tlb_set_page tlb_set_page_mips #define get_page_addr_code_hostp get_page_addr_code_hostp_mips #define get_page_addr_code get_page_addr_code_mips +#define probe_access_flags probe_access_flags_mips #define probe_access probe_access_mips #define tlb_vaddr_to_host tlb_vaddr_to_host_mips #define helper_ret_ldub_mmu helper_ret_ldub_mmu_mips @@ -810,22 +830,34 @@ #define helper_be_ldsl_mmu helper_be_ldsl_mmu_mips #define cpu_ldub_mmuidx_ra cpu_ldub_mmuidx_ra_mips #define cpu_ldsb_mmuidx_ra cpu_ldsb_mmuidx_ra_mips -#define cpu_lduw_mmuidx_ra cpu_lduw_mmuidx_ra_mips -#define cpu_ldsw_mmuidx_ra cpu_ldsw_mmuidx_ra_mips -#define cpu_ldl_mmuidx_ra cpu_ldl_mmuidx_ra_mips -#define cpu_ldq_mmuidx_ra cpu_ldq_mmuidx_ra_mips +#define cpu_lduw_be_mmuidx_ra cpu_lduw_be_mmuidx_ra_mips +#define cpu_lduw_le_mmuidx_ra cpu_lduw_le_mmuidx_ra_mips +#define cpu_ldsw_be_mmuidx_ra cpu_ldsw_be_mmuidx_ra_mips +#define cpu_ldsw_le_mmuidx_ra cpu_ldsw_le_mmuidx_ra_mips +#define cpu_ldl_be_mmuidx_ra cpu_ldl_be_mmuidx_ra_mips +#define cpu_ldl_le_mmuidx_ra cpu_ldl_le_mmuidx_ra_mips +#define cpu_ldq_be_mmuidx_ra cpu_ldq_be_mmuidx_ra_mips +#define cpu_ldq_le_mmuidx_ra cpu_ldq_le_mmuidx_ra_mips #define cpu_ldub_data_ra cpu_ldub_data_ra_mips #define cpu_ldsb_data_ra cpu_ldsb_data_ra_mips -#define cpu_lduw_data_ra cpu_lduw_data_ra_mips -#define cpu_ldsw_data_ra cpu_ldsw_data_ra_mips -#define cpu_ldl_data_ra cpu_ldl_data_ra_mips -#define cpu_ldq_data_ra cpu_ldq_data_ra_mips +#define cpu_lduw_be_data_ra cpu_lduw_be_data_ra_mips +#define cpu_lduw_le_data_ra cpu_lduw_le_data_ra_mips +#define cpu_ldsw_be_data_ra cpu_ldsw_be_data_ra_mips +#define cpu_ldsw_le_data_ra cpu_ldsw_le_data_ra_mips +#define cpu_ldl_be_data_ra cpu_ldl_be_data_ra_mips +#define cpu_ldl_le_data_ra cpu_ldl_le_data_ra_mips +#define cpu_ldq_be_data_ra cpu_ldq_be_data_ra_mips +#define cpu_ldq_le_data_ra cpu_ldq_le_data_ra_mips 
#define cpu_ldub_data cpu_ldub_data_mips #define cpu_ldsb_data cpu_ldsb_data_mips -#define cpu_lduw_data cpu_lduw_data_mips -#define cpu_ldsw_data cpu_ldsw_data_mips -#define cpu_ldl_data cpu_ldl_data_mips -#define cpu_ldq_data cpu_ldq_data_mips +#define cpu_lduw_be_data cpu_lduw_be_data_mips +#define cpu_lduw_le_data cpu_lduw_le_data_mips +#define cpu_ldsw_be_data cpu_ldsw_be_data_mips +#define cpu_ldsw_le_data cpu_ldsw_le_data_mips +#define cpu_ldl_be_data cpu_ldl_be_data_mips +#define cpu_ldl_le_data cpu_ldl_le_data_mips +#define cpu_ldq_le_data cpu_ldq_le_data_mips +#define cpu_ldq_be_data cpu_ldq_be_data_mips #define helper_ret_stb_mmu helper_ret_stb_mmu_mips #define helper_le_stw_mmu helper_le_stw_mmu_mips #define helper_be_stw_mmu helper_be_stw_mmu_mips @@ -834,17 +866,26 @@ #define helper_le_stq_mmu helper_le_stq_mmu_mips #define helper_be_stq_mmu helper_be_stq_mmu_mips #define cpu_stb_mmuidx_ra cpu_stb_mmuidx_ra_mips -#define cpu_stw_mmuidx_ra cpu_stw_mmuidx_ra_mips -#define cpu_stl_mmuidx_ra cpu_stl_mmuidx_ra_mips -#define cpu_stq_mmuidx_ra cpu_stq_mmuidx_ra_mips +#define cpu_stw_be_mmuidx_ra cpu_stw_be_mmuidx_ra_mips +#define cpu_stw_le_mmuidx_ra cpu_stw_le_mmuidx_ra_mips +#define cpu_stl_be_mmuidx_ra cpu_stl_be_mmuidx_ra_mips +#define cpu_stl_le_mmuidx_ra cpu_stl_le_mmuidx_ra_mips +#define cpu_stq_be_mmuidx_ra cpu_stq_be_mmuidx_ra_mips +#define cpu_stq_le_mmuidx_ra cpu_stq_le_mmuidx_ra_mips #define cpu_stb_data_ra cpu_stb_data_ra_mips -#define cpu_stw_data_ra cpu_stw_data_ra_mips -#define cpu_stl_data_ra cpu_stl_data_ra_mips -#define cpu_stq_data_ra cpu_stq_data_ra_mips +#define cpu_stw_be_data_ra cpu_stw_be_data_ra_mips +#define cpu_stw_le_data_ra cpu_stw_le_data_ra_mips +#define cpu_stl_be_data_ra cpu_stl_be_data_ra_mips +#define cpu_stl_le_data_ra cpu_stl_le_data_ra_mips +#define cpu_stq_be_data_ra cpu_stq_be_data_ra_mips +#define cpu_stq_le_data_ra cpu_stq_le_data_ra_mips #define cpu_stb_data cpu_stb_data_mips -#define cpu_stw_data cpu_stw_data_mips -#define cpu_stl_data cpu_stl_data_mips -#define cpu_stq_data cpu_stq_data_mips +#define cpu_stw_be_data cpu_stw_be_data_mips +#define cpu_stw_le_data cpu_stw_le_data_mips +#define cpu_stl_be_data cpu_stl_be_data_mips +#define cpu_stl_le_data cpu_stl_le_data_mips +#define cpu_stq_be_data cpu_stq_be_data_mips +#define cpu_stq_le_data cpu_stq_le_data_mips #define helper_atomic_cmpxchgb_mmu helper_atomic_cmpxchgb_mmu_mips #define helper_atomic_xchgb_mmu helper_atomic_xchgb_mmu_mips #define helper_atomic_fetch_addb_mmu helper_atomic_fetch_addb_mmu_mips @@ -1101,6 +1142,7 @@ #define cpu_lduw_code cpu_lduw_code_mips #define cpu_ldl_code cpu_ldl_code_mips #define cpu_ldq_code cpu_ldq_code_mips +#define cpu_interrupt_handler cpu_interrupt_handler_mips #define helper_div_i32 helper_div_i32_mips #define helper_rem_i32 helper_rem_i32_mips #define helper_divu_i32 helper_divu_i32_mips @@ -1185,6 +1227,10 @@ #define helper_gvec_sar16i helper_gvec_sar16i_mips #define helper_gvec_sar32i helper_gvec_sar32i_mips #define helper_gvec_sar64i helper_gvec_sar64i_mips +#define helper_gvec_rotl8i helper_gvec_rotl8i_mips +#define helper_gvec_rotl16i helper_gvec_rotl16i_mips +#define helper_gvec_rotl32i helper_gvec_rotl32i_mips +#define helper_gvec_rotl64i helper_gvec_rotl64i_mips #define helper_gvec_shl8v helper_gvec_shl8v_mips #define helper_gvec_shl16v helper_gvec_shl16v_mips #define helper_gvec_shl32v helper_gvec_shl32v_mips @@ -1197,6 +1243,14 @@ #define helper_gvec_sar16v helper_gvec_sar16v_mips #define helper_gvec_sar32v helper_gvec_sar32v_mips 
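The cpu_ld*/cpu_st* renames running through these headers track the split of the cpu_ldst.h accessors into explicit big- and little-endian variants: the old size-only names (cpu_lduw_data_ra, cpu_stl_mmuidx_ra, ...) give way to _be_/_le_ pairs. A minimal sketch of what that looks like at a call site, with invented names and a function that is not part of the patch:

/* Illustration only: load a halfword little-endian and store it back big-endian. */
static uint16_t swap16_in_guest_ram(CPUArchState *env, target_ulong addr,
                                    uintptr_t retaddr)
{
    uint16_t v = cpu_lduw_le_data_ra(env, addr, retaddr);  /* explicit LE load  */
    cpu_stw_be_data_ra(env, addr, v, retaddr);             /* explicit BE store */
    return v;
}

The per-target #define lists grow accordingly because each of these new entry points, like every other symbol in this tree, is suffixed with the architecture name so that several targets can coexist in a single library.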
#define helper_gvec_sar64v helper_gvec_sar64v_mips +#define helper_gvec_rotl8v helper_gvec_rotl8v_mips +#define helper_gvec_rotl16v helper_gvec_rotl16v_mips +#define helper_gvec_rotl32v helper_gvec_rotl32v_mips +#define helper_gvec_rotl64v helper_gvec_rotl64v_mips +#define helper_gvec_rotr8v helper_gvec_rotr8v_mips +#define helper_gvec_rotr16v helper_gvec_rotr16v_mips +#define helper_gvec_rotr32v helper_gvec_rotr32v_mips +#define helper_gvec_rotr64v helper_gvec_rotr64v_mips #define helper_gvec_eq8 helper_gvec_eq8_mips #define helper_gvec_ne8 helper_gvec_ne8_mips #define helper_gvec_lt8 helper_gvec_lt8_mips @@ -1677,7 +1731,6 @@ #define helper_rddsp helper_rddsp_mips #define helper_cfc1 helper_cfc1_mips #define helper_ctc1 helper_ctc1_mips -#define ieee_ex_to_mips ieee_ex_to_mips_mips #define helper_float_sqrt_d helper_float_sqrt_d_mips #define helper_float_sqrt_s helper_float_sqrt_s_mips #define helper_float_cvtd_s helper_float_cvtd_s_mips @@ -2232,23 +2285,59 @@ #define helper_msa_srlri_df helper_msa_srlri_df_mips #define helper_msa_binsli_df helper_msa_binsli_df_mips #define helper_msa_binsri_df helper_msa_binsri_df_mips -#define helper_msa_subv_df helper_msa_subv_df_mips -#define helper_msa_subs_s_df helper_msa_subs_s_df_mips -#define helper_msa_subs_u_df helper_msa_subs_u_df_mips -#define helper_msa_subsus_u_df helper_msa_subsus_u_df_mips -#define helper_msa_subsuu_s_df helper_msa_subsuu_s_df_mips -#define helper_msa_mulv_df helper_msa_mulv_df_mips -#define helper_msa_dotp_s_df helper_msa_dotp_s_df_mips -#define helper_msa_dotp_u_df helper_msa_dotp_u_df_mips +#define helper_msa_subv_b helper_msa_subv_b_mips +#define helper_msa_subv_h helper_msa_subv_h_mips +#define helper_msa_subv_w helper_msa_subv_w_mips +#define helper_msa_subv_d helper_msa_subv_d_mips +#define helper_msa_subs_s_b helper_msa_subs_s_b_mips +#define helper_msa_subs_s_h helper_msa_subs_s_h_mips +#define helper_msa_subs_s_w helper_msa_subs_s_w_mips +#define helper_msa_subs_s_d helper_msa_subs_s_d_mips +#define helper_msa_subs_u_b helper_msa_subs_u_b_mips +#define helper_msa_subs_u_h helper_msa_subs_u_h_mips +#define helper_msa_subs_u_w helper_msa_subs_u_w_mips +#define helper_msa_subs_u_d helper_msa_subs_u_d_mips +#define helper_msa_subsus_u_b helper_msa_subsus_u_b_mips +#define helper_msa_subsus_u_h helper_msa_subsus_u_h_mips +#define helper_msa_subsus_u_w helper_msa_subsus_u_w_mips +#define helper_msa_subsus_u_d helper_msa_subsus_u_d_mips +#define helper_msa_subsuu_s_b helper_msa_subsuu_s_b_mips +#define helper_msa_subsuu_s_h helper_msa_subsuu_s_h_mips +#define helper_msa_subsuu_s_w helper_msa_subsuu_s_w_mips +#define helper_msa_subsuu_s_d helper_msa_subsuu_s_d_mips +#define helper_msa_mulv_b helper_msa_mulv_b_mips +#define helper_msa_mulv_h helper_msa_mulv_h_mips +#define helper_msa_mulv_w helper_msa_mulv_w_mips +#define helper_msa_mulv_d helper_msa_mulv_d_mips +#define helper_msa_dotp_s_h helper_msa_dotp_s_h_mips +#define helper_msa_dotp_s_w helper_msa_dotp_s_w_mips +#define helper_msa_dotp_s_d helper_msa_dotp_s_d_mips +#define helper_msa_dotp_u_h helper_msa_dotp_u_h_mips +#define helper_msa_dotp_u_w helper_msa_dotp_u_w_mips +#define helper_msa_dotp_u_d helper_msa_dotp_u_d_mips #define helper_msa_mul_q_df helper_msa_mul_q_df_mips #define helper_msa_mulr_q_df helper_msa_mulr_q_df_mips #define helper_msa_sld_df helper_msa_sld_df_mips -#define helper_msa_maddv_df helper_msa_maddv_df_mips -#define helper_msa_msubv_df helper_msa_msubv_df_mips -#define helper_msa_dpadd_s_df helper_msa_dpadd_s_df_mips -#define 
helper_msa_dpadd_u_df helper_msa_dpadd_u_df_mips -#define helper_msa_dpsub_s_df helper_msa_dpsub_s_df_mips -#define helper_msa_dpsub_u_df helper_msa_dpsub_u_df_mips +#define helper_msa_maddv_b helper_msa_maddv_b_mips +#define helper_msa_maddv_h helper_msa_maddv_h_mips +#define helper_msa_maddv_w helper_msa_maddv_w_mips +#define helper_msa_maddv_d helper_msa_maddv_d_mips +#define helper_msa_msubv_b helper_msa_msubv_b_mips +#define helper_msa_msubv_h helper_msa_msubv_h_mips +#define helper_msa_msubv_w helper_msa_msubv_w_mips +#define helper_msa_msubv_d helper_msa_msubv_d_mips +#define helper_msa_dpadd_s_h helper_msa_dpadd_s_h_mips +#define helper_msa_dpadd_s_w helper_msa_dpadd_s_w_mips +#define helper_msa_dpadd_s_d helper_msa_dpadd_s_d_mips +#define helper_msa_dpadd_u_h helper_msa_dpadd_u_h_mips +#define helper_msa_dpadd_u_w helper_msa_dpadd_u_w_mips +#define helper_msa_dpadd_u_d helper_msa_dpadd_u_d_mips +#define helper_msa_dpsub_s_h helper_msa_dpsub_s_h_mips +#define helper_msa_dpsub_s_w helper_msa_dpsub_s_w_mips +#define helper_msa_dpsub_s_d helper_msa_dpsub_s_d_mips +#define helper_msa_dpsub_u_h helper_msa_dpsub_u_h_mips +#define helper_msa_dpsub_u_w helper_msa_dpsub_u_w_mips +#define helper_msa_dpsub_u_d helper_msa_dpsub_u_d_mips #define helper_msa_binsl_df helper_msa_binsl_df_mips #define helper_msa_binsr_df helper_msa_binsr_df_mips #define helper_msa_madd_q_df helper_msa_madd_q_df_mips diff --git a/qemu/mips64.h b/qemu/mips64.h index 367c6b7e79..76990196b2 100644 --- a/qemu/mips64.h +++ b/qemu/mips64.h @@ -42,7 +42,10 @@ #define tcg_gen_shl_i64 tcg_gen_shl_i64_mips64 #define tcg_gen_shr_i64 tcg_gen_shr_i64_mips64 #define tcg_gen_st_i64 tcg_gen_st_i64_mips64 +#define tcg_gen_add_i64 tcg_gen_add_i64_mips64 +#define tcg_gen_sub_i64 tcg_gen_sub_i64_mips64 #define tcg_gen_xor_i64 tcg_gen_xor_i64_mips64 +#define tcg_gen_neg_i64 tcg_gen_neg_i64_mips64 #define cpu_icount_to_ns cpu_icount_to_ns_mips64 #define cpu_is_stopped cpu_is_stopped_mips64 #define cpu_get_ticks cpu_get_ticks_mips64 @@ -374,6 +377,8 @@ #define floatx80_sub floatx80_sub_mips64 #define floatx80_mul floatx80_mul_mips64 #define floatx80_div floatx80_div_mips64 +#define floatx80_modrem floatx80_modrem_mips64 +#define floatx80_mod floatx80_mod_mips64 #define floatx80_rem floatx80_rem_mips64 #define floatx80_sqrt floatx80_sqrt_mips64 #define floatx80_eq floatx80_eq_mips64 @@ -648,6 +653,7 @@ #define tcg_gen_gvec_dup_i32 tcg_gen_gvec_dup_i32_mips64 #define tcg_gen_gvec_dup_i64 tcg_gen_gvec_dup_i64_mips64 #define tcg_gen_gvec_dup_mem tcg_gen_gvec_dup_mem_mips64 +#define tcg_gen_gvec_dup_imm tcg_gen_gvec_dup_imm_mips64 #define tcg_gen_gvec_dup64i tcg_gen_gvec_dup64i_mips64 #define tcg_gen_gvec_dup32i tcg_gen_gvec_dup32i_mips64 #define tcg_gen_gvec_dup16i tcg_gen_gvec_dup16i_mips64 @@ -702,13 +708,20 @@ #define tcg_gen_gvec_shri tcg_gen_gvec_shri_mips64 #define tcg_gen_vec_sar8i_i64 tcg_gen_vec_sar8i_i64_mips64 #define tcg_gen_vec_sar16i_i64 tcg_gen_vec_sar16i_i64_mips64 +#define tcg_gen_vec_rotl8i_i64 tcg_gen_vec_rotl8i_i64_mips64 +#define tcg_gen_vec_rotl16i_i64 tcg_gen_vec_rotl16i_i64_mips64 #define tcg_gen_gvec_sari tcg_gen_gvec_sari_mips64 +#define tcg_gen_gvec_rotli tcg_gen_gvec_rotli_mips64 +#define tcg_gen_gvec_rotri tcg_gen_gvec_rotri_mips64 #define tcg_gen_gvec_shls tcg_gen_gvec_shls_mips64 #define tcg_gen_gvec_shrs tcg_gen_gvec_shrs_mips64 #define tcg_gen_gvec_sars tcg_gen_gvec_sars_mips64 +#define tcg_gen_gvec_rotls tcg_gen_gvec_rotls_mips64 #define tcg_gen_gvec_shlv tcg_gen_gvec_shlv_mips64 #define tcg_gen_gvec_shrv 
tcg_gen_gvec_shrv_mips64 #define tcg_gen_gvec_sarv tcg_gen_gvec_sarv_mips64 +#define tcg_gen_gvec_rotlv tcg_gen_gvec_rotlv_mips64 +#define tcg_gen_gvec_rotrv tcg_gen_gvec_rotrv_mips64 #define tcg_gen_gvec_cmp tcg_gen_gvec_cmp_mips64 #define tcg_gen_gvec_bitsel tcg_gen_gvec_bitsel_mips64 #define tcg_can_emit_vecop_list tcg_can_emit_vecop_list_mips64 @@ -745,6 +758,8 @@ #define tcg_gen_shli_vec tcg_gen_shli_vec_mips64 #define tcg_gen_shri_vec tcg_gen_shri_vec_mips64 #define tcg_gen_sari_vec tcg_gen_sari_vec_mips64 +#define tcg_gen_rotli_vec tcg_gen_rotli_vec_mips64 +#define tcg_gen_rotri_vec tcg_gen_rotri_vec_mips64 #define tcg_gen_cmp_vec tcg_gen_cmp_vec_mips64 #define tcg_gen_add_vec tcg_gen_add_vec_mips64 #define tcg_gen_sub_vec tcg_gen_sub_vec_mips64 @@ -760,9 +775,12 @@ #define tcg_gen_shlv_vec tcg_gen_shlv_vec_mips64 #define tcg_gen_shrv_vec tcg_gen_shrv_vec_mips64 #define tcg_gen_sarv_vec tcg_gen_sarv_vec_mips64 +#define tcg_gen_rotlv_vec tcg_gen_rotlv_vec_mips64 +#define tcg_gen_rotrv_vec tcg_gen_rotrv_vec_mips64 #define tcg_gen_shls_vec tcg_gen_shls_vec_mips64 #define tcg_gen_shrs_vec tcg_gen_shrs_vec_mips64 #define tcg_gen_sars_vec tcg_gen_sars_vec_mips64 +#define tcg_gen_rotls_vec tcg_gen_rotls_vec_mips64 #define tcg_gen_bitsel_vec tcg_gen_bitsel_vec_mips64 #define tcg_gen_cmpsel_vec tcg_gen_cmpsel_vec_mips64 #define tb_htable_lookup tb_htable_lookup_mips64 @@ -774,6 +792,7 @@ #define cpu_loop_exit_restore cpu_loop_exit_restore_mips64 #define cpu_loop_exit_atomic cpu_loop_exit_atomic_mips64 #define tlb_init tlb_init_mips64 +#define tlb_destroy tlb_destroy_mips64 #define tlb_flush_by_mmuidx tlb_flush_by_mmuidx_mips64 #define tlb_flush tlb_flush_mips64 #define tlb_flush_by_mmuidx_all_cpus tlb_flush_by_mmuidx_all_cpus_mips64 @@ -794,6 +813,7 @@ #define tlb_set_page tlb_set_page_mips64 #define get_page_addr_code_hostp get_page_addr_code_hostp_mips64 #define get_page_addr_code get_page_addr_code_mips64 +#define probe_access_flags probe_access_flags_mips64 #define probe_access probe_access_mips64 #define tlb_vaddr_to_host tlb_vaddr_to_host_mips64 #define helper_ret_ldub_mmu helper_ret_ldub_mmu_mips64 @@ -810,22 +830,34 @@ #define helper_be_ldsl_mmu helper_be_ldsl_mmu_mips64 #define cpu_ldub_mmuidx_ra cpu_ldub_mmuidx_ra_mips64 #define cpu_ldsb_mmuidx_ra cpu_ldsb_mmuidx_ra_mips64 -#define cpu_lduw_mmuidx_ra cpu_lduw_mmuidx_ra_mips64 -#define cpu_ldsw_mmuidx_ra cpu_ldsw_mmuidx_ra_mips64 -#define cpu_ldl_mmuidx_ra cpu_ldl_mmuidx_ra_mips64 -#define cpu_ldq_mmuidx_ra cpu_ldq_mmuidx_ra_mips64 +#define cpu_lduw_be_mmuidx_ra cpu_lduw_be_mmuidx_ra_mips64 +#define cpu_lduw_le_mmuidx_ra cpu_lduw_le_mmuidx_ra_mips64 +#define cpu_ldsw_be_mmuidx_ra cpu_ldsw_be_mmuidx_ra_mips64 +#define cpu_ldsw_le_mmuidx_ra cpu_ldsw_le_mmuidx_ra_mips64 +#define cpu_ldl_be_mmuidx_ra cpu_ldl_be_mmuidx_ra_mips64 +#define cpu_ldl_le_mmuidx_ra cpu_ldl_le_mmuidx_ra_mips64 +#define cpu_ldq_be_mmuidx_ra cpu_ldq_be_mmuidx_ra_mips64 +#define cpu_ldq_le_mmuidx_ra cpu_ldq_le_mmuidx_ra_mips64 #define cpu_ldub_data_ra cpu_ldub_data_ra_mips64 #define cpu_ldsb_data_ra cpu_ldsb_data_ra_mips64 -#define cpu_lduw_data_ra cpu_lduw_data_ra_mips64 -#define cpu_ldsw_data_ra cpu_ldsw_data_ra_mips64 -#define cpu_ldl_data_ra cpu_ldl_data_ra_mips64 -#define cpu_ldq_data_ra cpu_ldq_data_ra_mips64 +#define cpu_lduw_be_data_ra cpu_lduw_be_data_ra_mips64 +#define cpu_lduw_le_data_ra cpu_lduw_le_data_ra_mips64 +#define cpu_ldsw_be_data_ra cpu_ldsw_be_data_ra_mips64 +#define cpu_ldsw_le_data_ra cpu_ldsw_le_data_ra_mips64 +#define cpu_ldl_be_data_ra 
cpu_ldl_be_data_ra_mips64 +#define cpu_ldl_le_data_ra cpu_ldl_le_data_ra_mips64 +#define cpu_ldq_be_data_ra cpu_ldq_be_data_ra_mips64 +#define cpu_ldq_le_data_ra cpu_ldq_le_data_ra_mips64 #define cpu_ldub_data cpu_ldub_data_mips64 #define cpu_ldsb_data cpu_ldsb_data_mips64 -#define cpu_lduw_data cpu_lduw_data_mips64 -#define cpu_ldsw_data cpu_ldsw_data_mips64 -#define cpu_ldl_data cpu_ldl_data_mips64 -#define cpu_ldq_data cpu_ldq_data_mips64 +#define cpu_lduw_be_data cpu_lduw_be_data_mips64 +#define cpu_lduw_le_data cpu_lduw_le_data_mips64 +#define cpu_ldsw_be_data cpu_ldsw_be_data_mips64 +#define cpu_ldsw_le_data cpu_ldsw_le_data_mips64 +#define cpu_ldl_be_data cpu_ldl_be_data_mips64 +#define cpu_ldl_le_data cpu_ldl_le_data_mips64 +#define cpu_ldq_le_data cpu_ldq_le_data_mips64 +#define cpu_ldq_be_data cpu_ldq_be_data_mips64 #define helper_ret_stb_mmu helper_ret_stb_mmu_mips64 #define helper_le_stw_mmu helper_le_stw_mmu_mips64 #define helper_be_stw_mmu helper_be_stw_mmu_mips64 @@ -834,17 +866,26 @@ #define helper_le_stq_mmu helper_le_stq_mmu_mips64 #define helper_be_stq_mmu helper_be_stq_mmu_mips64 #define cpu_stb_mmuidx_ra cpu_stb_mmuidx_ra_mips64 -#define cpu_stw_mmuidx_ra cpu_stw_mmuidx_ra_mips64 -#define cpu_stl_mmuidx_ra cpu_stl_mmuidx_ra_mips64 -#define cpu_stq_mmuidx_ra cpu_stq_mmuidx_ra_mips64 +#define cpu_stw_be_mmuidx_ra cpu_stw_be_mmuidx_ra_mips64 +#define cpu_stw_le_mmuidx_ra cpu_stw_le_mmuidx_ra_mips64 +#define cpu_stl_be_mmuidx_ra cpu_stl_be_mmuidx_ra_mips64 +#define cpu_stl_le_mmuidx_ra cpu_stl_le_mmuidx_ra_mips64 +#define cpu_stq_be_mmuidx_ra cpu_stq_be_mmuidx_ra_mips64 +#define cpu_stq_le_mmuidx_ra cpu_stq_le_mmuidx_ra_mips64 #define cpu_stb_data_ra cpu_stb_data_ra_mips64 -#define cpu_stw_data_ra cpu_stw_data_ra_mips64 -#define cpu_stl_data_ra cpu_stl_data_ra_mips64 -#define cpu_stq_data_ra cpu_stq_data_ra_mips64 +#define cpu_stw_be_data_ra cpu_stw_be_data_ra_mips64 +#define cpu_stw_le_data_ra cpu_stw_le_data_ra_mips64 +#define cpu_stl_be_data_ra cpu_stl_be_data_ra_mips64 +#define cpu_stl_le_data_ra cpu_stl_le_data_ra_mips64 +#define cpu_stq_be_data_ra cpu_stq_be_data_ra_mips64 +#define cpu_stq_le_data_ra cpu_stq_le_data_ra_mips64 #define cpu_stb_data cpu_stb_data_mips64 -#define cpu_stw_data cpu_stw_data_mips64 -#define cpu_stl_data cpu_stl_data_mips64 -#define cpu_stq_data cpu_stq_data_mips64 +#define cpu_stw_be_data cpu_stw_be_data_mips64 +#define cpu_stw_le_data cpu_stw_le_data_mips64 +#define cpu_stl_be_data cpu_stl_be_data_mips64 +#define cpu_stl_le_data cpu_stl_le_data_mips64 +#define cpu_stq_be_data cpu_stq_be_data_mips64 +#define cpu_stq_le_data cpu_stq_le_data_mips64 #define helper_atomic_cmpxchgb_mmu helper_atomic_cmpxchgb_mmu_mips64 #define helper_atomic_xchgb_mmu helper_atomic_xchgb_mmu_mips64 #define helper_atomic_fetch_addb_mmu helper_atomic_fetch_addb_mmu_mips64 @@ -1101,6 +1142,7 @@ #define cpu_lduw_code cpu_lduw_code_mips64 #define cpu_ldl_code cpu_ldl_code_mips64 #define cpu_ldq_code cpu_ldq_code_mips64 +#define cpu_interrupt_handler cpu_interrupt_handler_mips64 #define helper_div_i32 helper_div_i32_mips64 #define helper_rem_i32 helper_rem_i32_mips64 #define helper_divu_i32 helper_divu_i32_mips64 @@ -1185,6 +1227,10 @@ #define helper_gvec_sar16i helper_gvec_sar16i_mips64 #define helper_gvec_sar32i helper_gvec_sar32i_mips64 #define helper_gvec_sar64i helper_gvec_sar64i_mips64 +#define helper_gvec_rotl8i helper_gvec_rotl8i_mips64 +#define helper_gvec_rotl16i helper_gvec_rotl16i_mips64 +#define helper_gvec_rotl32i helper_gvec_rotl32i_mips64 +#define 
helper_gvec_rotl64i helper_gvec_rotl64i_mips64 #define helper_gvec_shl8v helper_gvec_shl8v_mips64 #define helper_gvec_shl16v helper_gvec_shl16v_mips64 #define helper_gvec_shl32v helper_gvec_shl32v_mips64 @@ -1197,6 +1243,14 @@ #define helper_gvec_sar16v helper_gvec_sar16v_mips64 #define helper_gvec_sar32v helper_gvec_sar32v_mips64 #define helper_gvec_sar64v helper_gvec_sar64v_mips64 +#define helper_gvec_rotl8v helper_gvec_rotl8v_mips64 +#define helper_gvec_rotl16v helper_gvec_rotl16v_mips64 +#define helper_gvec_rotl32v helper_gvec_rotl32v_mips64 +#define helper_gvec_rotl64v helper_gvec_rotl64v_mips64 +#define helper_gvec_rotr8v helper_gvec_rotr8v_mips64 +#define helper_gvec_rotr16v helper_gvec_rotr16v_mips64 +#define helper_gvec_rotr32v helper_gvec_rotr32v_mips64 +#define helper_gvec_rotr64v helper_gvec_rotr64v_mips64 #define helper_gvec_eq8 helper_gvec_eq8_mips64 #define helper_gvec_ne8 helper_gvec_ne8_mips64 #define helper_gvec_lt8 helper_gvec_lt8_mips64 @@ -1677,7 +1731,6 @@ #define helper_rddsp helper_rddsp_mips64 #define helper_cfc1 helper_cfc1_mips64 #define helper_ctc1 helper_ctc1_mips64 -#define ieee_ex_to_mips ieee_ex_to_mips_mips64 #define helper_float_sqrt_d helper_float_sqrt_d_mips64 #define helper_float_sqrt_s helper_float_sqrt_s_mips64 #define helper_float_cvtd_s helper_float_cvtd_s_mips64 @@ -2232,23 +2285,59 @@ #define helper_msa_srlri_df helper_msa_srlri_df_mips64 #define helper_msa_binsli_df helper_msa_binsli_df_mips64 #define helper_msa_binsri_df helper_msa_binsri_df_mips64 -#define helper_msa_subv_df helper_msa_subv_df_mips64 -#define helper_msa_subs_s_df helper_msa_subs_s_df_mips64 -#define helper_msa_subs_u_df helper_msa_subs_u_df_mips64 -#define helper_msa_subsus_u_df helper_msa_subsus_u_df_mips64 -#define helper_msa_subsuu_s_df helper_msa_subsuu_s_df_mips64 -#define helper_msa_mulv_df helper_msa_mulv_df_mips64 -#define helper_msa_dotp_s_df helper_msa_dotp_s_df_mips64 -#define helper_msa_dotp_u_df helper_msa_dotp_u_df_mips64 +#define helper_msa_subv_b helper_msa_subv_b_mips64 +#define helper_msa_subv_h helper_msa_subv_h_mips64 +#define helper_msa_subv_w helper_msa_subv_w_mips64 +#define helper_msa_subv_d helper_msa_subv_d_mips64 +#define helper_msa_subs_s_b helper_msa_subs_s_b_mips64 +#define helper_msa_subs_s_h helper_msa_subs_s_h_mips64 +#define helper_msa_subs_s_w helper_msa_subs_s_w_mips64 +#define helper_msa_subs_s_d helper_msa_subs_s_d_mips64 +#define helper_msa_subs_u_b helper_msa_subs_u_b_mips64 +#define helper_msa_subs_u_h helper_msa_subs_u_h_mips64 +#define helper_msa_subs_u_w helper_msa_subs_u_w_mips64 +#define helper_msa_subs_u_d helper_msa_subs_u_d_mips64 +#define helper_msa_subsus_u_b helper_msa_subsus_u_b_mips64 +#define helper_msa_subsus_u_h helper_msa_subsus_u_h_mips64 +#define helper_msa_subsus_u_w helper_msa_subsus_u_w_mips64 +#define helper_msa_subsus_u_d helper_msa_subsus_u_d_mips64 +#define helper_msa_subsuu_s_b helper_msa_subsuu_s_b_mips64 +#define helper_msa_subsuu_s_h helper_msa_subsuu_s_h_mips64 +#define helper_msa_subsuu_s_w helper_msa_subsuu_s_w_mips64 +#define helper_msa_subsuu_s_d helper_msa_subsuu_s_d_mips64 +#define helper_msa_mulv_b helper_msa_mulv_b_mips64 +#define helper_msa_mulv_h helper_msa_mulv_h_mips64 +#define helper_msa_mulv_w helper_msa_mulv_w_mips64 +#define helper_msa_mulv_d helper_msa_mulv_d_mips64 +#define helper_msa_dotp_s_h helper_msa_dotp_s_h_mips64 +#define helper_msa_dotp_s_w helper_msa_dotp_s_w_mips64 +#define helper_msa_dotp_s_d helper_msa_dotp_s_d_mips64 +#define helper_msa_dotp_u_h helper_msa_dotp_u_h_mips64 
+#define helper_msa_dotp_u_w helper_msa_dotp_u_w_mips64 +#define helper_msa_dotp_u_d helper_msa_dotp_u_d_mips64 #define helper_msa_mul_q_df helper_msa_mul_q_df_mips64 #define helper_msa_mulr_q_df helper_msa_mulr_q_df_mips64 #define helper_msa_sld_df helper_msa_sld_df_mips64 -#define helper_msa_maddv_df helper_msa_maddv_df_mips64 -#define helper_msa_msubv_df helper_msa_msubv_df_mips64 -#define helper_msa_dpadd_s_df helper_msa_dpadd_s_df_mips64 -#define helper_msa_dpadd_u_df helper_msa_dpadd_u_df_mips64 -#define helper_msa_dpsub_s_df helper_msa_dpsub_s_df_mips64 -#define helper_msa_dpsub_u_df helper_msa_dpsub_u_df_mips64 +#define helper_msa_maddv_b helper_msa_maddv_b_mips64 +#define helper_msa_maddv_h helper_msa_maddv_h_mips64 +#define helper_msa_maddv_w helper_msa_maddv_w_mips64 +#define helper_msa_maddv_d helper_msa_maddv_d_mips64 +#define helper_msa_msubv_b helper_msa_msubv_b_mips64 +#define helper_msa_msubv_h helper_msa_msubv_h_mips64 +#define helper_msa_msubv_w helper_msa_msubv_w_mips64 +#define helper_msa_msubv_d helper_msa_msubv_d_mips64 +#define helper_msa_dpadd_s_h helper_msa_dpadd_s_h_mips64 +#define helper_msa_dpadd_s_w helper_msa_dpadd_s_w_mips64 +#define helper_msa_dpadd_s_d helper_msa_dpadd_s_d_mips64 +#define helper_msa_dpadd_u_h helper_msa_dpadd_u_h_mips64 +#define helper_msa_dpadd_u_w helper_msa_dpadd_u_w_mips64 +#define helper_msa_dpadd_u_d helper_msa_dpadd_u_d_mips64 +#define helper_msa_dpsub_s_h helper_msa_dpsub_s_h_mips64 +#define helper_msa_dpsub_s_w helper_msa_dpsub_s_w_mips64 +#define helper_msa_dpsub_s_d helper_msa_dpsub_s_d_mips64 +#define helper_msa_dpsub_u_h helper_msa_dpsub_u_h_mips64 +#define helper_msa_dpsub_u_w helper_msa_dpsub_u_w_mips64 +#define helper_msa_dpsub_u_d helper_msa_dpsub_u_d_mips64 #define helper_msa_binsl_df helper_msa_binsl_df_mips64 #define helper_msa_binsr_df helper_msa_binsr_df_mips64 #define helper_msa_madd_q_df helper_msa_madd_q_df_mips64 diff --git a/qemu/mips64el.h b/qemu/mips64el.h index 1c3f8ca26f..d8c1ac16b7 100644 --- a/qemu/mips64el.h +++ b/qemu/mips64el.h @@ -42,7 +42,10 @@ #define tcg_gen_shl_i64 tcg_gen_shl_i64_mips64el #define tcg_gen_shr_i64 tcg_gen_shr_i64_mips64el #define tcg_gen_st_i64 tcg_gen_st_i64_mips64el +#define tcg_gen_add_i64 tcg_gen_add_i64_mips64el +#define tcg_gen_sub_i64 tcg_gen_sub_i64_mips64el #define tcg_gen_xor_i64 tcg_gen_xor_i64_mips64el +#define tcg_gen_neg_i64 tcg_gen_neg_i64_mips64el #define cpu_icount_to_ns cpu_icount_to_ns_mips64el #define cpu_is_stopped cpu_is_stopped_mips64el #define cpu_get_ticks cpu_get_ticks_mips64el @@ -374,6 +377,8 @@ #define floatx80_sub floatx80_sub_mips64el #define floatx80_mul floatx80_mul_mips64el #define floatx80_div floatx80_div_mips64el +#define floatx80_modrem floatx80_modrem_mips64el +#define floatx80_mod floatx80_mod_mips64el #define floatx80_rem floatx80_rem_mips64el #define floatx80_sqrt floatx80_sqrt_mips64el #define floatx80_eq floatx80_eq_mips64el @@ -648,6 +653,7 @@ #define tcg_gen_gvec_dup_i32 tcg_gen_gvec_dup_i32_mips64el #define tcg_gen_gvec_dup_i64 tcg_gen_gvec_dup_i64_mips64el #define tcg_gen_gvec_dup_mem tcg_gen_gvec_dup_mem_mips64el +#define tcg_gen_gvec_dup_imm tcg_gen_gvec_dup_imm_mips64el #define tcg_gen_gvec_dup64i tcg_gen_gvec_dup64i_mips64el #define tcg_gen_gvec_dup32i tcg_gen_gvec_dup32i_mips64el #define tcg_gen_gvec_dup16i tcg_gen_gvec_dup16i_mips64el @@ -702,13 +708,20 @@ #define tcg_gen_gvec_shri tcg_gen_gvec_shri_mips64el #define tcg_gen_vec_sar8i_i64 tcg_gen_vec_sar8i_i64_mips64el #define tcg_gen_vec_sar16i_i64 tcg_gen_vec_sar16i_i64_mips64el 
+#define tcg_gen_vec_rotl8i_i64 tcg_gen_vec_rotl8i_i64_mips64el +#define tcg_gen_vec_rotl16i_i64 tcg_gen_vec_rotl16i_i64_mips64el #define tcg_gen_gvec_sari tcg_gen_gvec_sari_mips64el +#define tcg_gen_gvec_rotli tcg_gen_gvec_rotli_mips64el +#define tcg_gen_gvec_rotri tcg_gen_gvec_rotri_mips64el #define tcg_gen_gvec_shls tcg_gen_gvec_shls_mips64el #define tcg_gen_gvec_shrs tcg_gen_gvec_shrs_mips64el #define tcg_gen_gvec_sars tcg_gen_gvec_sars_mips64el +#define tcg_gen_gvec_rotls tcg_gen_gvec_rotls_mips64el #define tcg_gen_gvec_shlv tcg_gen_gvec_shlv_mips64el #define tcg_gen_gvec_shrv tcg_gen_gvec_shrv_mips64el #define tcg_gen_gvec_sarv tcg_gen_gvec_sarv_mips64el +#define tcg_gen_gvec_rotlv tcg_gen_gvec_rotlv_mips64el +#define tcg_gen_gvec_rotrv tcg_gen_gvec_rotrv_mips64el #define tcg_gen_gvec_cmp tcg_gen_gvec_cmp_mips64el #define tcg_gen_gvec_bitsel tcg_gen_gvec_bitsel_mips64el #define tcg_can_emit_vecop_list tcg_can_emit_vecop_list_mips64el @@ -745,6 +758,8 @@ #define tcg_gen_shli_vec tcg_gen_shli_vec_mips64el #define tcg_gen_shri_vec tcg_gen_shri_vec_mips64el #define tcg_gen_sari_vec tcg_gen_sari_vec_mips64el +#define tcg_gen_rotli_vec tcg_gen_rotli_vec_mips64el +#define tcg_gen_rotri_vec tcg_gen_rotri_vec_mips64el #define tcg_gen_cmp_vec tcg_gen_cmp_vec_mips64el #define tcg_gen_add_vec tcg_gen_add_vec_mips64el #define tcg_gen_sub_vec tcg_gen_sub_vec_mips64el @@ -760,9 +775,12 @@ #define tcg_gen_shlv_vec tcg_gen_shlv_vec_mips64el #define tcg_gen_shrv_vec tcg_gen_shrv_vec_mips64el #define tcg_gen_sarv_vec tcg_gen_sarv_vec_mips64el +#define tcg_gen_rotlv_vec tcg_gen_rotlv_vec_mips64el +#define tcg_gen_rotrv_vec tcg_gen_rotrv_vec_mips64el #define tcg_gen_shls_vec tcg_gen_shls_vec_mips64el #define tcg_gen_shrs_vec tcg_gen_shrs_vec_mips64el #define tcg_gen_sars_vec tcg_gen_sars_vec_mips64el +#define tcg_gen_rotls_vec tcg_gen_rotls_vec_mips64el #define tcg_gen_bitsel_vec tcg_gen_bitsel_vec_mips64el #define tcg_gen_cmpsel_vec tcg_gen_cmpsel_vec_mips64el #define tb_htable_lookup tb_htable_lookup_mips64el @@ -774,6 +792,7 @@ #define cpu_loop_exit_restore cpu_loop_exit_restore_mips64el #define cpu_loop_exit_atomic cpu_loop_exit_atomic_mips64el #define tlb_init tlb_init_mips64el +#define tlb_destroy tlb_destroy_mips64el #define tlb_flush_by_mmuidx tlb_flush_by_mmuidx_mips64el #define tlb_flush tlb_flush_mips64el #define tlb_flush_by_mmuidx_all_cpus tlb_flush_by_mmuidx_all_cpus_mips64el @@ -794,6 +813,7 @@ #define tlb_set_page tlb_set_page_mips64el #define get_page_addr_code_hostp get_page_addr_code_hostp_mips64el #define get_page_addr_code get_page_addr_code_mips64el +#define probe_access_flags probe_access_flags_mips64el #define probe_access probe_access_mips64el #define tlb_vaddr_to_host tlb_vaddr_to_host_mips64el #define helper_ret_ldub_mmu helper_ret_ldub_mmu_mips64el @@ -810,22 +830,34 @@ #define helper_be_ldsl_mmu helper_be_ldsl_mmu_mips64el #define cpu_ldub_mmuidx_ra cpu_ldub_mmuidx_ra_mips64el #define cpu_ldsb_mmuidx_ra cpu_ldsb_mmuidx_ra_mips64el -#define cpu_lduw_mmuidx_ra cpu_lduw_mmuidx_ra_mips64el -#define cpu_ldsw_mmuidx_ra cpu_ldsw_mmuidx_ra_mips64el -#define cpu_ldl_mmuidx_ra cpu_ldl_mmuidx_ra_mips64el -#define cpu_ldq_mmuidx_ra cpu_ldq_mmuidx_ra_mips64el +#define cpu_lduw_be_mmuidx_ra cpu_lduw_be_mmuidx_ra_mips64el +#define cpu_lduw_le_mmuidx_ra cpu_lduw_le_mmuidx_ra_mips64el +#define cpu_ldsw_be_mmuidx_ra cpu_ldsw_be_mmuidx_ra_mips64el +#define cpu_ldsw_le_mmuidx_ra cpu_ldsw_le_mmuidx_ra_mips64el +#define cpu_ldl_be_mmuidx_ra cpu_ldl_be_mmuidx_ra_mips64el +#define 
cpu_ldl_le_mmuidx_ra cpu_ldl_le_mmuidx_ra_mips64el +#define cpu_ldq_be_mmuidx_ra cpu_ldq_be_mmuidx_ra_mips64el +#define cpu_ldq_le_mmuidx_ra cpu_ldq_le_mmuidx_ra_mips64el #define cpu_ldub_data_ra cpu_ldub_data_ra_mips64el #define cpu_ldsb_data_ra cpu_ldsb_data_ra_mips64el -#define cpu_lduw_data_ra cpu_lduw_data_ra_mips64el -#define cpu_ldsw_data_ra cpu_ldsw_data_ra_mips64el -#define cpu_ldl_data_ra cpu_ldl_data_ra_mips64el -#define cpu_ldq_data_ra cpu_ldq_data_ra_mips64el +#define cpu_lduw_be_data_ra cpu_lduw_be_data_ra_mips64el +#define cpu_lduw_le_data_ra cpu_lduw_le_data_ra_mips64el +#define cpu_ldsw_be_data_ra cpu_ldsw_be_data_ra_mips64el +#define cpu_ldsw_le_data_ra cpu_ldsw_le_data_ra_mips64el +#define cpu_ldl_be_data_ra cpu_ldl_be_data_ra_mips64el +#define cpu_ldl_le_data_ra cpu_ldl_le_data_ra_mips64el +#define cpu_ldq_be_data_ra cpu_ldq_be_data_ra_mips64el +#define cpu_ldq_le_data_ra cpu_ldq_le_data_ra_mips64el #define cpu_ldub_data cpu_ldub_data_mips64el #define cpu_ldsb_data cpu_ldsb_data_mips64el -#define cpu_lduw_data cpu_lduw_data_mips64el -#define cpu_ldsw_data cpu_ldsw_data_mips64el -#define cpu_ldl_data cpu_ldl_data_mips64el -#define cpu_ldq_data cpu_ldq_data_mips64el +#define cpu_lduw_be_data cpu_lduw_be_data_mips64el +#define cpu_lduw_le_data cpu_lduw_le_data_mips64el +#define cpu_ldsw_be_data cpu_ldsw_be_data_mips64el +#define cpu_ldsw_le_data cpu_ldsw_le_data_mips64el +#define cpu_ldl_be_data cpu_ldl_be_data_mips64el +#define cpu_ldl_le_data cpu_ldl_le_data_mips64el +#define cpu_ldq_le_data cpu_ldq_le_data_mips64el +#define cpu_ldq_be_data cpu_ldq_be_data_mips64el #define helper_ret_stb_mmu helper_ret_stb_mmu_mips64el #define helper_le_stw_mmu helper_le_stw_mmu_mips64el #define helper_be_stw_mmu helper_be_stw_mmu_mips64el @@ -834,17 +866,26 @@ #define helper_le_stq_mmu helper_le_stq_mmu_mips64el #define helper_be_stq_mmu helper_be_stq_mmu_mips64el #define cpu_stb_mmuidx_ra cpu_stb_mmuidx_ra_mips64el -#define cpu_stw_mmuidx_ra cpu_stw_mmuidx_ra_mips64el -#define cpu_stl_mmuidx_ra cpu_stl_mmuidx_ra_mips64el -#define cpu_stq_mmuidx_ra cpu_stq_mmuidx_ra_mips64el +#define cpu_stw_be_mmuidx_ra cpu_stw_be_mmuidx_ra_mips64el +#define cpu_stw_le_mmuidx_ra cpu_stw_le_mmuidx_ra_mips64el +#define cpu_stl_be_mmuidx_ra cpu_stl_be_mmuidx_ra_mips64el +#define cpu_stl_le_mmuidx_ra cpu_stl_le_mmuidx_ra_mips64el +#define cpu_stq_be_mmuidx_ra cpu_stq_be_mmuidx_ra_mips64el +#define cpu_stq_le_mmuidx_ra cpu_stq_le_mmuidx_ra_mips64el #define cpu_stb_data_ra cpu_stb_data_ra_mips64el -#define cpu_stw_data_ra cpu_stw_data_ra_mips64el -#define cpu_stl_data_ra cpu_stl_data_ra_mips64el -#define cpu_stq_data_ra cpu_stq_data_ra_mips64el +#define cpu_stw_be_data_ra cpu_stw_be_data_ra_mips64el +#define cpu_stw_le_data_ra cpu_stw_le_data_ra_mips64el +#define cpu_stl_be_data_ra cpu_stl_be_data_ra_mips64el +#define cpu_stl_le_data_ra cpu_stl_le_data_ra_mips64el +#define cpu_stq_be_data_ra cpu_stq_be_data_ra_mips64el +#define cpu_stq_le_data_ra cpu_stq_le_data_ra_mips64el #define cpu_stb_data cpu_stb_data_mips64el -#define cpu_stw_data cpu_stw_data_mips64el -#define cpu_stl_data cpu_stl_data_mips64el -#define cpu_stq_data cpu_stq_data_mips64el +#define cpu_stw_be_data cpu_stw_be_data_mips64el +#define cpu_stw_le_data cpu_stw_le_data_mips64el +#define cpu_stl_be_data cpu_stl_be_data_mips64el +#define cpu_stl_le_data cpu_stl_le_data_mips64el +#define cpu_stq_be_data cpu_stq_be_data_mips64el +#define cpu_stq_le_data cpu_stq_le_data_mips64el #define helper_atomic_cmpxchgb_mmu 
helper_atomic_cmpxchgb_mmu_mips64el #define helper_atomic_xchgb_mmu helper_atomic_xchgb_mmu_mips64el #define helper_atomic_fetch_addb_mmu helper_atomic_fetch_addb_mmu_mips64el @@ -1101,6 +1142,7 @@ #define cpu_lduw_code cpu_lduw_code_mips64el #define cpu_ldl_code cpu_ldl_code_mips64el #define cpu_ldq_code cpu_ldq_code_mips64el +#define cpu_interrupt_handler cpu_interrupt_handler_mips64el #define helper_div_i32 helper_div_i32_mips64el #define helper_rem_i32 helper_rem_i32_mips64el #define helper_divu_i32 helper_divu_i32_mips64el @@ -1185,6 +1227,10 @@ #define helper_gvec_sar16i helper_gvec_sar16i_mips64el #define helper_gvec_sar32i helper_gvec_sar32i_mips64el #define helper_gvec_sar64i helper_gvec_sar64i_mips64el +#define helper_gvec_rotl8i helper_gvec_rotl8i_mips64el +#define helper_gvec_rotl16i helper_gvec_rotl16i_mips64el +#define helper_gvec_rotl32i helper_gvec_rotl32i_mips64el +#define helper_gvec_rotl64i helper_gvec_rotl64i_mips64el #define helper_gvec_shl8v helper_gvec_shl8v_mips64el #define helper_gvec_shl16v helper_gvec_shl16v_mips64el #define helper_gvec_shl32v helper_gvec_shl32v_mips64el @@ -1197,6 +1243,14 @@ #define helper_gvec_sar16v helper_gvec_sar16v_mips64el #define helper_gvec_sar32v helper_gvec_sar32v_mips64el #define helper_gvec_sar64v helper_gvec_sar64v_mips64el +#define helper_gvec_rotl8v helper_gvec_rotl8v_mips64el +#define helper_gvec_rotl16v helper_gvec_rotl16v_mips64el +#define helper_gvec_rotl32v helper_gvec_rotl32v_mips64el +#define helper_gvec_rotl64v helper_gvec_rotl64v_mips64el +#define helper_gvec_rotr8v helper_gvec_rotr8v_mips64el +#define helper_gvec_rotr16v helper_gvec_rotr16v_mips64el +#define helper_gvec_rotr32v helper_gvec_rotr32v_mips64el +#define helper_gvec_rotr64v helper_gvec_rotr64v_mips64el #define helper_gvec_eq8 helper_gvec_eq8_mips64el #define helper_gvec_ne8 helper_gvec_ne8_mips64el #define helper_gvec_lt8 helper_gvec_lt8_mips64el @@ -1677,7 +1731,6 @@ #define helper_rddsp helper_rddsp_mips64el #define helper_cfc1 helper_cfc1_mips64el #define helper_ctc1 helper_ctc1_mips64el -#define ieee_ex_to_mips ieee_ex_to_mips_mips64el #define helper_float_sqrt_d helper_float_sqrt_d_mips64el #define helper_float_sqrt_s helper_float_sqrt_s_mips64el #define helper_float_cvtd_s helper_float_cvtd_s_mips64el @@ -2232,23 +2285,59 @@ #define helper_msa_srlri_df helper_msa_srlri_df_mips64el #define helper_msa_binsli_df helper_msa_binsli_df_mips64el #define helper_msa_binsri_df helper_msa_binsri_df_mips64el -#define helper_msa_subv_df helper_msa_subv_df_mips64el -#define helper_msa_subs_s_df helper_msa_subs_s_df_mips64el -#define helper_msa_subs_u_df helper_msa_subs_u_df_mips64el -#define helper_msa_subsus_u_df helper_msa_subsus_u_df_mips64el -#define helper_msa_subsuu_s_df helper_msa_subsuu_s_df_mips64el -#define helper_msa_mulv_df helper_msa_mulv_df_mips64el -#define helper_msa_dotp_s_df helper_msa_dotp_s_df_mips64el -#define helper_msa_dotp_u_df helper_msa_dotp_u_df_mips64el +#define helper_msa_subv_b helper_msa_subv_b_mips64el +#define helper_msa_subv_h helper_msa_subv_h_mips64el +#define helper_msa_subv_w helper_msa_subv_w_mips64el +#define helper_msa_subv_d helper_msa_subv_d_mips64el +#define helper_msa_subs_s_b helper_msa_subs_s_b_mips64el +#define helper_msa_subs_s_h helper_msa_subs_s_h_mips64el +#define helper_msa_subs_s_w helper_msa_subs_s_w_mips64el +#define helper_msa_subs_s_d helper_msa_subs_s_d_mips64el +#define helper_msa_subs_u_b helper_msa_subs_u_b_mips64el +#define helper_msa_subs_u_h helper_msa_subs_u_h_mips64el +#define helper_msa_subs_u_w 
helper_msa_subs_u_w_mips64el +#define helper_msa_subs_u_d helper_msa_subs_u_d_mips64el +#define helper_msa_subsus_u_b helper_msa_subsus_u_b_mips64el +#define helper_msa_subsus_u_h helper_msa_subsus_u_h_mips64el +#define helper_msa_subsus_u_w helper_msa_subsus_u_w_mips64el +#define helper_msa_subsus_u_d helper_msa_subsus_u_d_mips64el +#define helper_msa_subsuu_s_b helper_msa_subsuu_s_b_mips64el +#define helper_msa_subsuu_s_h helper_msa_subsuu_s_h_mips64el +#define helper_msa_subsuu_s_w helper_msa_subsuu_s_w_mips64el +#define helper_msa_subsuu_s_d helper_msa_subsuu_s_d_mips64el +#define helper_msa_mulv_b helper_msa_mulv_b_mips64el +#define helper_msa_mulv_h helper_msa_mulv_h_mips64el +#define helper_msa_mulv_w helper_msa_mulv_w_mips64el +#define helper_msa_mulv_d helper_msa_mulv_d_mips64el +#define helper_msa_dotp_s_h helper_msa_dotp_s_h_mips64el +#define helper_msa_dotp_s_w helper_msa_dotp_s_w_mips64el +#define helper_msa_dotp_s_d helper_msa_dotp_s_d_mips64el +#define helper_msa_dotp_u_h helper_msa_dotp_u_h_mips64el +#define helper_msa_dotp_u_w helper_msa_dotp_u_w_mips64el +#define helper_msa_dotp_u_d helper_msa_dotp_u_d_mips64el #define helper_msa_mul_q_df helper_msa_mul_q_df_mips64el #define helper_msa_mulr_q_df helper_msa_mulr_q_df_mips64el #define helper_msa_sld_df helper_msa_sld_df_mips64el -#define helper_msa_maddv_df helper_msa_maddv_df_mips64el -#define helper_msa_msubv_df helper_msa_msubv_df_mips64el -#define helper_msa_dpadd_s_df helper_msa_dpadd_s_df_mips64el -#define helper_msa_dpadd_u_df helper_msa_dpadd_u_df_mips64el -#define helper_msa_dpsub_s_df helper_msa_dpsub_s_df_mips64el -#define helper_msa_dpsub_u_df helper_msa_dpsub_u_df_mips64el +#define helper_msa_maddv_b helper_msa_maddv_b_mips64el +#define helper_msa_maddv_h helper_msa_maddv_h_mips64el +#define helper_msa_maddv_w helper_msa_maddv_w_mips64el +#define helper_msa_maddv_d helper_msa_maddv_d_mips64el +#define helper_msa_msubv_b helper_msa_msubv_b_mips64el +#define helper_msa_msubv_h helper_msa_msubv_h_mips64el +#define helper_msa_msubv_w helper_msa_msubv_w_mips64el +#define helper_msa_msubv_d helper_msa_msubv_d_mips64el +#define helper_msa_dpadd_s_h helper_msa_dpadd_s_h_mips64el +#define helper_msa_dpadd_s_w helper_msa_dpadd_s_w_mips64el +#define helper_msa_dpadd_s_d helper_msa_dpadd_s_d_mips64el +#define helper_msa_dpadd_u_h helper_msa_dpadd_u_h_mips64el +#define helper_msa_dpadd_u_w helper_msa_dpadd_u_w_mips64el +#define helper_msa_dpadd_u_d helper_msa_dpadd_u_d_mips64el +#define helper_msa_dpsub_s_h helper_msa_dpsub_s_h_mips64el +#define helper_msa_dpsub_s_w helper_msa_dpsub_s_w_mips64el +#define helper_msa_dpsub_s_d helper_msa_dpsub_s_d_mips64el +#define helper_msa_dpsub_u_h helper_msa_dpsub_u_h_mips64el +#define helper_msa_dpsub_u_w helper_msa_dpsub_u_w_mips64el +#define helper_msa_dpsub_u_d helper_msa_dpsub_u_d_mips64el #define helper_msa_binsl_df helper_msa_binsl_df_mips64el #define helper_msa_binsr_df helper_msa_binsr_df_mips64el #define helper_msa_madd_q_df helper_msa_madd_q_df_mips64el diff --git a/qemu/mipsel.h b/qemu/mipsel.h index 511cfcfb0c..5e47f6184d 100644 --- a/qemu/mipsel.h +++ b/qemu/mipsel.h @@ -42,7 +42,10 @@ #define tcg_gen_shl_i64 tcg_gen_shl_i64_mipsel #define tcg_gen_shr_i64 tcg_gen_shr_i64_mipsel #define tcg_gen_st_i64 tcg_gen_st_i64_mipsel +#define tcg_gen_add_i64 tcg_gen_add_i64_mipsel +#define tcg_gen_sub_i64 tcg_gen_sub_i64_mipsel #define tcg_gen_xor_i64 tcg_gen_xor_i64_mipsel +#define tcg_gen_neg_i64 tcg_gen_neg_i64_mipsel #define cpu_icount_to_ns cpu_icount_to_ns_mipsel #define 
cpu_is_stopped cpu_is_stopped_mipsel #define cpu_get_ticks cpu_get_ticks_mipsel @@ -374,6 +377,8 @@ #define floatx80_sub floatx80_sub_mipsel #define floatx80_mul floatx80_mul_mipsel #define floatx80_div floatx80_div_mipsel +#define floatx80_modrem floatx80_modrem_mipsel +#define floatx80_mod floatx80_mod_mipsel #define floatx80_rem floatx80_rem_mipsel #define floatx80_sqrt floatx80_sqrt_mipsel #define floatx80_eq floatx80_eq_mipsel @@ -648,6 +653,7 @@ #define tcg_gen_gvec_dup_i32 tcg_gen_gvec_dup_i32_mipsel #define tcg_gen_gvec_dup_i64 tcg_gen_gvec_dup_i64_mipsel #define tcg_gen_gvec_dup_mem tcg_gen_gvec_dup_mem_mipsel +#define tcg_gen_gvec_dup_imm tcg_gen_gvec_dup_imm_mipsel #define tcg_gen_gvec_dup64i tcg_gen_gvec_dup64i_mipsel #define tcg_gen_gvec_dup32i tcg_gen_gvec_dup32i_mipsel #define tcg_gen_gvec_dup16i tcg_gen_gvec_dup16i_mipsel @@ -702,13 +708,20 @@ #define tcg_gen_gvec_shri tcg_gen_gvec_shri_mipsel #define tcg_gen_vec_sar8i_i64 tcg_gen_vec_sar8i_i64_mipsel #define tcg_gen_vec_sar16i_i64 tcg_gen_vec_sar16i_i64_mipsel +#define tcg_gen_vec_rotl8i_i64 tcg_gen_vec_rotl8i_i64_mipsel +#define tcg_gen_vec_rotl16i_i64 tcg_gen_vec_rotl16i_i64_mipsel #define tcg_gen_gvec_sari tcg_gen_gvec_sari_mipsel +#define tcg_gen_gvec_rotli tcg_gen_gvec_rotli_mipsel +#define tcg_gen_gvec_rotri tcg_gen_gvec_rotri_mipsel #define tcg_gen_gvec_shls tcg_gen_gvec_shls_mipsel #define tcg_gen_gvec_shrs tcg_gen_gvec_shrs_mipsel #define tcg_gen_gvec_sars tcg_gen_gvec_sars_mipsel +#define tcg_gen_gvec_rotls tcg_gen_gvec_rotls_mipsel #define tcg_gen_gvec_shlv tcg_gen_gvec_shlv_mipsel #define tcg_gen_gvec_shrv tcg_gen_gvec_shrv_mipsel #define tcg_gen_gvec_sarv tcg_gen_gvec_sarv_mipsel +#define tcg_gen_gvec_rotlv tcg_gen_gvec_rotlv_mipsel +#define tcg_gen_gvec_rotrv tcg_gen_gvec_rotrv_mipsel #define tcg_gen_gvec_cmp tcg_gen_gvec_cmp_mipsel #define tcg_gen_gvec_bitsel tcg_gen_gvec_bitsel_mipsel #define tcg_can_emit_vecop_list tcg_can_emit_vecop_list_mipsel @@ -745,6 +758,8 @@ #define tcg_gen_shli_vec tcg_gen_shli_vec_mipsel #define tcg_gen_shri_vec tcg_gen_shri_vec_mipsel #define tcg_gen_sari_vec tcg_gen_sari_vec_mipsel +#define tcg_gen_rotli_vec tcg_gen_rotli_vec_mipsel +#define tcg_gen_rotri_vec tcg_gen_rotri_vec_mipsel #define tcg_gen_cmp_vec tcg_gen_cmp_vec_mipsel #define tcg_gen_add_vec tcg_gen_add_vec_mipsel #define tcg_gen_sub_vec tcg_gen_sub_vec_mipsel @@ -760,9 +775,12 @@ #define tcg_gen_shlv_vec tcg_gen_shlv_vec_mipsel #define tcg_gen_shrv_vec tcg_gen_shrv_vec_mipsel #define tcg_gen_sarv_vec tcg_gen_sarv_vec_mipsel +#define tcg_gen_rotlv_vec tcg_gen_rotlv_vec_mipsel +#define tcg_gen_rotrv_vec tcg_gen_rotrv_vec_mipsel #define tcg_gen_shls_vec tcg_gen_shls_vec_mipsel #define tcg_gen_shrs_vec tcg_gen_shrs_vec_mipsel #define tcg_gen_sars_vec tcg_gen_sars_vec_mipsel +#define tcg_gen_rotls_vec tcg_gen_rotls_vec_mipsel #define tcg_gen_bitsel_vec tcg_gen_bitsel_vec_mipsel #define tcg_gen_cmpsel_vec tcg_gen_cmpsel_vec_mipsel #define tb_htable_lookup tb_htable_lookup_mipsel @@ -774,6 +792,7 @@ #define cpu_loop_exit_restore cpu_loop_exit_restore_mipsel #define cpu_loop_exit_atomic cpu_loop_exit_atomic_mipsel #define tlb_init tlb_init_mipsel +#define tlb_destroy tlb_destroy_mipsel #define tlb_flush_by_mmuidx tlb_flush_by_mmuidx_mipsel #define tlb_flush tlb_flush_mipsel #define tlb_flush_by_mmuidx_all_cpus tlb_flush_by_mmuidx_all_cpus_mipsel @@ -794,6 +813,7 @@ #define tlb_set_page tlb_set_page_mipsel #define get_page_addr_code_hostp get_page_addr_code_hostp_mipsel #define get_page_addr_code get_page_addr_code_mipsel 
+#define probe_access_flags probe_access_flags_mipsel #define probe_access probe_access_mipsel #define tlb_vaddr_to_host tlb_vaddr_to_host_mipsel #define helper_ret_ldub_mmu helper_ret_ldub_mmu_mipsel @@ -810,22 +830,34 @@ #define helper_be_ldsl_mmu helper_be_ldsl_mmu_mipsel #define cpu_ldub_mmuidx_ra cpu_ldub_mmuidx_ra_mipsel #define cpu_ldsb_mmuidx_ra cpu_ldsb_mmuidx_ra_mipsel -#define cpu_lduw_mmuidx_ra cpu_lduw_mmuidx_ra_mipsel -#define cpu_ldsw_mmuidx_ra cpu_ldsw_mmuidx_ra_mipsel -#define cpu_ldl_mmuidx_ra cpu_ldl_mmuidx_ra_mipsel -#define cpu_ldq_mmuidx_ra cpu_ldq_mmuidx_ra_mipsel +#define cpu_lduw_be_mmuidx_ra cpu_lduw_be_mmuidx_ra_mipsel +#define cpu_lduw_le_mmuidx_ra cpu_lduw_le_mmuidx_ra_mipsel +#define cpu_ldsw_be_mmuidx_ra cpu_ldsw_be_mmuidx_ra_mipsel +#define cpu_ldsw_le_mmuidx_ra cpu_ldsw_le_mmuidx_ra_mipsel +#define cpu_ldl_be_mmuidx_ra cpu_ldl_be_mmuidx_ra_mipsel +#define cpu_ldl_le_mmuidx_ra cpu_ldl_le_mmuidx_ra_mipsel +#define cpu_ldq_be_mmuidx_ra cpu_ldq_be_mmuidx_ra_mipsel +#define cpu_ldq_le_mmuidx_ra cpu_ldq_le_mmuidx_ra_mipsel #define cpu_ldub_data_ra cpu_ldub_data_ra_mipsel #define cpu_ldsb_data_ra cpu_ldsb_data_ra_mipsel -#define cpu_lduw_data_ra cpu_lduw_data_ra_mipsel -#define cpu_ldsw_data_ra cpu_ldsw_data_ra_mipsel -#define cpu_ldl_data_ra cpu_ldl_data_ra_mipsel -#define cpu_ldq_data_ra cpu_ldq_data_ra_mipsel +#define cpu_lduw_be_data_ra cpu_lduw_be_data_ra_mipsel +#define cpu_lduw_le_data_ra cpu_lduw_le_data_ra_mipsel +#define cpu_ldsw_be_data_ra cpu_ldsw_be_data_ra_mipsel +#define cpu_ldsw_le_data_ra cpu_ldsw_le_data_ra_mipsel +#define cpu_ldl_be_data_ra cpu_ldl_be_data_ra_mipsel +#define cpu_ldl_le_data_ra cpu_ldl_le_data_ra_mipsel +#define cpu_ldq_be_data_ra cpu_ldq_be_data_ra_mipsel +#define cpu_ldq_le_data_ra cpu_ldq_le_data_ra_mipsel #define cpu_ldub_data cpu_ldub_data_mipsel #define cpu_ldsb_data cpu_ldsb_data_mipsel -#define cpu_lduw_data cpu_lduw_data_mipsel -#define cpu_ldsw_data cpu_ldsw_data_mipsel -#define cpu_ldl_data cpu_ldl_data_mipsel -#define cpu_ldq_data cpu_ldq_data_mipsel +#define cpu_lduw_be_data cpu_lduw_be_data_mipsel +#define cpu_lduw_le_data cpu_lduw_le_data_mipsel +#define cpu_ldsw_be_data cpu_ldsw_be_data_mipsel +#define cpu_ldsw_le_data cpu_ldsw_le_data_mipsel +#define cpu_ldl_be_data cpu_ldl_be_data_mipsel +#define cpu_ldl_le_data cpu_ldl_le_data_mipsel +#define cpu_ldq_le_data cpu_ldq_le_data_mipsel +#define cpu_ldq_be_data cpu_ldq_be_data_mipsel #define helper_ret_stb_mmu helper_ret_stb_mmu_mipsel #define helper_le_stw_mmu helper_le_stw_mmu_mipsel #define helper_be_stw_mmu helper_be_stw_mmu_mipsel @@ -834,17 +866,26 @@ #define helper_le_stq_mmu helper_le_stq_mmu_mipsel #define helper_be_stq_mmu helper_be_stq_mmu_mipsel #define cpu_stb_mmuidx_ra cpu_stb_mmuidx_ra_mipsel -#define cpu_stw_mmuidx_ra cpu_stw_mmuidx_ra_mipsel -#define cpu_stl_mmuidx_ra cpu_stl_mmuidx_ra_mipsel -#define cpu_stq_mmuidx_ra cpu_stq_mmuidx_ra_mipsel +#define cpu_stw_be_mmuidx_ra cpu_stw_be_mmuidx_ra_mipsel +#define cpu_stw_le_mmuidx_ra cpu_stw_le_mmuidx_ra_mipsel +#define cpu_stl_be_mmuidx_ra cpu_stl_be_mmuidx_ra_mipsel +#define cpu_stl_le_mmuidx_ra cpu_stl_le_mmuidx_ra_mipsel +#define cpu_stq_be_mmuidx_ra cpu_stq_be_mmuidx_ra_mipsel +#define cpu_stq_le_mmuidx_ra cpu_stq_le_mmuidx_ra_mipsel #define cpu_stb_data_ra cpu_stb_data_ra_mipsel -#define cpu_stw_data_ra cpu_stw_data_ra_mipsel -#define cpu_stl_data_ra cpu_stl_data_ra_mipsel -#define cpu_stq_data_ra cpu_stq_data_ra_mipsel +#define cpu_stw_be_data_ra cpu_stw_be_data_ra_mipsel +#define 
cpu_stw_le_data_ra cpu_stw_le_data_ra_mipsel +#define cpu_stl_be_data_ra cpu_stl_be_data_ra_mipsel +#define cpu_stl_le_data_ra cpu_stl_le_data_ra_mipsel +#define cpu_stq_be_data_ra cpu_stq_be_data_ra_mipsel +#define cpu_stq_le_data_ra cpu_stq_le_data_ra_mipsel #define cpu_stb_data cpu_stb_data_mipsel -#define cpu_stw_data cpu_stw_data_mipsel -#define cpu_stl_data cpu_stl_data_mipsel -#define cpu_stq_data cpu_stq_data_mipsel +#define cpu_stw_be_data cpu_stw_be_data_mipsel +#define cpu_stw_le_data cpu_stw_le_data_mipsel +#define cpu_stl_be_data cpu_stl_be_data_mipsel +#define cpu_stl_le_data cpu_stl_le_data_mipsel +#define cpu_stq_be_data cpu_stq_be_data_mipsel +#define cpu_stq_le_data cpu_stq_le_data_mipsel #define helper_atomic_cmpxchgb_mmu helper_atomic_cmpxchgb_mmu_mipsel #define helper_atomic_xchgb_mmu helper_atomic_xchgb_mmu_mipsel #define helper_atomic_fetch_addb_mmu helper_atomic_fetch_addb_mmu_mipsel @@ -1101,6 +1142,7 @@ #define cpu_lduw_code cpu_lduw_code_mipsel #define cpu_ldl_code cpu_ldl_code_mipsel #define cpu_ldq_code cpu_ldq_code_mipsel +#define cpu_interrupt_handler cpu_interrupt_handler_mipsel #define helper_div_i32 helper_div_i32_mipsel #define helper_rem_i32 helper_rem_i32_mipsel #define helper_divu_i32 helper_divu_i32_mipsel @@ -1185,6 +1227,10 @@ #define helper_gvec_sar16i helper_gvec_sar16i_mipsel #define helper_gvec_sar32i helper_gvec_sar32i_mipsel #define helper_gvec_sar64i helper_gvec_sar64i_mipsel +#define helper_gvec_rotl8i helper_gvec_rotl8i_mipsel +#define helper_gvec_rotl16i helper_gvec_rotl16i_mipsel +#define helper_gvec_rotl32i helper_gvec_rotl32i_mipsel +#define helper_gvec_rotl64i helper_gvec_rotl64i_mipsel #define helper_gvec_shl8v helper_gvec_shl8v_mipsel #define helper_gvec_shl16v helper_gvec_shl16v_mipsel #define helper_gvec_shl32v helper_gvec_shl32v_mipsel @@ -1197,6 +1243,14 @@ #define helper_gvec_sar16v helper_gvec_sar16v_mipsel #define helper_gvec_sar32v helper_gvec_sar32v_mipsel #define helper_gvec_sar64v helper_gvec_sar64v_mipsel +#define helper_gvec_rotl8v helper_gvec_rotl8v_mipsel +#define helper_gvec_rotl16v helper_gvec_rotl16v_mipsel +#define helper_gvec_rotl32v helper_gvec_rotl32v_mipsel +#define helper_gvec_rotl64v helper_gvec_rotl64v_mipsel +#define helper_gvec_rotr8v helper_gvec_rotr8v_mipsel +#define helper_gvec_rotr16v helper_gvec_rotr16v_mipsel +#define helper_gvec_rotr32v helper_gvec_rotr32v_mipsel +#define helper_gvec_rotr64v helper_gvec_rotr64v_mipsel #define helper_gvec_eq8 helper_gvec_eq8_mipsel #define helper_gvec_ne8 helper_gvec_ne8_mipsel #define helper_gvec_lt8 helper_gvec_lt8_mipsel @@ -1677,7 +1731,6 @@ #define helper_rddsp helper_rddsp_mipsel #define helper_cfc1 helper_cfc1_mipsel #define helper_ctc1 helper_ctc1_mipsel -#define ieee_ex_to_mips ieee_ex_to_mips_mipsel #define helper_float_sqrt_d helper_float_sqrt_d_mipsel #define helper_float_sqrt_s helper_float_sqrt_s_mipsel #define helper_float_cvtd_s helper_float_cvtd_s_mipsel @@ -2232,23 +2285,59 @@ #define helper_msa_srlri_df helper_msa_srlri_df_mipsel #define helper_msa_binsli_df helper_msa_binsli_df_mipsel #define helper_msa_binsri_df helper_msa_binsri_df_mipsel -#define helper_msa_subv_df helper_msa_subv_df_mipsel -#define helper_msa_subs_s_df helper_msa_subs_s_df_mipsel -#define helper_msa_subs_u_df helper_msa_subs_u_df_mipsel -#define helper_msa_subsus_u_df helper_msa_subsus_u_df_mipsel -#define helper_msa_subsuu_s_df helper_msa_subsuu_s_df_mipsel -#define helper_msa_mulv_df helper_msa_mulv_df_mipsel -#define helper_msa_dotp_s_df helper_msa_dotp_s_df_mipsel -#define 
helper_msa_dotp_u_df helper_msa_dotp_u_df_mipsel +#define helper_msa_subv_b helper_msa_subv_b_mipsel +#define helper_msa_subv_h helper_msa_subv_h_mipsel +#define helper_msa_subv_w helper_msa_subv_w_mipsel +#define helper_msa_subv_d helper_msa_subv_d_mipsel +#define helper_msa_subs_s_b helper_msa_subs_s_b_mipsel +#define helper_msa_subs_s_h helper_msa_subs_s_h_mipsel +#define helper_msa_subs_s_w helper_msa_subs_s_w_mipsel +#define helper_msa_subs_s_d helper_msa_subs_s_d_mipsel +#define helper_msa_subs_u_b helper_msa_subs_u_b_mipsel +#define helper_msa_subs_u_h helper_msa_subs_u_h_mipsel +#define helper_msa_subs_u_w helper_msa_subs_u_w_mipsel +#define helper_msa_subs_u_d helper_msa_subs_u_d_mipsel +#define helper_msa_subsus_u_b helper_msa_subsus_u_b_mipsel +#define helper_msa_subsus_u_h helper_msa_subsus_u_h_mipsel +#define helper_msa_subsus_u_w helper_msa_subsus_u_w_mipsel +#define helper_msa_subsus_u_d helper_msa_subsus_u_d_mipsel +#define helper_msa_subsuu_s_b helper_msa_subsuu_s_b_mipsel +#define helper_msa_subsuu_s_h helper_msa_subsuu_s_h_mipsel +#define helper_msa_subsuu_s_w helper_msa_subsuu_s_w_mipsel +#define helper_msa_subsuu_s_d helper_msa_subsuu_s_d_mipsel +#define helper_msa_mulv_b helper_msa_mulv_b_mipsel +#define helper_msa_mulv_h helper_msa_mulv_h_mipsel +#define helper_msa_mulv_w helper_msa_mulv_w_mipsel +#define helper_msa_mulv_d helper_msa_mulv_d_mipsel +#define helper_msa_dotp_s_h helper_msa_dotp_s_h_mipsel +#define helper_msa_dotp_s_w helper_msa_dotp_s_w_mipsel +#define helper_msa_dotp_s_d helper_msa_dotp_s_d_mipsel +#define helper_msa_dotp_u_h helper_msa_dotp_u_h_mipsel +#define helper_msa_dotp_u_w helper_msa_dotp_u_w_mipsel +#define helper_msa_dotp_u_d helper_msa_dotp_u_d_mipsel #define helper_msa_mul_q_df helper_msa_mul_q_df_mipsel #define helper_msa_mulr_q_df helper_msa_mulr_q_df_mipsel #define helper_msa_sld_df helper_msa_sld_df_mipsel -#define helper_msa_maddv_df helper_msa_maddv_df_mipsel -#define helper_msa_msubv_df helper_msa_msubv_df_mipsel -#define helper_msa_dpadd_s_df helper_msa_dpadd_s_df_mipsel -#define helper_msa_dpadd_u_df helper_msa_dpadd_u_df_mipsel -#define helper_msa_dpsub_s_df helper_msa_dpsub_s_df_mipsel -#define helper_msa_dpsub_u_df helper_msa_dpsub_u_df_mipsel +#define helper_msa_maddv_b helper_msa_maddv_b_mipsel +#define helper_msa_maddv_h helper_msa_maddv_h_mipsel +#define helper_msa_maddv_w helper_msa_maddv_w_mipsel +#define helper_msa_maddv_d helper_msa_maddv_d_mipsel +#define helper_msa_msubv_b helper_msa_msubv_b_mipsel +#define helper_msa_msubv_h helper_msa_msubv_h_mipsel +#define helper_msa_msubv_w helper_msa_msubv_w_mipsel +#define helper_msa_msubv_d helper_msa_msubv_d_mipsel +#define helper_msa_dpadd_s_h helper_msa_dpadd_s_h_mipsel +#define helper_msa_dpadd_s_w helper_msa_dpadd_s_w_mipsel +#define helper_msa_dpadd_s_d helper_msa_dpadd_s_d_mipsel +#define helper_msa_dpadd_u_h helper_msa_dpadd_u_h_mipsel +#define helper_msa_dpadd_u_w helper_msa_dpadd_u_w_mipsel +#define helper_msa_dpadd_u_d helper_msa_dpadd_u_d_mipsel +#define helper_msa_dpsub_s_h helper_msa_dpsub_s_h_mipsel +#define helper_msa_dpsub_s_w helper_msa_dpsub_s_w_mipsel +#define helper_msa_dpsub_s_d helper_msa_dpsub_s_d_mipsel +#define helper_msa_dpsub_u_h helper_msa_dpsub_u_h_mipsel +#define helper_msa_dpsub_u_w helper_msa_dpsub_u_w_mipsel +#define helper_msa_dpsub_u_d helper_msa_dpsub_u_d_mipsel #define helper_msa_binsl_df helper_msa_binsl_df_mipsel #define helper_msa_binsr_df helper_msa_binsr_df_mipsel #define helper_msa_madd_q_df helper_msa_madd_q_df_mipsel diff --git 
a/qemu/ppc.h b/qemu/ppc.h index 7fd122913d..28ca9753e2 100644 --- a/qemu/ppc.h +++ b/qemu/ppc.h @@ -42,7 +42,10 @@ #define tcg_gen_shl_i64 tcg_gen_shl_i64_ppc #define tcg_gen_shr_i64 tcg_gen_shr_i64_ppc #define tcg_gen_st_i64 tcg_gen_st_i64_ppc +#define tcg_gen_add_i64 tcg_gen_add_i64_ppc +#define tcg_gen_sub_i64 tcg_gen_sub_i64_ppc #define tcg_gen_xor_i64 tcg_gen_xor_i64_ppc +#define tcg_gen_neg_i64 tcg_gen_neg_i64_ppc #define cpu_icount_to_ns cpu_icount_to_ns_ppc #define cpu_is_stopped cpu_is_stopped_ppc #define cpu_get_ticks cpu_get_ticks_ppc @@ -374,6 +377,8 @@ #define floatx80_sub floatx80_sub_ppc #define floatx80_mul floatx80_mul_ppc #define floatx80_div floatx80_div_ppc +#define floatx80_modrem floatx80_modrem_ppc +#define floatx80_mod floatx80_mod_ppc #define floatx80_rem floatx80_rem_ppc #define floatx80_sqrt floatx80_sqrt_ppc #define floatx80_eq floatx80_eq_ppc @@ -648,6 +653,7 @@ #define tcg_gen_gvec_dup_i32 tcg_gen_gvec_dup_i32_ppc #define tcg_gen_gvec_dup_i64 tcg_gen_gvec_dup_i64_ppc #define tcg_gen_gvec_dup_mem tcg_gen_gvec_dup_mem_ppc +#define tcg_gen_gvec_dup_imm tcg_gen_gvec_dup_imm_ppc #define tcg_gen_gvec_dup64i tcg_gen_gvec_dup64i_ppc #define tcg_gen_gvec_dup32i tcg_gen_gvec_dup32i_ppc #define tcg_gen_gvec_dup16i tcg_gen_gvec_dup16i_ppc @@ -702,13 +708,20 @@ #define tcg_gen_gvec_shri tcg_gen_gvec_shri_ppc #define tcg_gen_vec_sar8i_i64 tcg_gen_vec_sar8i_i64_ppc #define tcg_gen_vec_sar16i_i64 tcg_gen_vec_sar16i_i64_ppc +#define tcg_gen_vec_rotl8i_i64 tcg_gen_vec_rotl8i_i64_ppc +#define tcg_gen_vec_rotl16i_i64 tcg_gen_vec_rotl16i_i64_ppc #define tcg_gen_gvec_sari tcg_gen_gvec_sari_ppc +#define tcg_gen_gvec_rotli tcg_gen_gvec_rotli_ppc +#define tcg_gen_gvec_rotri tcg_gen_gvec_rotri_ppc #define tcg_gen_gvec_shls tcg_gen_gvec_shls_ppc #define tcg_gen_gvec_shrs tcg_gen_gvec_shrs_ppc #define tcg_gen_gvec_sars tcg_gen_gvec_sars_ppc +#define tcg_gen_gvec_rotls tcg_gen_gvec_rotls_ppc #define tcg_gen_gvec_shlv tcg_gen_gvec_shlv_ppc #define tcg_gen_gvec_shrv tcg_gen_gvec_shrv_ppc #define tcg_gen_gvec_sarv tcg_gen_gvec_sarv_ppc +#define tcg_gen_gvec_rotlv tcg_gen_gvec_rotlv_ppc +#define tcg_gen_gvec_rotrv tcg_gen_gvec_rotrv_ppc #define tcg_gen_gvec_cmp tcg_gen_gvec_cmp_ppc #define tcg_gen_gvec_bitsel tcg_gen_gvec_bitsel_ppc #define tcg_can_emit_vecop_list tcg_can_emit_vecop_list_ppc @@ -745,6 +758,8 @@ #define tcg_gen_shli_vec tcg_gen_shli_vec_ppc #define tcg_gen_shri_vec tcg_gen_shri_vec_ppc #define tcg_gen_sari_vec tcg_gen_sari_vec_ppc +#define tcg_gen_rotli_vec tcg_gen_rotli_vec_ppc +#define tcg_gen_rotri_vec tcg_gen_rotri_vec_ppc #define tcg_gen_cmp_vec tcg_gen_cmp_vec_ppc #define tcg_gen_add_vec tcg_gen_add_vec_ppc #define tcg_gen_sub_vec tcg_gen_sub_vec_ppc @@ -760,9 +775,12 @@ #define tcg_gen_shlv_vec tcg_gen_shlv_vec_ppc #define tcg_gen_shrv_vec tcg_gen_shrv_vec_ppc #define tcg_gen_sarv_vec tcg_gen_sarv_vec_ppc +#define tcg_gen_rotlv_vec tcg_gen_rotlv_vec_ppc +#define tcg_gen_rotrv_vec tcg_gen_rotrv_vec_ppc #define tcg_gen_shls_vec tcg_gen_shls_vec_ppc #define tcg_gen_shrs_vec tcg_gen_shrs_vec_ppc #define tcg_gen_sars_vec tcg_gen_sars_vec_ppc +#define tcg_gen_rotls_vec tcg_gen_rotls_vec_ppc #define tcg_gen_bitsel_vec tcg_gen_bitsel_vec_ppc #define tcg_gen_cmpsel_vec tcg_gen_cmpsel_vec_ppc #define tb_htable_lookup tb_htable_lookup_ppc @@ -774,6 +792,7 @@ #define cpu_loop_exit_restore cpu_loop_exit_restore_ppc #define cpu_loop_exit_atomic cpu_loop_exit_atomic_ppc #define tlb_init tlb_init_ppc +#define tlb_destroy tlb_destroy_ppc #define tlb_flush_by_mmuidx 
tlb_flush_by_mmuidx_ppc #define tlb_flush tlb_flush_ppc #define tlb_flush_by_mmuidx_all_cpus tlb_flush_by_mmuidx_all_cpus_ppc @@ -794,6 +813,7 @@ #define tlb_set_page tlb_set_page_ppc #define get_page_addr_code_hostp get_page_addr_code_hostp_ppc #define get_page_addr_code get_page_addr_code_ppc +#define probe_access_flags probe_access_flags_ppc #define probe_access probe_access_ppc #define tlb_vaddr_to_host tlb_vaddr_to_host_ppc #define helper_ret_ldub_mmu helper_ret_ldub_mmu_ppc @@ -810,22 +830,34 @@ #define helper_be_ldsl_mmu helper_be_ldsl_mmu_ppc #define cpu_ldub_mmuidx_ra cpu_ldub_mmuidx_ra_ppc #define cpu_ldsb_mmuidx_ra cpu_ldsb_mmuidx_ra_ppc -#define cpu_lduw_mmuidx_ra cpu_lduw_mmuidx_ra_ppc -#define cpu_ldsw_mmuidx_ra cpu_ldsw_mmuidx_ra_ppc -#define cpu_ldl_mmuidx_ra cpu_ldl_mmuidx_ra_ppc -#define cpu_ldq_mmuidx_ra cpu_ldq_mmuidx_ra_ppc +#define cpu_lduw_be_mmuidx_ra cpu_lduw_be_mmuidx_ra_ppc +#define cpu_lduw_le_mmuidx_ra cpu_lduw_le_mmuidx_ra_ppc +#define cpu_ldsw_be_mmuidx_ra cpu_ldsw_be_mmuidx_ra_ppc +#define cpu_ldsw_le_mmuidx_ra cpu_ldsw_le_mmuidx_ra_ppc +#define cpu_ldl_be_mmuidx_ra cpu_ldl_be_mmuidx_ra_ppc +#define cpu_ldl_le_mmuidx_ra cpu_ldl_le_mmuidx_ra_ppc +#define cpu_ldq_be_mmuidx_ra cpu_ldq_be_mmuidx_ra_ppc +#define cpu_ldq_le_mmuidx_ra cpu_ldq_le_mmuidx_ra_ppc #define cpu_ldub_data_ra cpu_ldub_data_ra_ppc #define cpu_ldsb_data_ra cpu_ldsb_data_ra_ppc -#define cpu_lduw_data_ra cpu_lduw_data_ra_ppc -#define cpu_ldsw_data_ra cpu_ldsw_data_ra_ppc -#define cpu_ldl_data_ra cpu_ldl_data_ra_ppc -#define cpu_ldq_data_ra cpu_ldq_data_ra_ppc +#define cpu_lduw_be_data_ra cpu_lduw_be_data_ra_ppc +#define cpu_lduw_le_data_ra cpu_lduw_le_data_ra_ppc +#define cpu_ldsw_be_data_ra cpu_ldsw_be_data_ra_ppc +#define cpu_ldsw_le_data_ra cpu_ldsw_le_data_ra_ppc +#define cpu_ldl_be_data_ra cpu_ldl_be_data_ra_ppc +#define cpu_ldl_le_data_ra cpu_ldl_le_data_ra_ppc +#define cpu_ldq_be_data_ra cpu_ldq_be_data_ra_ppc +#define cpu_ldq_le_data_ra cpu_ldq_le_data_ra_ppc #define cpu_ldub_data cpu_ldub_data_ppc #define cpu_ldsb_data cpu_ldsb_data_ppc -#define cpu_lduw_data cpu_lduw_data_ppc -#define cpu_ldsw_data cpu_ldsw_data_ppc -#define cpu_ldl_data cpu_ldl_data_ppc -#define cpu_ldq_data cpu_ldq_data_ppc +#define cpu_lduw_be_data cpu_lduw_be_data_ppc +#define cpu_lduw_le_data cpu_lduw_le_data_ppc +#define cpu_ldsw_be_data cpu_ldsw_be_data_ppc +#define cpu_ldsw_le_data cpu_ldsw_le_data_ppc +#define cpu_ldl_be_data cpu_ldl_be_data_ppc +#define cpu_ldl_le_data cpu_ldl_le_data_ppc +#define cpu_ldq_le_data cpu_ldq_le_data_ppc +#define cpu_ldq_be_data cpu_ldq_be_data_ppc #define helper_ret_stb_mmu helper_ret_stb_mmu_ppc #define helper_le_stw_mmu helper_le_stw_mmu_ppc #define helper_be_stw_mmu helper_be_stw_mmu_ppc @@ -834,17 +866,26 @@ #define helper_le_stq_mmu helper_le_stq_mmu_ppc #define helper_be_stq_mmu helper_be_stq_mmu_ppc #define cpu_stb_mmuidx_ra cpu_stb_mmuidx_ra_ppc -#define cpu_stw_mmuidx_ra cpu_stw_mmuidx_ra_ppc -#define cpu_stl_mmuidx_ra cpu_stl_mmuidx_ra_ppc -#define cpu_stq_mmuidx_ra cpu_stq_mmuidx_ra_ppc +#define cpu_stw_be_mmuidx_ra cpu_stw_be_mmuidx_ra_ppc +#define cpu_stw_le_mmuidx_ra cpu_stw_le_mmuidx_ra_ppc +#define cpu_stl_be_mmuidx_ra cpu_stl_be_mmuidx_ra_ppc +#define cpu_stl_le_mmuidx_ra cpu_stl_le_mmuidx_ra_ppc +#define cpu_stq_be_mmuidx_ra cpu_stq_be_mmuidx_ra_ppc +#define cpu_stq_le_mmuidx_ra cpu_stq_le_mmuidx_ra_ppc #define cpu_stb_data_ra cpu_stb_data_ra_ppc -#define cpu_stw_data_ra cpu_stw_data_ra_ppc -#define cpu_stl_data_ra cpu_stl_data_ra_ppc -#define cpu_stq_data_ra 
cpu_stq_data_ra_ppc +#define cpu_stw_be_data_ra cpu_stw_be_data_ra_ppc +#define cpu_stw_le_data_ra cpu_stw_le_data_ra_ppc +#define cpu_stl_be_data_ra cpu_stl_be_data_ra_ppc +#define cpu_stl_le_data_ra cpu_stl_le_data_ra_ppc +#define cpu_stq_be_data_ra cpu_stq_be_data_ra_ppc +#define cpu_stq_le_data_ra cpu_stq_le_data_ra_ppc #define cpu_stb_data cpu_stb_data_ppc -#define cpu_stw_data cpu_stw_data_ppc -#define cpu_stl_data cpu_stl_data_ppc -#define cpu_stq_data cpu_stq_data_ppc +#define cpu_stw_be_data cpu_stw_be_data_ppc +#define cpu_stw_le_data cpu_stw_le_data_ppc +#define cpu_stl_be_data cpu_stl_be_data_ppc +#define cpu_stl_le_data cpu_stl_le_data_ppc +#define cpu_stq_be_data cpu_stq_be_data_ppc +#define cpu_stq_le_data cpu_stq_le_data_ppc #define helper_atomic_cmpxchgb_mmu helper_atomic_cmpxchgb_mmu_ppc #define helper_atomic_xchgb_mmu helper_atomic_xchgb_mmu_ppc #define helper_atomic_fetch_addb_mmu helper_atomic_fetch_addb_mmu_ppc @@ -1101,6 +1142,7 @@ #define cpu_lduw_code cpu_lduw_code_ppc #define cpu_ldl_code cpu_ldl_code_ppc #define cpu_ldq_code cpu_ldq_code_ppc +#define cpu_interrupt_handler cpu_interrupt_handler_ppc #define helper_div_i32 helper_div_i32_ppc #define helper_rem_i32 helper_rem_i32_ppc #define helper_divu_i32 helper_divu_i32_ppc @@ -1185,6 +1227,10 @@ #define helper_gvec_sar16i helper_gvec_sar16i_ppc #define helper_gvec_sar32i helper_gvec_sar32i_ppc #define helper_gvec_sar64i helper_gvec_sar64i_ppc +#define helper_gvec_rotl8i helper_gvec_rotl8i_ppc +#define helper_gvec_rotl16i helper_gvec_rotl16i_ppc +#define helper_gvec_rotl32i helper_gvec_rotl32i_ppc +#define helper_gvec_rotl64i helper_gvec_rotl64i_ppc #define helper_gvec_shl8v helper_gvec_shl8v_ppc #define helper_gvec_shl16v helper_gvec_shl16v_ppc #define helper_gvec_shl32v helper_gvec_shl32v_ppc @@ -1197,6 +1243,14 @@ #define helper_gvec_sar16v helper_gvec_sar16v_ppc #define helper_gvec_sar32v helper_gvec_sar32v_ppc #define helper_gvec_sar64v helper_gvec_sar64v_ppc +#define helper_gvec_rotl8v helper_gvec_rotl8v_ppc +#define helper_gvec_rotl16v helper_gvec_rotl16v_ppc +#define helper_gvec_rotl32v helper_gvec_rotl32v_ppc +#define helper_gvec_rotl64v helper_gvec_rotl64v_ppc +#define helper_gvec_rotr8v helper_gvec_rotr8v_ppc +#define helper_gvec_rotr16v helper_gvec_rotr16v_ppc +#define helper_gvec_rotr32v helper_gvec_rotr32v_ppc +#define helper_gvec_rotr64v helper_gvec_rotr64v_ppc #define helper_gvec_eq8 helper_gvec_eq8_ppc #define helper_gvec_ne8 helper_gvec_ne8_ppc #define helper_gvec_lt8 helper_gvec_lt8_ppc @@ -1575,6 +1629,33 @@ #define helper_tbegin helper_tbegin_ppc #define helper_load_dump_spr helper_load_dump_spr_ppc #define helper_store_dump_spr helper_store_dump_spr_ppc +#define store_fpscr store_fpscr_ppc +#define helper_store_fpscr helper_store_fpscr_ppc +#define helper_float_check_status helper_float_check_status_ppc +#define helper_reset_fpstatus helper_reset_fpstatus_ppc +#define helper_fadd helper_fadd_ppc +#define helper_fsub helper_fsub_ppc +#define helper_fmul helper_fmul_ppc +#define helper_fdiv helper_fdiv_ppc +#define helper_fctiw helper_fctiw_ppc +#define helper_fctiwz helper_fctiwz_ppc +#define helper_fctiwuz helper_fctiwuz_ppc +#define helper_fctid helper_fctid_ppc +#define helper_fctidz helper_fctidz_ppc +#define helper_fctidu helper_fctidu_ppc +#define helper_fctiduz helper_fctiduz_ppc +#define helper_fcfid helper_fcfid_ppc +#define helper_fcfids helper_fcfids_ppc +#define helper_fcfidu helper_fcfidu_ppc +#define helper_fcfidus helper_fcfidus_ppc +#define helper_frin helper_frin_ppc +#define 
helper_friz helper_friz_ppc +#define helper_frip helper_frip_ppc +#define helper_frim helper_frim_ppc +#define helper_fmadd helper_fmadd_ppc +#define helper_fnmadd helper_fnmadd_ppc +#define helper_fmsub helper_fmsub_ppc +#define helper_fnmsub helper_fnmsub_ppc #define helper_hfscr_facility_check helper_hfscr_facility_check_ppc #define helper_fscr_facility_check helper_fscr_facility_check_ppc #define helper_msr_facility_check helper_msr_facility_check_ppc @@ -1726,6 +1807,243 @@ #define ppc_hash32_handle_mmu_fault ppc_hash32_handle_mmu_fault_ppc #define gen_helper_store_booke_tsr gen_helper_store_booke_tsr_ppc #define gen_helper_store_booke_tcr gen_helper_store_booke_tcr_ppc +#define gen_helper_store_fpscr gen_helper_store_fpscr_ppc #define store_booke_tcr store_booke_tcr_ppc #define ppc_hash32_get_phys_page_debug ppc_hash32_get_phys_page_debug_ppc +#define helper_compute_fprf_float128 helper_compute_fprf_float128_ppc +#define helper_compute_fprf_float16 helper_compute_fprf_float16_ppc +#define helper_compute_fprf_float32 helper_compute_fprf_float32_ppc +#define helper_compute_fprf_float64 helper_compute_fprf_float64_ppc +#define helper_efdadd helper_efdadd_ppc +#define helper_efdcfs helper_efdcfs_ppc +#define helper_efdcfsf helper_efdcfsf_ppc +#define helper_efdcfsi helper_efdcfsi_ppc +#define helper_efdcfsid helper_efdcfsid_ppc +#define helper_efdcfuf helper_efdcfuf_ppc +#define helper_efdcfui helper_efdcfui_ppc +#define helper_efdcfuid helper_efdcfuid_ppc +#define helper_efdcmpeq helper_efdcmpeq_ppc +#define helper_efdcmpgt helper_efdcmpgt_ppc +#define helper_efdcmplt helper_efdcmplt_ppc +#define helper_efdctsf helper_efdctsf_ppc +#define helper_efdctsi helper_efdctsi_ppc +#define helper_efdctsidz helper_efdctsidz_ppc +#define helper_efdctsiz helper_efdctsiz_ppc +#define helper_efdctuf helper_efdctuf_ppc +#define helper_efdctui helper_efdctui_ppc +#define helper_efdctuidz helper_efdctuidz_ppc +#define helper_efdctuiz helper_efdctuiz_ppc +#define helper_efddiv helper_efddiv_ppc +#define helper_efdmul helper_efdmul_ppc +#define helper_efdsub helper_efdsub_ppc +#define helper_efdtsteq helper_efdtsteq_ppc +#define helper_efdtstgt helper_efdtstgt_ppc +#define helper_efdtstlt helper_efdtstlt_ppc +#define helper_efsadd helper_efsadd_ppc +#define helper_efscfd helper_efscfd_ppc +#define helper_efscfsf helper_efscfsf_ppc +#define helper_efscfsi helper_efscfsi_ppc +#define helper_efscfuf helper_efscfuf_ppc +#define helper_efscfui helper_efscfui_ppc +#define helper_efscmpeq helper_efscmpeq_ppc +#define helper_efscmpgt helper_efscmpgt_ppc +#define helper_efscmplt helper_efscmplt_ppc +#define helper_efsctsf helper_efsctsf_ppc +#define helper_efsctsi helper_efsctsi_ppc +#define helper_efsctsiz helper_efsctsiz_ppc +#define helper_efsctuf helper_efsctuf_ppc +#define helper_efsctui helper_efsctui_ppc +#define helper_efsctuiz helper_efsctuiz_ppc +#define helper_efsdiv helper_efsdiv_ppc +#define helper_efsmul helper_efsmul_ppc +#define helper_efssub helper_efssub_ppc +#define helper_efststeq helper_efststeq_ppc +#define helper_efststgt helper_efststgt_ppc +#define helper_efststlt helper_efststlt_ppc +#define helper_evfsadd helper_evfsadd_ppc +#define helper_evfscfsf helper_evfscfsf_ppc +#define helper_evfscfsi helper_evfscfsi_ppc +#define helper_evfscfuf helper_evfscfuf_ppc +#define helper_evfscfui helper_evfscfui_ppc +#define helper_evfscmpeq helper_evfscmpeq_ppc +#define helper_evfscmpgt helper_evfscmpgt_ppc +#define helper_evfscmplt helper_evfscmplt_ppc +#define helper_evfsctsf helper_evfsctsf_ppc 
+#define helper_evfsctsi helper_evfsctsi_ppc +#define helper_evfsctsiz helper_evfsctsiz_ppc +#define helper_evfsctuf helper_evfsctuf_ppc +#define helper_evfsctui helper_evfsctui_ppc +#define helper_evfsctuiz helper_evfsctuiz_ppc +#define helper_evfsdiv helper_evfsdiv_ppc +#define helper_evfsmul helper_evfsmul_ppc +#define helper_evfssub helper_evfssub_ppc +#define helper_evfststeq helper_evfststeq_ppc +#define helper_evfststgt helper_evfststgt_ppc +#define helper_evfststlt helper_evfststlt_ppc +#define helper_fcmpo helper_fcmpo_ppc +#define helper_fcmpu helper_fcmpu_ppc +#define helper_fctiwu helper_fctiwu_ppc +#define helper_fpscr_clrbit helper_fpscr_clrbit_ppc +#define helper_fpscr_setbit helper_fpscr_setbit_ppc +#define helper_fre helper_fre_ppc +#define helper_fres helper_fres_ppc +#define helper_frsp helper_frsp_ppc +#define helper_frsqrte helper_frsqrte_ppc +#define helper_fsel helper_fsel_ppc +#define helper_fsqrt helper_fsqrt_ppc +#define helper_ftdiv helper_ftdiv_ppc +#define helper_ftsqrt helper_ftsqrt_ppc +#define helper_todouble helper_todouble_ppc +#define helper_tosingle helper_tosingle_ppc +#define helper_xsadddp helper_xsadddp_ppc +#define helper_xsaddqp helper_xsaddqp_ppc +#define helper_xsaddsp helper_xsaddsp_ppc +#define helper_xscmpeqdp helper_xscmpeqdp_ppc +#define helper_xscmpexpdp helper_xscmpexpdp_ppc +#define helper_xscmpexpqp helper_xscmpexpqp_ppc +#define helper_xscmpgedp helper_xscmpgedp_ppc +#define helper_xscmpgtdp helper_xscmpgtdp_ppc +#define helper_xscmpnedp helper_xscmpnedp_ppc +#define helper_xscmpodp helper_xscmpodp_ppc +#define helper_xscmpoqp helper_xscmpoqp_ppc +#define helper_xscmpudp helper_xscmpudp_ppc +#define helper_xscmpuqp helper_xscmpuqp_ppc +#define helper_xscvdphp helper_xscvdphp_ppc +#define helper_xscvdpqp helper_xscvdpqp_ppc +#define helper_xscvdpsp helper_xscvdpsp_ppc +#define helper_xscvdpspn helper_xscvdpspn_ppc +#define helper_xscvdpsxds helper_xscvdpsxds_ppc +#define helper_xscvdpsxws helper_xscvdpsxws_ppc +#define helper_xscvdpuxds helper_xscvdpuxds_ppc +#define helper_xscvdpuxws helper_xscvdpuxws_ppc +#define helper_xscvhpdp helper_xscvhpdp_ppc +#define helper_xscvqpdp helper_xscvqpdp_ppc +#define helper_xscvqpsdz helper_xscvqpsdz_ppc +#define helper_xscvqpswz helper_xscvqpswz_ppc +#define helper_xscvqpudz helper_xscvqpudz_ppc +#define helper_xscvqpuwz helper_xscvqpuwz_ppc +#define helper_xscvsdqp helper_xscvsdqp_ppc +#define helper_xscvspdp helper_xscvspdp_ppc +#define helper_xscvspdpn helper_xscvspdpn_ppc +#define helper_xscvsxddp helper_xscvsxddp_ppc +#define helper_xscvsxdsp helper_xscvsxdsp_ppc +#define helper_xscvudqp helper_xscvudqp_ppc +#define helper_xscvuxddp helper_xscvuxddp_ppc +#define helper_xscvuxdsp helper_xscvuxdsp_ppc +#define helper_xsdivdp helper_xsdivdp_ppc +#define helper_xsdivqp helper_xsdivqp_ppc +#define helper_xsdivsp helper_xsdivsp_ppc +#define helper_xsmadddp helper_xsmadddp_ppc +#define helper_xsmaddsp helper_xsmaddsp_ppc +#define helper_xsmaxcdp helper_xsmaxcdp_ppc +#define helper_xsmaxdp helper_xsmaxdp_ppc +#define helper_xsmaxjdp helper_xsmaxjdp_ppc +#define helper_xsmincdp helper_xsmincdp_ppc +#define helper_xsmindp helper_xsmindp_ppc +#define helper_xsminjdp helper_xsminjdp_ppc +#define helper_xsmsubdp helper_xsmsubdp_ppc +#define helper_xsmsubsp helper_xsmsubsp_ppc +#define helper_xsmuldp helper_xsmuldp_ppc +#define helper_xsmulqp helper_xsmulqp_ppc +#define helper_xsmulsp helper_xsmulsp_ppc +#define helper_xsnmadddp helper_xsnmadddp_ppc +#define helper_xsnmaddsp helper_xsnmaddsp_ppc +#define 
helper_xsnmsubdp helper_xsnmsubdp_ppc +#define helper_xsnmsubsp helper_xsnmsubsp_ppc +#define helper_xsrdpi helper_xsrdpi_ppc +#define helper_xsrdpic helper_xsrdpic_ppc +#define helper_xsrdpim helper_xsrdpim_ppc +#define helper_xsrdpip helper_xsrdpip_ppc +#define helper_xsrdpiz helper_xsrdpiz_ppc +#define helper_xsredp helper_xsredp_ppc +#define helper_xsresp helper_xsresp_ppc +#define helper_xsrqpi helper_xsrqpi_ppc +#define helper_xsrqpxp helper_xsrqpxp_ppc +#define helper_xsrsp helper_xsrsp_ppc +#define helper_xsrsqrtedp helper_xsrsqrtedp_ppc +#define helper_xsrsqrtesp helper_xsrsqrtesp_ppc +#define helper_xssqrtdp helper_xssqrtdp_ppc +#define helper_xssqrtqp helper_xssqrtqp_ppc +#define helper_xssqrtsp helper_xssqrtsp_ppc +#define helper_xssubdp helper_xssubdp_ppc +#define helper_xssubqp helper_xssubqp_ppc +#define helper_xssubsp helper_xssubsp_ppc +#define helper_xstdivdp helper_xstdivdp_ppc +#define helper_xstsqrtdp helper_xstsqrtdp_ppc +#define helper_xststdcdp helper_xststdcdp_ppc +#define helper_xststdcqp helper_xststdcqp_ppc +#define helper_xststdcsp helper_xststdcsp_ppc +#define helper_xvadddp helper_xvadddp_ppc +#define helper_xvaddsp helper_xvaddsp_ppc +#define helper_xvcmpeqdp helper_xvcmpeqdp_ppc +#define helper_xvcmpeqsp helper_xvcmpeqsp_ppc +#define helper_xvcmpgedp helper_xvcmpgedp_ppc +#define helper_xvcmpgesp helper_xvcmpgesp_ppc +#define helper_xvcmpgtdp helper_xvcmpgtdp_ppc +#define helper_xvcmpgtsp helper_xvcmpgtsp_ppc +#define helper_xvcmpnedp helper_xvcmpnedp_ppc +#define helper_xvcmpnesp helper_xvcmpnesp_ppc +#define helper_xvcvdpsp helper_xvcvdpsp_ppc +#define helper_xvcvdpsxds helper_xvcvdpsxds_ppc +#define helper_xvcvdpsxws helper_xvcvdpsxws_ppc +#define helper_xvcvdpuxds helper_xvcvdpuxds_ppc +#define helper_xvcvdpuxws helper_xvcvdpuxws_ppc +#define helper_xvcvhpsp helper_xvcvhpsp_ppc +#define helper_xvcvspdp helper_xvcvspdp_ppc +#define helper_xvcvsphp helper_xvcvsphp_ppc +#define helper_xvcvspsxds helper_xvcvspsxds_ppc +#define helper_xvcvspsxws helper_xvcvspsxws_ppc +#define helper_xvcvspuxds helper_xvcvspuxds_ppc +#define helper_xvcvspuxws helper_xvcvspuxws_ppc +#define helper_xvcvsxddp helper_xvcvsxddp_ppc +#define helper_xvcvsxdsp helper_xvcvsxdsp_ppc +#define helper_xvcvsxwdp helper_xvcvsxwdp_ppc +#define helper_xvcvsxwsp helper_xvcvsxwsp_ppc +#define helper_xvcvuxddp helper_xvcvuxddp_ppc +#define helper_xvcvuxdsp helper_xvcvuxdsp_ppc +#define helper_xvcvuxwdp helper_xvcvuxwdp_ppc +#define helper_xvcvuxwsp helper_xvcvuxwsp_ppc +#define helper_xvdivdp helper_xvdivdp_ppc +#define helper_xvdivsp helper_xvdivsp_ppc +#define helper_xvmadddp helper_xvmadddp_ppc +#define helper_xvmaddsp helper_xvmaddsp_ppc +#define helper_xvmaxdp helper_xvmaxdp_ppc +#define helper_xvmaxsp helper_xvmaxsp_ppc +#define helper_xvmindp helper_xvmindp_ppc +#define helper_xvminsp helper_xvminsp_ppc +#define helper_xvmsubdp helper_xvmsubdp_ppc +#define helper_xvmsubsp helper_xvmsubsp_ppc +#define helper_xvmuldp helper_xvmuldp_ppc +#define helper_xvmulsp helper_xvmulsp_ppc +#define helper_xvnmadddp helper_xvnmadddp_ppc +#define helper_xvnmaddsp helper_xvnmaddsp_ppc +#define helper_xvnmsubdp helper_xvnmsubdp_ppc +#define helper_xvnmsubsp helper_xvnmsubsp_ppc +#define helper_xvrdpi helper_xvrdpi_ppc +#define helper_xvrdpic helper_xvrdpic_ppc +#define helper_xvrdpim helper_xvrdpim_ppc +#define helper_xvrdpip helper_xvrdpip_ppc +#define helper_xvrdpiz helper_xvrdpiz_ppc +#define helper_xvredp helper_xvredp_ppc +#define helper_xvresp helper_xvresp_ppc +#define helper_xvrspi 
helper_xvrspi_ppc +#define helper_xvrspic helper_xvrspic_ppc +#define helper_xvrspim helper_xvrspim_ppc +#define helper_xvrspip helper_xvrspip_ppc +#define helper_xvrspiz helper_xvrspiz_ppc +#define helper_xvrsqrtedp helper_xvrsqrtedp_ppc +#define helper_xvrsqrtesp helper_xvrsqrtesp_ppc +#define helper_xvsqrtdp helper_xvsqrtdp_ppc +#define helper_xvsqrtsp helper_xvsqrtsp_ppc +#define helper_xvsubdp helper_xvsubdp_ppc +#define helper_xvsubsp helper_xvsubsp_ppc +#define helper_xvtdivdp helper_xvtdivdp_ppc +#define helper_xvtdivsp helper_xvtdivsp_ppc +#define helper_xvtsqrtdp helper_xvtsqrtdp_ppc +#define helper_xvtsqrtsp helper_xvtsqrtsp_ppc +#define helper_xvtstdcdp helper_xvtstdcdp_ppc +#define helper_xvtstdcsp helper_xvtstdcsp_ppc +#define helper_xvxsigsp helper_xvxsigsp_ppc +#define helper_xxperm helper_xxperm_ppc +#define helper_xxpermr helper_xxpermr_ppc #endif diff --git a/qemu/ppc64.h b/qemu/ppc64.h index 9e4d79ed11..4944950a82 100644 --- a/qemu/ppc64.h +++ b/qemu/ppc64.h @@ -42,7 +42,10 @@ #define tcg_gen_shl_i64 tcg_gen_shl_i64_ppc64 #define tcg_gen_shr_i64 tcg_gen_shr_i64_ppc64 #define tcg_gen_st_i64 tcg_gen_st_i64_ppc64 +#define tcg_gen_add_i64 tcg_gen_add_i64_ppc64 +#define tcg_gen_sub_i64 tcg_gen_sub_i64_ppc64 #define tcg_gen_xor_i64 tcg_gen_xor_i64_ppc64 +#define tcg_gen_neg_i64 tcg_gen_neg_i64_ppc64 #define cpu_icount_to_ns cpu_icount_to_ns_ppc64 #define cpu_is_stopped cpu_is_stopped_ppc64 #define cpu_get_ticks cpu_get_ticks_ppc64 @@ -374,6 +377,8 @@ #define floatx80_sub floatx80_sub_ppc64 #define floatx80_mul floatx80_mul_ppc64 #define floatx80_div floatx80_div_ppc64 +#define floatx80_modrem floatx80_modrem_ppc64 +#define floatx80_mod floatx80_mod_ppc64 #define floatx80_rem floatx80_rem_ppc64 #define floatx80_sqrt floatx80_sqrt_ppc64 #define floatx80_eq floatx80_eq_ppc64 @@ -648,6 +653,7 @@ #define tcg_gen_gvec_dup_i32 tcg_gen_gvec_dup_i32_ppc64 #define tcg_gen_gvec_dup_i64 tcg_gen_gvec_dup_i64_ppc64 #define tcg_gen_gvec_dup_mem tcg_gen_gvec_dup_mem_ppc64 +#define tcg_gen_gvec_dup_imm tcg_gen_gvec_dup_imm_ppc64 #define tcg_gen_gvec_dup64i tcg_gen_gvec_dup64i_ppc64 #define tcg_gen_gvec_dup32i tcg_gen_gvec_dup32i_ppc64 #define tcg_gen_gvec_dup16i tcg_gen_gvec_dup16i_ppc64 @@ -702,13 +708,20 @@ #define tcg_gen_gvec_shri tcg_gen_gvec_shri_ppc64 #define tcg_gen_vec_sar8i_i64 tcg_gen_vec_sar8i_i64_ppc64 #define tcg_gen_vec_sar16i_i64 tcg_gen_vec_sar16i_i64_ppc64 +#define tcg_gen_vec_rotl8i_i64 tcg_gen_vec_rotl8i_i64_ppc64 +#define tcg_gen_vec_rotl16i_i64 tcg_gen_vec_rotl16i_i64_ppc64 #define tcg_gen_gvec_sari tcg_gen_gvec_sari_ppc64 +#define tcg_gen_gvec_rotli tcg_gen_gvec_rotli_ppc64 +#define tcg_gen_gvec_rotri tcg_gen_gvec_rotri_ppc64 #define tcg_gen_gvec_shls tcg_gen_gvec_shls_ppc64 #define tcg_gen_gvec_shrs tcg_gen_gvec_shrs_ppc64 #define tcg_gen_gvec_sars tcg_gen_gvec_sars_ppc64 +#define tcg_gen_gvec_rotls tcg_gen_gvec_rotls_ppc64 #define tcg_gen_gvec_shlv tcg_gen_gvec_shlv_ppc64 #define tcg_gen_gvec_shrv tcg_gen_gvec_shrv_ppc64 #define tcg_gen_gvec_sarv tcg_gen_gvec_sarv_ppc64 +#define tcg_gen_gvec_rotlv tcg_gen_gvec_rotlv_ppc64 +#define tcg_gen_gvec_rotrv tcg_gen_gvec_rotrv_ppc64 #define tcg_gen_gvec_cmp tcg_gen_gvec_cmp_ppc64 #define tcg_gen_gvec_bitsel tcg_gen_gvec_bitsel_ppc64 #define tcg_can_emit_vecop_list tcg_can_emit_vecop_list_ppc64 @@ -745,6 +758,8 @@ #define tcg_gen_shli_vec tcg_gen_shli_vec_ppc64 #define tcg_gen_shri_vec tcg_gen_shri_vec_ppc64 #define tcg_gen_sari_vec tcg_gen_sari_vec_ppc64 +#define tcg_gen_rotli_vec tcg_gen_rotli_vec_ppc64 +#define 
tcg_gen_rotri_vec tcg_gen_rotri_vec_ppc64 #define tcg_gen_cmp_vec tcg_gen_cmp_vec_ppc64 #define tcg_gen_add_vec tcg_gen_add_vec_ppc64 #define tcg_gen_sub_vec tcg_gen_sub_vec_ppc64 @@ -760,9 +775,12 @@ #define tcg_gen_shlv_vec tcg_gen_shlv_vec_ppc64 #define tcg_gen_shrv_vec tcg_gen_shrv_vec_ppc64 #define tcg_gen_sarv_vec tcg_gen_sarv_vec_ppc64 +#define tcg_gen_rotlv_vec tcg_gen_rotlv_vec_ppc64 +#define tcg_gen_rotrv_vec tcg_gen_rotrv_vec_ppc64 #define tcg_gen_shls_vec tcg_gen_shls_vec_ppc64 #define tcg_gen_shrs_vec tcg_gen_shrs_vec_ppc64 #define tcg_gen_sars_vec tcg_gen_sars_vec_ppc64 +#define tcg_gen_rotls_vec tcg_gen_rotls_vec_ppc64 #define tcg_gen_bitsel_vec tcg_gen_bitsel_vec_ppc64 #define tcg_gen_cmpsel_vec tcg_gen_cmpsel_vec_ppc64 #define tb_htable_lookup tb_htable_lookup_ppc64 @@ -774,6 +792,7 @@ #define cpu_loop_exit_restore cpu_loop_exit_restore_ppc64 #define cpu_loop_exit_atomic cpu_loop_exit_atomic_ppc64 #define tlb_init tlb_init_ppc64 +#define tlb_destroy tlb_destroy_ppc64 #define tlb_flush_by_mmuidx tlb_flush_by_mmuidx_ppc64 #define tlb_flush tlb_flush_ppc64 #define tlb_flush_by_mmuidx_all_cpus tlb_flush_by_mmuidx_all_cpus_ppc64 @@ -794,6 +813,7 @@ #define tlb_set_page tlb_set_page_ppc64 #define get_page_addr_code_hostp get_page_addr_code_hostp_ppc64 #define get_page_addr_code get_page_addr_code_ppc64 +#define probe_access_flags probe_access_flags_ppc64 #define probe_access probe_access_ppc64 #define tlb_vaddr_to_host tlb_vaddr_to_host_ppc64 #define helper_ret_ldub_mmu helper_ret_ldub_mmu_ppc64 @@ -810,22 +830,34 @@ #define helper_be_ldsl_mmu helper_be_ldsl_mmu_ppc64 #define cpu_ldub_mmuidx_ra cpu_ldub_mmuidx_ra_ppc64 #define cpu_ldsb_mmuidx_ra cpu_ldsb_mmuidx_ra_ppc64 -#define cpu_lduw_mmuidx_ra cpu_lduw_mmuidx_ra_ppc64 -#define cpu_ldsw_mmuidx_ra cpu_ldsw_mmuidx_ra_ppc64 -#define cpu_ldl_mmuidx_ra cpu_ldl_mmuidx_ra_ppc64 -#define cpu_ldq_mmuidx_ra cpu_ldq_mmuidx_ra_ppc64 +#define cpu_lduw_be_mmuidx_ra cpu_lduw_be_mmuidx_ra_ppc64 +#define cpu_lduw_le_mmuidx_ra cpu_lduw_le_mmuidx_ra_ppc64 +#define cpu_ldsw_be_mmuidx_ra cpu_ldsw_be_mmuidx_ra_ppc64 +#define cpu_ldsw_le_mmuidx_ra cpu_ldsw_le_mmuidx_ra_ppc64 +#define cpu_ldl_be_mmuidx_ra cpu_ldl_be_mmuidx_ra_ppc64 +#define cpu_ldl_le_mmuidx_ra cpu_ldl_le_mmuidx_ra_ppc64 +#define cpu_ldq_be_mmuidx_ra cpu_ldq_be_mmuidx_ra_ppc64 +#define cpu_ldq_le_mmuidx_ra cpu_ldq_le_mmuidx_ra_ppc64 #define cpu_ldub_data_ra cpu_ldub_data_ra_ppc64 #define cpu_ldsb_data_ra cpu_ldsb_data_ra_ppc64 -#define cpu_lduw_data_ra cpu_lduw_data_ra_ppc64 -#define cpu_ldsw_data_ra cpu_ldsw_data_ra_ppc64 -#define cpu_ldl_data_ra cpu_ldl_data_ra_ppc64 -#define cpu_ldq_data_ra cpu_ldq_data_ra_ppc64 +#define cpu_lduw_be_data_ra cpu_lduw_be_data_ra_ppc64 +#define cpu_lduw_le_data_ra cpu_lduw_le_data_ra_ppc64 +#define cpu_ldsw_be_data_ra cpu_ldsw_be_data_ra_ppc64 +#define cpu_ldsw_le_data_ra cpu_ldsw_le_data_ra_ppc64 +#define cpu_ldl_be_data_ra cpu_ldl_be_data_ra_ppc64 +#define cpu_ldl_le_data_ra cpu_ldl_le_data_ra_ppc64 +#define cpu_ldq_be_data_ra cpu_ldq_be_data_ra_ppc64 +#define cpu_ldq_le_data_ra cpu_ldq_le_data_ra_ppc64 #define cpu_ldub_data cpu_ldub_data_ppc64 #define cpu_ldsb_data cpu_ldsb_data_ppc64 -#define cpu_lduw_data cpu_lduw_data_ppc64 -#define cpu_ldsw_data cpu_ldsw_data_ppc64 -#define cpu_ldl_data cpu_ldl_data_ppc64 -#define cpu_ldq_data cpu_ldq_data_ppc64 +#define cpu_lduw_be_data cpu_lduw_be_data_ppc64 +#define cpu_lduw_le_data cpu_lduw_le_data_ppc64 +#define cpu_ldsw_be_data cpu_ldsw_be_data_ppc64 +#define cpu_ldsw_le_data cpu_ldsw_le_data_ppc64 
+#define cpu_ldl_be_data cpu_ldl_be_data_ppc64 +#define cpu_ldl_le_data cpu_ldl_le_data_ppc64 +#define cpu_ldq_le_data cpu_ldq_le_data_ppc64 +#define cpu_ldq_be_data cpu_ldq_be_data_ppc64 #define helper_ret_stb_mmu helper_ret_stb_mmu_ppc64 #define helper_le_stw_mmu helper_le_stw_mmu_ppc64 #define helper_be_stw_mmu helper_be_stw_mmu_ppc64 @@ -834,17 +866,26 @@ #define helper_le_stq_mmu helper_le_stq_mmu_ppc64 #define helper_be_stq_mmu helper_be_stq_mmu_ppc64 #define cpu_stb_mmuidx_ra cpu_stb_mmuidx_ra_ppc64 -#define cpu_stw_mmuidx_ra cpu_stw_mmuidx_ra_ppc64 -#define cpu_stl_mmuidx_ra cpu_stl_mmuidx_ra_ppc64 -#define cpu_stq_mmuidx_ra cpu_stq_mmuidx_ra_ppc64 +#define cpu_stw_be_mmuidx_ra cpu_stw_be_mmuidx_ra_ppc64 +#define cpu_stw_le_mmuidx_ra cpu_stw_le_mmuidx_ra_ppc64 +#define cpu_stl_be_mmuidx_ra cpu_stl_be_mmuidx_ra_ppc64 +#define cpu_stl_le_mmuidx_ra cpu_stl_le_mmuidx_ra_ppc64 +#define cpu_stq_be_mmuidx_ra cpu_stq_be_mmuidx_ra_ppc64 +#define cpu_stq_le_mmuidx_ra cpu_stq_le_mmuidx_ra_ppc64 #define cpu_stb_data_ra cpu_stb_data_ra_ppc64 -#define cpu_stw_data_ra cpu_stw_data_ra_ppc64 -#define cpu_stl_data_ra cpu_stl_data_ra_ppc64 -#define cpu_stq_data_ra cpu_stq_data_ra_ppc64 +#define cpu_stw_be_data_ra cpu_stw_be_data_ra_ppc64 +#define cpu_stw_le_data_ra cpu_stw_le_data_ra_ppc64 +#define cpu_stl_be_data_ra cpu_stl_be_data_ra_ppc64 +#define cpu_stl_le_data_ra cpu_stl_le_data_ra_ppc64 +#define cpu_stq_be_data_ra cpu_stq_be_data_ra_ppc64 +#define cpu_stq_le_data_ra cpu_stq_le_data_ra_ppc64 #define cpu_stb_data cpu_stb_data_ppc64 -#define cpu_stw_data cpu_stw_data_ppc64 -#define cpu_stl_data cpu_stl_data_ppc64 -#define cpu_stq_data cpu_stq_data_ppc64 +#define cpu_stw_be_data cpu_stw_be_data_ppc64 +#define cpu_stw_le_data cpu_stw_le_data_ppc64 +#define cpu_stl_be_data cpu_stl_be_data_ppc64 +#define cpu_stl_le_data cpu_stl_le_data_ppc64 +#define cpu_stq_be_data cpu_stq_be_data_ppc64 +#define cpu_stq_le_data cpu_stq_le_data_ppc64 #define helper_atomic_cmpxchgb_mmu helper_atomic_cmpxchgb_mmu_ppc64 #define helper_atomic_xchgb_mmu helper_atomic_xchgb_mmu_ppc64 #define helper_atomic_fetch_addb_mmu helper_atomic_fetch_addb_mmu_ppc64 @@ -1101,6 +1142,7 @@ #define cpu_lduw_code cpu_lduw_code_ppc64 #define cpu_ldl_code cpu_ldl_code_ppc64 #define cpu_ldq_code cpu_ldq_code_ppc64 +#define cpu_interrupt_handler cpu_interrupt_handler_ppc64 #define helper_div_i32 helper_div_i32_ppc64 #define helper_rem_i32 helper_rem_i32_ppc64 #define helper_divu_i32 helper_divu_i32_ppc64 @@ -1185,6 +1227,10 @@ #define helper_gvec_sar16i helper_gvec_sar16i_ppc64 #define helper_gvec_sar32i helper_gvec_sar32i_ppc64 #define helper_gvec_sar64i helper_gvec_sar64i_ppc64 +#define helper_gvec_rotl8i helper_gvec_rotl8i_ppc64 +#define helper_gvec_rotl16i helper_gvec_rotl16i_ppc64 +#define helper_gvec_rotl32i helper_gvec_rotl32i_ppc64 +#define helper_gvec_rotl64i helper_gvec_rotl64i_ppc64 #define helper_gvec_shl8v helper_gvec_shl8v_ppc64 #define helper_gvec_shl16v helper_gvec_shl16v_ppc64 #define helper_gvec_shl32v helper_gvec_shl32v_ppc64 @@ -1197,6 +1243,14 @@ #define helper_gvec_sar16v helper_gvec_sar16v_ppc64 #define helper_gvec_sar32v helper_gvec_sar32v_ppc64 #define helper_gvec_sar64v helper_gvec_sar64v_ppc64 +#define helper_gvec_rotl8v helper_gvec_rotl8v_ppc64 +#define helper_gvec_rotl16v helper_gvec_rotl16v_ppc64 +#define helper_gvec_rotl32v helper_gvec_rotl32v_ppc64 +#define helper_gvec_rotl64v helper_gvec_rotl64v_ppc64 +#define helper_gvec_rotr8v helper_gvec_rotr8v_ppc64 +#define helper_gvec_rotr16v helper_gvec_rotr16v_ppc64 
+#define helper_gvec_rotr32v helper_gvec_rotr32v_ppc64 +#define helper_gvec_rotr64v helper_gvec_rotr64v_ppc64 #define helper_gvec_eq8 helper_gvec_eq8_ppc64 #define helper_gvec_ne8 helper_gvec_ne8_ppc64 #define helper_gvec_lt8 helper_gvec_lt8_ppc64 @@ -1575,6 +1629,33 @@ #define helper_tbegin helper_tbegin_ppc64 #define helper_load_dump_spr helper_load_dump_spr_ppc64 #define helper_store_dump_spr helper_store_dump_spr_ppc64 +#define store_fpscr store_fpscr_ppc64 +#define helper_store_fpscr helper_store_fpscr_ppc64 +#define helper_float_check_status helper_float_check_status_ppc64 +#define helper_reset_fpstatus helper_reset_fpstatus_ppc64 +#define helper_fadd helper_fadd_ppc64 +#define helper_fsub helper_fsub_ppc64 +#define helper_fmul helper_fmul_ppc64 +#define helper_fdiv helper_fdiv_ppc64 +#define helper_fctiw helper_fctiw_ppc64 +#define helper_fctiwz helper_fctiwz_ppc64 +#define helper_fctiwuz helper_fctiwuz_ppc64 +#define helper_fctid helper_fctid_ppc64 +#define helper_fctidz helper_fctidz_ppc64 +#define helper_fctidu helper_fctidu_ppc64 +#define helper_fctiduz helper_fctiduz_ppc64 +#define helper_fcfid helper_fcfid_ppc64 +#define helper_fcfids helper_fcfids_ppc64 +#define helper_fcfidu helper_fcfidu_ppc64 +#define helper_fcfidus helper_fcfidus_ppc64 +#define helper_frin helper_frin_ppc64 +#define helper_friz helper_friz_ppc64 +#define helper_frip helper_frip_ppc64 +#define helper_frim helper_frim_ppc64 +#define helper_fmadd helper_fmadd_ppc64 +#define helper_fnmadd helper_fnmadd_ppc64 +#define helper_fmsub helper_fmsub_ppc64 +#define helper_fnmsub helper_fnmsub_ppc64 #define helper_hfscr_facility_check helper_hfscr_facility_check_ppc64 #define helper_fscr_facility_check helper_fscr_facility_check_ppc64 #define helper_msr_facility_check helper_msr_facility_check_ppc64 @@ -1726,6 +1807,243 @@ #define ppc_hash32_handle_mmu_fault ppc_hash32_handle_mmu_fault_ppc64 #define gen_helper_store_booke_tsr gen_helper_store_booke_tsr_ppc64 #define gen_helper_store_booke_tcr gen_helper_store_booke_tcr_ppc64 +#define gen_helper_store_fpscr gen_helper_store_fpscr_ppc64 #define store_booke_tcr store_booke_tcr_ppc64 #define ppc_hash32_get_phys_page_debug ppc_hash32_get_phys_page_debug_ppc64 +#define helper_compute_fprf_float128 helper_compute_fprf_float128_ppc64 +#define helper_compute_fprf_float16 helper_compute_fprf_float16_ppc64 +#define helper_compute_fprf_float32 helper_compute_fprf_float32_ppc64 +#define helper_compute_fprf_float64 helper_compute_fprf_float64_ppc64 +#define helper_efdadd helper_efdadd_ppc64 +#define helper_efdcfs helper_efdcfs_ppc64 +#define helper_efdcfsf helper_efdcfsf_ppc64 +#define helper_efdcfsi helper_efdcfsi_ppc64 +#define helper_efdcfsid helper_efdcfsid_ppc64 +#define helper_efdcfuf helper_efdcfuf_ppc64 +#define helper_efdcfui helper_efdcfui_ppc64 +#define helper_efdcfuid helper_efdcfuid_ppc64 +#define helper_efdcmpeq helper_efdcmpeq_ppc64 +#define helper_efdcmpgt helper_efdcmpgt_ppc64 +#define helper_efdcmplt helper_efdcmplt_ppc64 +#define helper_efdctsf helper_efdctsf_ppc64 +#define helper_efdctsi helper_efdctsi_ppc64 +#define helper_efdctsidz helper_efdctsidz_ppc64 +#define helper_efdctsiz helper_efdctsiz_ppc64 +#define helper_efdctuf helper_efdctuf_ppc64 +#define helper_efdctui helper_efdctui_ppc64 +#define helper_efdctuidz helper_efdctuidz_ppc64 +#define helper_efdctuiz helper_efdctuiz_ppc64 +#define helper_efddiv helper_efddiv_ppc64 +#define helper_efdmul helper_efdmul_ppc64 +#define helper_efdsub helper_efdsub_ppc64 +#define helper_efdtsteq helper_efdtsteq_ppc64 
+#define helper_efdtstgt helper_efdtstgt_ppc64 +#define helper_efdtstlt helper_efdtstlt_ppc64 +#define helper_efsadd helper_efsadd_ppc64 +#define helper_efscfd helper_efscfd_ppc64 +#define helper_efscfsf helper_efscfsf_ppc64 +#define helper_efscfsi helper_efscfsi_ppc64 +#define helper_efscfuf helper_efscfuf_ppc64 +#define helper_efscfui helper_efscfui_ppc64 +#define helper_efscmpeq helper_efscmpeq_ppc64 +#define helper_efscmpgt helper_efscmpgt_ppc64 +#define helper_efscmplt helper_efscmplt_ppc64 +#define helper_efsctsf helper_efsctsf_ppc64 +#define helper_efsctsi helper_efsctsi_ppc64 +#define helper_efsctsiz helper_efsctsiz_ppc64 +#define helper_efsctuf helper_efsctuf_ppc64 +#define helper_efsctui helper_efsctui_ppc64 +#define helper_efsctuiz helper_efsctuiz_ppc64 +#define helper_efsdiv helper_efsdiv_ppc64 +#define helper_efsmul helper_efsmul_ppc64 +#define helper_efssub helper_efssub_ppc64 +#define helper_efststeq helper_efststeq_ppc64 +#define helper_efststgt helper_efststgt_ppc64 +#define helper_efststlt helper_efststlt_ppc64 +#define helper_evfsadd helper_evfsadd_ppc64 +#define helper_evfscfsf helper_evfscfsf_ppc64 +#define helper_evfscfsi helper_evfscfsi_ppc64 +#define helper_evfscfuf helper_evfscfuf_ppc64 +#define helper_evfscfui helper_evfscfui_ppc64 +#define helper_evfscmpeq helper_evfscmpeq_ppc64 +#define helper_evfscmpgt helper_evfscmpgt_ppc64 +#define helper_evfscmplt helper_evfscmplt_ppc64 +#define helper_evfsctsf helper_evfsctsf_ppc64 +#define helper_evfsctsi helper_evfsctsi_ppc64 +#define helper_evfsctsiz helper_evfsctsiz_ppc64 +#define helper_evfsctuf helper_evfsctuf_ppc64 +#define helper_evfsctui helper_evfsctui_ppc64 +#define helper_evfsctuiz helper_evfsctuiz_ppc64 +#define helper_evfsdiv helper_evfsdiv_ppc64 +#define helper_evfsmul helper_evfsmul_ppc64 +#define helper_evfssub helper_evfssub_ppc64 +#define helper_evfststeq helper_evfststeq_ppc64 +#define helper_evfststgt helper_evfststgt_ppc64 +#define helper_evfststlt helper_evfststlt_ppc64 +#define helper_fcmpo helper_fcmpo_ppc64 +#define helper_fcmpu helper_fcmpu_ppc64 +#define helper_fctiwu helper_fctiwu_ppc64 +#define helper_fpscr_clrbit helper_fpscr_clrbit_ppc64 +#define helper_fpscr_setbit helper_fpscr_setbit_ppc64 +#define helper_fre helper_fre_ppc64 +#define helper_fres helper_fres_ppc64 +#define helper_frsp helper_frsp_ppc64 +#define helper_frsqrte helper_frsqrte_ppc64 +#define helper_fsel helper_fsel_ppc64 +#define helper_fsqrt helper_fsqrt_ppc64 +#define helper_ftdiv helper_ftdiv_ppc64 +#define helper_ftsqrt helper_ftsqrt_ppc64 +#define helper_todouble helper_todouble_ppc64 +#define helper_tosingle helper_tosingle_ppc64 +#define helper_xsadddp helper_xsadddp_ppc64 +#define helper_xsaddqp helper_xsaddqp_ppc64 +#define helper_xsaddsp helper_xsaddsp_ppc64 +#define helper_xscmpeqdp helper_xscmpeqdp_ppc64 +#define helper_xscmpexpdp helper_xscmpexpdp_ppc64 +#define helper_xscmpexpqp helper_xscmpexpqp_ppc64 +#define helper_xscmpgedp helper_xscmpgedp_ppc64 +#define helper_xscmpgtdp helper_xscmpgtdp_ppc64 +#define helper_xscmpnedp helper_xscmpnedp_ppc64 +#define helper_xscmpodp helper_xscmpodp_ppc64 +#define helper_xscmpoqp helper_xscmpoqp_ppc64 +#define helper_xscmpudp helper_xscmpudp_ppc64 +#define helper_xscmpuqp helper_xscmpuqp_ppc64 +#define helper_xscvdphp helper_xscvdphp_ppc64 +#define helper_xscvdpqp helper_xscvdpqp_ppc64 +#define helper_xscvdpsp helper_xscvdpsp_ppc64 +#define helper_xscvdpspn helper_xscvdpspn_ppc64 +#define helper_xscvdpsxds helper_xscvdpsxds_ppc64 +#define helper_xscvdpsxws 
helper_xscvdpsxws_ppc64 +#define helper_xscvdpuxds helper_xscvdpuxds_ppc64 +#define helper_xscvdpuxws helper_xscvdpuxws_ppc64 +#define helper_xscvhpdp helper_xscvhpdp_ppc64 +#define helper_xscvqpdp helper_xscvqpdp_ppc64 +#define helper_xscvqpsdz helper_xscvqpsdz_ppc64 +#define helper_xscvqpswz helper_xscvqpswz_ppc64 +#define helper_xscvqpudz helper_xscvqpudz_ppc64 +#define helper_xscvqpuwz helper_xscvqpuwz_ppc64 +#define helper_xscvsdqp helper_xscvsdqp_ppc64 +#define helper_xscvspdp helper_xscvspdp_ppc64 +#define helper_xscvspdpn helper_xscvspdpn_ppc64 +#define helper_xscvsxddp helper_xscvsxddp_ppc64 +#define helper_xscvsxdsp helper_xscvsxdsp_ppc64 +#define helper_xscvudqp helper_xscvudqp_ppc64 +#define helper_xscvuxddp helper_xscvuxddp_ppc64 +#define helper_xscvuxdsp helper_xscvuxdsp_ppc64 +#define helper_xsdivdp helper_xsdivdp_ppc64 +#define helper_xsdivqp helper_xsdivqp_ppc64 +#define helper_xsdivsp helper_xsdivsp_ppc64 +#define helper_xsmadddp helper_xsmadddp_ppc64 +#define helper_xsmaddsp helper_xsmaddsp_ppc64 +#define helper_xsmaxcdp helper_xsmaxcdp_ppc64 +#define helper_xsmaxdp helper_xsmaxdp_ppc64 +#define helper_xsmaxjdp helper_xsmaxjdp_ppc64 +#define helper_xsmincdp helper_xsmincdp_ppc64 +#define helper_xsmindp helper_xsmindp_ppc64 +#define helper_xsminjdp helper_xsminjdp_ppc64 +#define helper_xsmsubdp helper_xsmsubdp_ppc64 +#define helper_xsmsubsp helper_xsmsubsp_ppc64 +#define helper_xsmuldp helper_xsmuldp_ppc64 +#define helper_xsmulqp helper_xsmulqp_ppc64 +#define helper_xsmulsp helper_xsmulsp_ppc64 +#define helper_xsnmadddp helper_xsnmadddp_ppc64 +#define helper_xsnmaddsp helper_xsnmaddsp_ppc64 +#define helper_xsnmsubdp helper_xsnmsubdp_ppc64 +#define helper_xsnmsubsp helper_xsnmsubsp_ppc64 +#define helper_xsrdpi helper_xsrdpi_ppc64 +#define helper_xsrdpic helper_xsrdpic_ppc64 +#define helper_xsrdpim helper_xsrdpim_ppc64 +#define helper_xsrdpip helper_xsrdpip_ppc64 +#define helper_xsrdpiz helper_xsrdpiz_ppc64 +#define helper_xsredp helper_xsredp_ppc64 +#define helper_xsresp helper_xsresp_ppc64 +#define helper_xsrqpi helper_xsrqpi_ppc64 +#define helper_xsrqpxp helper_xsrqpxp_ppc64 +#define helper_xsrsp helper_xsrsp_ppc64 +#define helper_xsrsqrtedp helper_xsrsqrtedp_ppc64 +#define helper_xsrsqrtesp helper_xsrsqrtesp_ppc64 +#define helper_xssqrtdp helper_xssqrtdp_ppc64 +#define helper_xssqrtqp helper_xssqrtqp_ppc64 +#define helper_xssqrtsp helper_xssqrtsp_ppc64 +#define helper_xssubdp helper_xssubdp_ppc64 +#define helper_xssubqp helper_xssubqp_ppc64 +#define helper_xssubsp helper_xssubsp_ppc64 +#define helper_xstdivdp helper_xstdivdp_ppc64 +#define helper_xstsqrtdp helper_xstsqrtdp_ppc64 +#define helper_xststdcdp helper_xststdcdp_ppc64 +#define helper_xststdcqp helper_xststdcqp_ppc64 +#define helper_xststdcsp helper_xststdcsp_ppc64 +#define helper_xvadddp helper_xvadddp_ppc64 +#define helper_xvaddsp helper_xvaddsp_ppc64 +#define helper_xvcmpeqdp helper_xvcmpeqdp_ppc64 +#define helper_xvcmpeqsp helper_xvcmpeqsp_ppc64 +#define helper_xvcmpgedp helper_xvcmpgedp_ppc64 +#define helper_xvcmpgesp helper_xvcmpgesp_ppc64 +#define helper_xvcmpgtdp helper_xvcmpgtdp_ppc64 +#define helper_xvcmpgtsp helper_xvcmpgtsp_ppc64 +#define helper_xvcmpnedp helper_xvcmpnedp_ppc64 +#define helper_xvcmpnesp helper_xvcmpnesp_ppc64 +#define helper_xvcvdpsp helper_xvcvdpsp_ppc64 +#define helper_xvcvdpsxds helper_xvcvdpsxds_ppc64 +#define helper_xvcvdpsxws helper_xvcvdpsxws_ppc64 +#define helper_xvcvdpuxds helper_xvcvdpuxds_ppc64 +#define helper_xvcvdpuxws helper_xvcvdpuxws_ppc64 +#define helper_xvcvhpsp 
helper_xvcvhpsp_ppc64 +#define helper_xvcvspdp helper_xvcvspdp_ppc64 +#define helper_xvcvsphp helper_xvcvsphp_ppc64 +#define helper_xvcvspsxds helper_xvcvspsxds_ppc64 +#define helper_xvcvspsxws helper_xvcvspsxws_ppc64 +#define helper_xvcvspuxds helper_xvcvspuxds_ppc64 +#define helper_xvcvspuxws helper_xvcvspuxws_ppc64 +#define helper_xvcvsxddp helper_xvcvsxddp_ppc64 +#define helper_xvcvsxdsp helper_xvcvsxdsp_ppc64 +#define helper_xvcvsxwdp helper_xvcvsxwdp_ppc64 +#define helper_xvcvsxwsp helper_xvcvsxwsp_ppc64 +#define helper_xvcvuxddp helper_xvcvuxddp_ppc64 +#define helper_xvcvuxdsp helper_xvcvuxdsp_ppc64 +#define helper_xvcvuxwdp helper_xvcvuxwdp_ppc64 +#define helper_xvcvuxwsp helper_xvcvuxwsp_ppc64 +#define helper_xvdivdp helper_xvdivdp_ppc64 +#define helper_xvdivsp helper_xvdivsp_ppc64 +#define helper_xvmadddp helper_xvmadddp_ppc64 +#define helper_xvmaddsp helper_xvmaddsp_ppc64 +#define helper_xvmaxdp helper_xvmaxdp_ppc64 +#define helper_xvmaxsp helper_xvmaxsp_ppc64 +#define helper_xvmindp helper_xvmindp_ppc64 +#define helper_xvminsp helper_xvminsp_ppc64 +#define helper_xvmsubdp helper_xvmsubdp_ppc64 +#define helper_xvmsubsp helper_xvmsubsp_ppc64 +#define helper_xvmuldp helper_xvmuldp_ppc64 +#define helper_xvmulsp helper_xvmulsp_ppc64 +#define helper_xvnmadddp helper_xvnmadddp_ppc64 +#define helper_xvnmaddsp helper_xvnmaddsp_ppc64 +#define helper_xvnmsubdp helper_xvnmsubdp_ppc64 +#define helper_xvnmsubsp helper_xvnmsubsp_ppc64 +#define helper_xvrdpi helper_xvrdpi_ppc64 +#define helper_xvrdpic helper_xvrdpic_ppc64 +#define helper_xvrdpim helper_xvrdpim_ppc64 +#define helper_xvrdpip helper_xvrdpip_ppc64 +#define helper_xvrdpiz helper_xvrdpiz_ppc64 +#define helper_xvredp helper_xvredp_ppc64 +#define helper_xvresp helper_xvresp_ppc64 +#define helper_xvrspi helper_xvrspi_ppc64 +#define helper_xvrspic helper_xvrspic_ppc64 +#define helper_xvrspim helper_xvrspim_ppc64 +#define helper_xvrspip helper_xvrspip_ppc64 +#define helper_xvrspiz helper_xvrspiz_ppc64 +#define helper_xvrsqrtedp helper_xvrsqrtedp_ppc64 +#define helper_xvrsqrtesp helper_xvrsqrtesp_ppc64 +#define helper_xvsqrtdp helper_xvsqrtdp_ppc64 +#define helper_xvsqrtsp helper_xvsqrtsp_ppc64 +#define helper_xvsubdp helper_xvsubdp_ppc64 +#define helper_xvsubsp helper_xvsubsp_ppc64 +#define helper_xvtdivdp helper_xvtdivdp_ppc64 +#define helper_xvtdivsp helper_xvtdivsp_ppc64 +#define helper_xvtsqrtdp helper_xvtsqrtdp_ppc64 +#define helper_xvtsqrtsp helper_xvtsqrtsp_ppc64 +#define helper_xvtstdcdp helper_xvtstdcdp_ppc64 +#define helper_xvtstdcsp helper_xvtstdcsp_ppc64 +#define helper_xvxsigsp helper_xvxsigsp_ppc64 +#define helper_xxperm helper_xxperm_ppc64 +#define helper_xxpermr helper_xxpermr_ppc64 #endif diff --git a/qemu/riscv32.h b/qemu/riscv32.h index 90889da546..edc897463b 100644 --- a/qemu/riscv32.h +++ b/qemu/riscv32.h @@ -42,7 +42,10 @@ #define tcg_gen_shl_i64 tcg_gen_shl_i64_riscv32 #define tcg_gen_shr_i64 tcg_gen_shr_i64_riscv32 #define tcg_gen_st_i64 tcg_gen_st_i64_riscv32 +#define tcg_gen_add_i64 tcg_gen_add_i64_riscv32 +#define tcg_gen_sub_i64 tcg_gen_sub_i64_riscv32 #define tcg_gen_xor_i64 tcg_gen_xor_i64_riscv32 +#define tcg_gen_neg_i64 tcg_gen_neg_i64_riscv32 #define cpu_icount_to_ns cpu_icount_to_ns_riscv32 #define cpu_is_stopped cpu_is_stopped_riscv32 #define cpu_get_ticks cpu_get_ticks_riscv32 @@ -374,6 +377,8 @@ #define floatx80_sub floatx80_sub_riscv32 #define floatx80_mul floatx80_mul_riscv32 #define floatx80_div floatx80_div_riscv32 +#define floatx80_modrem floatx80_modrem_riscv32 +#define floatx80_mod 
floatx80_mod_riscv32 #define floatx80_rem floatx80_rem_riscv32 #define floatx80_sqrt floatx80_sqrt_riscv32 #define floatx80_eq floatx80_eq_riscv32 @@ -648,6 +653,7 @@ #define tcg_gen_gvec_dup_i32 tcg_gen_gvec_dup_i32_riscv32 #define tcg_gen_gvec_dup_i64 tcg_gen_gvec_dup_i64_riscv32 #define tcg_gen_gvec_dup_mem tcg_gen_gvec_dup_mem_riscv32 +#define tcg_gen_gvec_dup_imm tcg_gen_gvec_dup_imm_riscv32 #define tcg_gen_gvec_dup64i tcg_gen_gvec_dup64i_riscv32 #define tcg_gen_gvec_dup32i tcg_gen_gvec_dup32i_riscv32 #define tcg_gen_gvec_dup16i tcg_gen_gvec_dup16i_riscv32 @@ -702,13 +708,20 @@ #define tcg_gen_gvec_shri tcg_gen_gvec_shri_riscv32 #define tcg_gen_vec_sar8i_i64 tcg_gen_vec_sar8i_i64_riscv32 #define tcg_gen_vec_sar16i_i64 tcg_gen_vec_sar16i_i64_riscv32 +#define tcg_gen_vec_rotl8i_i64 tcg_gen_vec_rotl8i_i64_riscv32 +#define tcg_gen_vec_rotl16i_i64 tcg_gen_vec_rotl16i_i64_riscv32 #define tcg_gen_gvec_sari tcg_gen_gvec_sari_riscv32 +#define tcg_gen_gvec_rotli tcg_gen_gvec_rotli_riscv32 +#define tcg_gen_gvec_rotri tcg_gen_gvec_rotri_riscv32 #define tcg_gen_gvec_shls tcg_gen_gvec_shls_riscv32 #define tcg_gen_gvec_shrs tcg_gen_gvec_shrs_riscv32 #define tcg_gen_gvec_sars tcg_gen_gvec_sars_riscv32 +#define tcg_gen_gvec_rotls tcg_gen_gvec_rotls_riscv32 #define tcg_gen_gvec_shlv tcg_gen_gvec_shlv_riscv32 #define tcg_gen_gvec_shrv tcg_gen_gvec_shrv_riscv32 #define tcg_gen_gvec_sarv tcg_gen_gvec_sarv_riscv32 +#define tcg_gen_gvec_rotlv tcg_gen_gvec_rotlv_riscv32 +#define tcg_gen_gvec_rotrv tcg_gen_gvec_rotrv_riscv32 #define tcg_gen_gvec_cmp tcg_gen_gvec_cmp_riscv32 #define tcg_gen_gvec_bitsel tcg_gen_gvec_bitsel_riscv32 #define tcg_can_emit_vecop_list tcg_can_emit_vecop_list_riscv32 @@ -745,6 +758,8 @@ #define tcg_gen_shli_vec tcg_gen_shli_vec_riscv32 #define tcg_gen_shri_vec tcg_gen_shri_vec_riscv32 #define tcg_gen_sari_vec tcg_gen_sari_vec_riscv32 +#define tcg_gen_rotli_vec tcg_gen_rotli_vec_riscv32 +#define tcg_gen_rotri_vec tcg_gen_rotri_vec_riscv32 #define tcg_gen_cmp_vec tcg_gen_cmp_vec_riscv32 #define tcg_gen_add_vec tcg_gen_add_vec_riscv32 #define tcg_gen_sub_vec tcg_gen_sub_vec_riscv32 @@ -760,9 +775,12 @@ #define tcg_gen_shlv_vec tcg_gen_shlv_vec_riscv32 #define tcg_gen_shrv_vec tcg_gen_shrv_vec_riscv32 #define tcg_gen_sarv_vec tcg_gen_sarv_vec_riscv32 +#define tcg_gen_rotlv_vec tcg_gen_rotlv_vec_riscv32 +#define tcg_gen_rotrv_vec tcg_gen_rotrv_vec_riscv32 #define tcg_gen_shls_vec tcg_gen_shls_vec_riscv32 #define tcg_gen_shrs_vec tcg_gen_shrs_vec_riscv32 #define tcg_gen_sars_vec tcg_gen_sars_vec_riscv32 +#define tcg_gen_rotls_vec tcg_gen_rotls_vec_riscv32 #define tcg_gen_bitsel_vec tcg_gen_bitsel_vec_riscv32 #define tcg_gen_cmpsel_vec tcg_gen_cmpsel_vec_riscv32 #define tb_htable_lookup tb_htable_lookup_riscv32 @@ -774,6 +792,7 @@ #define cpu_loop_exit_restore cpu_loop_exit_restore_riscv32 #define cpu_loop_exit_atomic cpu_loop_exit_atomic_riscv32 #define tlb_init tlb_init_riscv32 +#define tlb_destroy tlb_destroy_riscv32 #define tlb_flush_by_mmuidx tlb_flush_by_mmuidx_riscv32 #define tlb_flush tlb_flush_riscv32 #define tlb_flush_by_mmuidx_all_cpus tlb_flush_by_mmuidx_all_cpus_riscv32 @@ -794,6 +813,7 @@ #define tlb_set_page tlb_set_page_riscv32 #define get_page_addr_code_hostp get_page_addr_code_hostp_riscv32 #define get_page_addr_code get_page_addr_code_riscv32 +#define probe_access_flags probe_access_flags_riscv32 #define probe_access probe_access_riscv32 #define tlb_vaddr_to_host tlb_vaddr_to_host_riscv32 #define helper_ret_ldub_mmu helper_ret_ldub_mmu_riscv32 @@ -810,22 +830,34 @@ #define 
helper_be_ldsl_mmu helper_be_ldsl_mmu_riscv32 #define cpu_ldub_mmuidx_ra cpu_ldub_mmuidx_ra_riscv32 #define cpu_ldsb_mmuidx_ra cpu_ldsb_mmuidx_ra_riscv32 -#define cpu_lduw_mmuidx_ra cpu_lduw_mmuidx_ra_riscv32 -#define cpu_ldsw_mmuidx_ra cpu_ldsw_mmuidx_ra_riscv32 -#define cpu_ldl_mmuidx_ra cpu_ldl_mmuidx_ra_riscv32 -#define cpu_ldq_mmuidx_ra cpu_ldq_mmuidx_ra_riscv32 +#define cpu_lduw_be_mmuidx_ra cpu_lduw_be_mmuidx_ra_riscv32 +#define cpu_lduw_le_mmuidx_ra cpu_lduw_le_mmuidx_ra_riscv32 +#define cpu_ldsw_be_mmuidx_ra cpu_ldsw_be_mmuidx_ra_riscv32 +#define cpu_ldsw_le_mmuidx_ra cpu_ldsw_le_mmuidx_ra_riscv32 +#define cpu_ldl_be_mmuidx_ra cpu_ldl_be_mmuidx_ra_riscv32 +#define cpu_ldl_le_mmuidx_ra cpu_ldl_le_mmuidx_ra_riscv32 +#define cpu_ldq_be_mmuidx_ra cpu_ldq_be_mmuidx_ra_riscv32 +#define cpu_ldq_le_mmuidx_ra cpu_ldq_le_mmuidx_ra_riscv32 #define cpu_ldub_data_ra cpu_ldub_data_ra_riscv32 #define cpu_ldsb_data_ra cpu_ldsb_data_ra_riscv32 -#define cpu_lduw_data_ra cpu_lduw_data_ra_riscv32 -#define cpu_ldsw_data_ra cpu_ldsw_data_ra_riscv32 -#define cpu_ldl_data_ra cpu_ldl_data_ra_riscv32 -#define cpu_ldq_data_ra cpu_ldq_data_ra_riscv32 +#define cpu_lduw_be_data_ra cpu_lduw_be_data_ra_riscv32 +#define cpu_lduw_le_data_ra cpu_lduw_le_data_ra_riscv32 +#define cpu_ldsw_be_data_ra cpu_ldsw_be_data_ra_riscv32 +#define cpu_ldsw_le_data_ra cpu_ldsw_le_data_ra_riscv32 +#define cpu_ldl_be_data_ra cpu_ldl_be_data_ra_riscv32 +#define cpu_ldl_le_data_ra cpu_ldl_le_data_ra_riscv32 +#define cpu_ldq_be_data_ra cpu_ldq_be_data_ra_riscv32 +#define cpu_ldq_le_data_ra cpu_ldq_le_data_ra_riscv32 #define cpu_ldub_data cpu_ldub_data_riscv32 #define cpu_ldsb_data cpu_ldsb_data_riscv32 -#define cpu_lduw_data cpu_lduw_data_riscv32 -#define cpu_ldsw_data cpu_ldsw_data_riscv32 -#define cpu_ldl_data cpu_ldl_data_riscv32 -#define cpu_ldq_data cpu_ldq_data_riscv32 +#define cpu_lduw_be_data cpu_lduw_be_data_riscv32 +#define cpu_lduw_le_data cpu_lduw_le_data_riscv32 +#define cpu_ldsw_be_data cpu_ldsw_be_data_riscv32 +#define cpu_ldsw_le_data cpu_ldsw_le_data_riscv32 +#define cpu_ldl_be_data cpu_ldl_be_data_riscv32 +#define cpu_ldl_le_data cpu_ldl_le_data_riscv32 +#define cpu_ldq_le_data cpu_ldq_le_data_riscv32 +#define cpu_ldq_be_data cpu_ldq_be_data_riscv32 #define helper_ret_stb_mmu helper_ret_stb_mmu_riscv32 #define helper_le_stw_mmu helper_le_stw_mmu_riscv32 #define helper_be_stw_mmu helper_be_stw_mmu_riscv32 @@ -834,17 +866,26 @@ #define helper_le_stq_mmu helper_le_stq_mmu_riscv32 #define helper_be_stq_mmu helper_be_stq_mmu_riscv32 #define cpu_stb_mmuidx_ra cpu_stb_mmuidx_ra_riscv32 -#define cpu_stw_mmuidx_ra cpu_stw_mmuidx_ra_riscv32 -#define cpu_stl_mmuidx_ra cpu_stl_mmuidx_ra_riscv32 -#define cpu_stq_mmuidx_ra cpu_stq_mmuidx_ra_riscv32 +#define cpu_stw_be_mmuidx_ra cpu_stw_be_mmuidx_ra_riscv32 +#define cpu_stw_le_mmuidx_ra cpu_stw_le_mmuidx_ra_riscv32 +#define cpu_stl_be_mmuidx_ra cpu_stl_be_mmuidx_ra_riscv32 +#define cpu_stl_le_mmuidx_ra cpu_stl_le_mmuidx_ra_riscv32 +#define cpu_stq_be_mmuidx_ra cpu_stq_be_mmuidx_ra_riscv32 +#define cpu_stq_le_mmuidx_ra cpu_stq_le_mmuidx_ra_riscv32 #define cpu_stb_data_ra cpu_stb_data_ra_riscv32 -#define cpu_stw_data_ra cpu_stw_data_ra_riscv32 -#define cpu_stl_data_ra cpu_stl_data_ra_riscv32 -#define cpu_stq_data_ra cpu_stq_data_ra_riscv32 +#define cpu_stw_be_data_ra cpu_stw_be_data_ra_riscv32 +#define cpu_stw_le_data_ra cpu_stw_le_data_ra_riscv32 +#define cpu_stl_be_data_ra cpu_stl_be_data_ra_riscv32 +#define cpu_stl_le_data_ra cpu_stl_le_data_ra_riscv32 +#define cpu_stq_be_data_ra 
cpu_stq_be_data_ra_riscv32 +#define cpu_stq_le_data_ra cpu_stq_le_data_ra_riscv32 #define cpu_stb_data cpu_stb_data_riscv32 -#define cpu_stw_data cpu_stw_data_riscv32 -#define cpu_stl_data cpu_stl_data_riscv32 -#define cpu_stq_data cpu_stq_data_riscv32 +#define cpu_stw_be_data cpu_stw_be_data_riscv32 +#define cpu_stw_le_data cpu_stw_le_data_riscv32 +#define cpu_stl_be_data cpu_stl_be_data_riscv32 +#define cpu_stl_le_data cpu_stl_le_data_riscv32 +#define cpu_stq_be_data cpu_stq_be_data_riscv32 +#define cpu_stq_le_data cpu_stq_le_data_riscv32 #define helper_atomic_cmpxchgb_mmu helper_atomic_cmpxchgb_mmu_riscv32 #define helper_atomic_xchgb_mmu helper_atomic_xchgb_mmu_riscv32 #define helper_atomic_fetch_addb_mmu helper_atomic_fetch_addb_mmu_riscv32 @@ -1101,6 +1142,7 @@ #define cpu_lduw_code cpu_lduw_code_riscv32 #define cpu_ldl_code cpu_ldl_code_riscv32 #define cpu_ldq_code cpu_ldq_code_riscv32 +#define cpu_interrupt_handler cpu_interrupt_handler_riscv32 #define helper_div_i32 helper_div_i32_riscv32 #define helper_rem_i32 helper_rem_i32_riscv32 #define helper_divu_i32 helper_divu_i32_riscv32 @@ -1185,6 +1227,10 @@ #define helper_gvec_sar16i helper_gvec_sar16i_riscv32 #define helper_gvec_sar32i helper_gvec_sar32i_riscv32 #define helper_gvec_sar64i helper_gvec_sar64i_riscv32 +#define helper_gvec_rotl8i helper_gvec_rotl8i_riscv32 +#define helper_gvec_rotl16i helper_gvec_rotl16i_riscv32 +#define helper_gvec_rotl32i helper_gvec_rotl32i_riscv32 +#define helper_gvec_rotl64i helper_gvec_rotl64i_riscv32 #define helper_gvec_shl8v helper_gvec_shl8v_riscv32 #define helper_gvec_shl16v helper_gvec_shl16v_riscv32 #define helper_gvec_shl32v helper_gvec_shl32v_riscv32 @@ -1197,6 +1243,14 @@ #define helper_gvec_sar16v helper_gvec_sar16v_riscv32 #define helper_gvec_sar32v helper_gvec_sar32v_riscv32 #define helper_gvec_sar64v helper_gvec_sar64v_riscv32 +#define helper_gvec_rotl8v helper_gvec_rotl8v_riscv32 +#define helper_gvec_rotl16v helper_gvec_rotl16v_riscv32 +#define helper_gvec_rotl32v helper_gvec_rotl32v_riscv32 +#define helper_gvec_rotl64v helper_gvec_rotl64v_riscv32 +#define helper_gvec_rotr8v helper_gvec_rotr8v_riscv32 +#define helper_gvec_rotr16v helper_gvec_rotr16v_riscv32 +#define helper_gvec_rotr32v helper_gvec_rotr32v_riscv32 +#define helper_gvec_rotr64v helper_gvec_rotr64v_riscv32 #define helper_gvec_eq8 helper_gvec_eq8_riscv32 #define helper_gvec_ne8 helper_gvec_ne8_riscv32 #define helper_gvec_lt8 helper_gvec_lt8_riscv32 @@ -1366,6 +1420,7 @@ #define helper_mret helper_mret_riscv32 #define helper_wfi helper_wfi_riscv32 #define helper_tlb_flush helper_tlb_flush_riscv32 +#define helper_hyp_tlb_flush helper_hyp_tlb_flush_riscv32 #define pmp_hart_has_privs pmp_hart_has_privs_riscv32 #define pmpcfg_csr_write pmpcfg_csr_write_riscv32 #define pmpcfg_csr_read pmpcfg_csr_read_riscv32 @@ -1386,4 +1441,1006 @@ #define gen_helper_tlb_flush gen_helper_tlb_flush_riscv32 #define riscv_fpr_regnames riscv_fpr_regnames_riscv32 #define riscv_int_regnames riscv_int_regnames_riscv32 +#define fclass_d fclass_d_riscv32 +#define fclass_h fclass_h_riscv32 +#define fclass_s fclass_s_riscv32 +#define helper_vaadd_vv_b helper_vaadd_vv_b_riscv32 +#define helper_vaadd_vv_d helper_vaadd_vv_d_riscv32 +#define helper_vaadd_vv_h helper_vaadd_vv_h_riscv32 +#define helper_vaadd_vv_w helper_vaadd_vv_w_riscv32 +#define helper_vaadd_vx_b helper_vaadd_vx_b_riscv32 +#define helper_vaadd_vx_d helper_vaadd_vx_d_riscv32 +#define helper_vaadd_vx_h helper_vaadd_vx_h_riscv32 +#define helper_vaadd_vx_w helper_vaadd_vx_w_riscv32 +#define 
helper_vadc_vvm_b helper_vadc_vvm_b_riscv32 +#define helper_vadc_vvm_d helper_vadc_vvm_d_riscv32 +#define helper_vadc_vvm_h helper_vadc_vvm_h_riscv32 +#define helper_vadc_vvm_w helper_vadc_vvm_w_riscv32 +#define helper_vadc_vxm_b helper_vadc_vxm_b_riscv32 +#define helper_vadc_vxm_d helper_vadc_vxm_d_riscv32 +#define helper_vadc_vxm_h helper_vadc_vxm_h_riscv32 +#define helper_vadc_vxm_w helper_vadc_vxm_w_riscv32 +#define helper_vadd_vv_b helper_vadd_vv_b_riscv32 +#define helper_vadd_vv_d helper_vadd_vv_d_riscv32 +#define helper_vadd_vv_h helper_vadd_vv_h_riscv32 +#define helper_vadd_vv_w helper_vadd_vv_w_riscv32 +#define helper_vadd_vx_b helper_vadd_vx_b_riscv32 +#define helper_vadd_vx_d helper_vadd_vx_d_riscv32 +#define helper_vadd_vx_h helper_vadd_vx_h_riscv32 +#define helper_vadd_vx_w helper_vadd_vx_w_riscv32 +#define helper_vamoaddw_v_w helper_vamoaddw_v_w_riscv32 +#define helper_vamoandw_v_w helper_vamoandw_v_w_riscv32 +#define helper_vamomaxuw_v_w helper_vamomaxuw_v_w_riscv32 +#define helper_vamomaxw_v_w helper_vamomaxw_v_w_riscv32 +#define helper_vamominuw_v_w helper_vamominuw_v_w_riscv32 +#define helper_vamominw_v_w helper_vamominw_v_w_riscv32 +#define helper_vamoorw_v_w helper_vamoorw_v_w_riscv32 +#define helper_vamoswapw_v_w helper_vamoswapw_v_w_riscv32 +#define helper_vamoxorw_v_w helper_vamoxorw_v_w_riscv32 +#define helper_vand_vv_b helper_vand_vv_b_riscv32 +#define helper_vand_vv_d helper_vand_vv_d_riscv32 +#define helper_vand_vv_h helper_vand_vv_h_riscv32 +#define helper_vand_vv_w helper_vand_vv_w_riscv32 +#define helper_vand_vx_b helper_vand_vx_b_riscv32 +#define helper_vand_vx_d helper_vand_vx_d_riscv32 +#define helper_vand_vx_h helper_vand_vx_h_riscv32 +#define helper_vand_vx_w helper_vand_vx_w_riscv32 +#define helper_vasub_vv_b helper_vasub_vv_b_riscv32 +#define helper_vasub_vv_d helper_vasub_vv_d_riscv32 +#define helper_vasub_vv_h helper_vasub_vv_h_riscv32 +#define helper_vasub_vv_w helper_vasub_vv_w_riscv32 +#define helper_vasub_vx_b helper_vasub_vx_b_riscv32 +#define helper_vasub_vx_d helper_vasub_vx_d_riscv32 +#define helper_vasub_vx_h helper_vasub_vx_h_riscv32 +#define helper_vasub_vx_w helper_vasub_vx_w_riscv32 +#define helper_vcompress_vm_b helper_vcompress_vm_b_riscv32 +#define helper_vcompress_vm_d helper_vcompress_vm_d_riscv32 +#define helper_vcompress_vm_h helper_vcompress_vm_h_riscv32 +#define helper_vcompress_vm_w helper_vcompress_vm_w_riscv32 +#define helper_vdiv_vv_b helper_vdiv_vv_b_riscv32 +#define helper_vdiv_vv_d helper_vdiv_vv_d_riscv32 +#define helper_vdiv_vv_h helper_vdiv_vv_h_riscv32 +#define helper_vdiv_vv_w helper_vdiv_vv_w_riscv32 +#define helper_vdiv_vx_b helper_vdiv_vx_b_riscv32 +#define helper_vdiv_vx_d helper_vdiv_vx_d_riscv32 +#define helper_vdiv_vx_h helper_vdiv_vx_h_riscv32 +#define helper_vdiv_vx_w helper_vdiv_vx_w_riscv32 +#define helper_vdivu_vv_b helper_vdivu_vv_b_riscv32 +#define helper_vdivu_vv_d helper_vdivu_vv_d_riscv32 +#define helper_vdivu_vv_h helper_vdivu_vv_h_riscv32 +#define helper_vdivu_vv_w helper_vdivu_vv_w_riscv32 +#define helper_vdivu_vx_b helper_vdivu_vx_b_riscv32 +#define helper_vdivu_vx_d helper_vdivu_vx_d_riscv32 +#define helper_vdivu_vx_h helper_vdivu_vx_h_riscv32 +#define helper_vdivu_vx_w helper_vdivu_vx_w_riscv32 +#define helper_vec_rsubs16 helper_vec_rsubs16_riscv32 +#define helper_vec_rsubs32 helper_vec_rsubs32_riscv32 +#define helper_vec_rsubs64 helper_vec_rsubs64_riscv32 +#define helper_vec_rsubs8 helper_vec_rsubs8_riscv32 +#define helper_vfadd_vf_d helper_vfadd_vf_d_riscv32 +#define helper_vfadd_vf_h 
helper_vfadd_vf_h_riscv32 +#define helper_vfadd_vf_w helper_vfadd_vf_w_riscv32 +#define helper_vfadd_vv_d helper_vfadd_vv_d_riscv32 +#define helper_vfadd_vv_h helper_vfadd_vv_h_riscv32 +#define helper_vfadd_vv_w helper_vfadd_vv_w_riscv32 +#define helper_vfclass_v_d helper_vfclass_v_d_riscv32 +#define helper_vfclass_v_h helper_vfclass_v_h_riscv32 +#define helper_vfclass_v_w helper_vfclass_v_w_riscv32 +#define helper_vfcvt_f_x_v_d helper_vfcvt_f_x_v_d_riscv32 +#define helper_vfcvt_f_x_v_h helper_vfcvt_f_x_v_h_riscv32 +#define helper_vfcvt_f_x_v_w helper_vfcvt_f_x_v_w_riscv32 +#define helper_vfcvt_f_xu_v_d helper_vfcvt_f_xu_v_d_riscv32 +#define helper_vfcvt_f_xu_v_h helper_vfcvt_f_xu_v_h_riscv32 +#define helper_vfcvt_f_xu_v_w helper_vfcvt_f_xu_v_w_riscv32 +#define helper_vfcvt_x_f_v_d helper_vfcvt_x_f_v_d_riscv32 +#define helper_vfcvt_x_f_v_h helper_vfcvt_x_f_v_h_riscv32 +#define helper_vfcvt_x_f_v_w helper_vfcvt_x_f_v_w_riscv32 +#define helper_vfcvt_xu_f_v_d helper_vfcvt_xu_f_v_d_riscv32 +#define helper_vfcvt_xu_f_v_h helper_vfcvt_xu_f_v_h_riscv32 +#define helper_vfcvt_xu_f_v_w helper_vfcvt_xu_f_v_w_riscv32 +#define helper_vfdiv_vf_d helper_vfdiv_vf_d_riscv32 +#define helper_vfdiv_vf_h helper_vfdiv_vf_h_riscv32 +#define helper_vfdiv_vf_w helper_vfdiv_vf_w_riscv32 +#define helper_vfdiv_vv_d helper_vfdiv_vv_d_riscv32 +#define helper_vfdiv_vv_h helper_vfdiv_vv_h_riscv32 +#define helper_vfdiv_vv_w helper_vfdiv_vv_w_riscv32 +#define helper_vfmacc_vf_d helper_vfmacc_vf_d_riscv32 +#define helper_vfmacc_vf_h helper_vfmacc_vf_h_riscv32 +#define helper_vfmacc_vf_w helper_vfmacc_vf_w_riscv32 +#define helper_vfmacc_vv_d helper_vfmacc_vv_d_riscv32 +#define helper_vfmacc_vv_h helper_vfmacc_vv_h_riscv32 +#define helper_vfmacc_vv_w helper_vfmacc_vv_w_riscv32 +#define helper_vfmadd_vf_d helper_vfmadd_vf_d_riscv32 +#define helper_vfmadd_vf_h helper_vfmadd_vf_h_riscv32 +#define helper_vfmadd_vf_w helper_vfmadd_vf_w_riscv32 +#define helper_vfmadd_vv_d helper_vfmadd_vv_d_riscv32 +#define helper_vfmadd_vv_h helper_vfmadd_vv_h_riscv32 +#define helper_vfmadd_vv_w helper_vfmadd_vv_w_riscv32 +#define helper_vfmax_vf_d helper_vfmax_vf_d_riscv32 +#define helper_vfmax_vf_h helper_vfmax_vf_h_riscv32 +#define helper_vfmax_vf_w helper_vfmax_vf_w_riscv32 +#define helper_vfmax_vv_d helper_vfmax_vv_d_riscv32 +#define helper_vfmax_vv_h helper_vfmax_vv_h_riscv32 +#define helper_vfmax_vv_w helper_vfmax_vv_w_riscv32 +#define helper_vfmerge_vfm_d helper_vfmerge_vfm_d_riscv32 +#define helper_vfmerge_vfm_h helper_vfmerge_vfm_h_riscv32 +#define helper_vfmerge_vfm_w helper_vfmerge_vfm_w_riscv32 +#define helper_vfmin_vf_d helper_vfmin_vf_d_riscv32 +#define helper_vfmin_vf_h helper_vfmin_vf_h_riscv32 +#define helper_vfmin_vf_w helper_vfmin_vf_w_riscv32 +#define helper_vfmin_vv_d helper_vfmin_vv_d_riscv32 +#define helper_vfmin_vv_h helper_vfmin_vv_h_riscv32 +#define helper_vfmin_vv_w helper_vfmin_vv_w_riscv32 +#define helper_vfmsac_vf_d helper_vfmsac_vf_d_riscv32 +#define helper_vfmsac_vf_h helper_vfmsac_vf_h_riscv32 +#define helper_vfmsac_vf_w helper_vfmsac_vf_w_riscv32 +#define helper_vfmsac_vv_d helper_vfmsac_vv_d_riscv32 +#define helper_vfmsac_vv_h helper_vfmsac_vv_h_riscv32 +#define helper_vfmsac_vv_w helper_vfmsac_vv_w_riscv32 +#define helper_vfmsub_vf_d helper_vfmsub_vf_d_riscv32 +#define helper_vfmsub_vf_h helper_vfmsub_vf_h_riscv32 +#define helper_vfmsub_vf_w helper_vfmsub_vf_w_riscv32 +#define helper_vfmsub_vv_d helper_vfmsub_vv_d_riscv32 +#define helper_vfmsub_vv_h helper_vfmsub_vv_h_riscv32 +#define helper_vfmsub_vv_w 
helper_vfmsub_vv_w_riscv32 +#define helper_vfmul_vf_d helper_vfmul_vf_d_riscv32 +#define helper_vfmul_vf_h helper_vfmul_vf_h_riscv32 +#define helper_vfmul_vf_w helper_vfmul_vf_w_riscv32 +#define helper_vfmul_vv_d helper_vfmul_vv_d_riscv32 +#define helper_vfmul_vv_h helper_vfmul_vv_h_riscv32 +#define helper_vfmul_vv_w helper_vfmul_vv_w_riscv32 +#define helper_vfncvt_f_f_v_h helper_vfncvt_f_f_v_h_riscv32 +#define helper_vfncvt_f_f_v_w helper_vfncvt_f_f_v_w_riscv32 +#define helper_vfncvt_f_x_v_h helper_vfncvt_f_x_v_h_riscv32 +#define helper_vfncvt_f_x_v_w helper_vfncvt_f_x_v_w_riscv32 +#define helper_vfncvt_f_xu_v_h helper_vfncvt_f_xu_v_h_riscv32 +#define helper_vfncvt_f_xu_v_w helper_vfncvt_f_xu_v_w_riscv32 +#define helper_vfncvt_x_f_v_h helper_vfncvt_x_f_v_h_riscv32 +#define helper_vfncvt_x_f_v_w helper_vfncvt_x_f_v_w_riscv32 +#define helper_vfncvt_xu_f_v_h helper_vfncvt_xu_f_v_h_riscv32 +#define helper_vfncvt_xu_f_v_w helper_vfncvt_xu_f_v_w_riscv32 +#define helper_vfnmacc_vf_d helper_vfnmacc_vf_d_riscv32 +#define helper_vfnmacc_vf_h helper_vfnmacc_vf_h_riscv32 +#define helper_vfnmacc_vf_w helper_vfnmacc_vf_w_riscv32 +#define helper_vfnmacc_vv_d helper_vfnmacc_vv_d_riscv32 +#define helper_vfnmacc_vv_h helper_vfnmacc_vv_h_riscv32 +#define helper_vfnmacc_vv_w helper_vfnmacc_vv_w_riscv32 +#define helper_vfnmadd_vf_d helper_vfnmadd_vf_d_riscv32 +#define helper_vfnmadd_vf_h helper_vfnmadd_vf_h_riscv32 +#define helper_vfnmadd_vf_w helper_vfnmadd_vf_w_riscv32 +#define helper_vfnmadd_vv_d helper_vfnmadd_vv_d_riscv32 +#define helper_vfnmadd_vv_h helper_vfnmadd_vv_h_riscv32 +#define helper_vfnmadd_vv_w helper_vfnmadd_vv_w_riscv32 +#define helper_vfnmsac_vf_d helper_vfnmsac_vf_d_riscv32 +#define helper_vfnmsac_vf_h helper_vfnmsac_vf_h_riscv32 +#define helper_vfnmsac_vf_w helper_vfnmsac_vf_w_riscv32 +#define helper_vfnmsac_vv_d helper_vfnmsac_vv_d_riscv32 +#define helper_vfnmsac_vv_h helper_vfnmsac_vv_h_riscv32 +#define helper_vfnmsac_vv_w helper_vfnmsac_vv_w_riscv32 +#define helper_vfnmsub_vf_d helper_vfnmsub_vf_d_riscv32 +#define helper_vfnmsub_vf_h helper_vfnmsub_vf_h_riscv32 +#define helper_vfnmsub_vf_w helper_vfnmsub_vf_w_riscv32 +#define helper_vfnmsub_vv_d helper_vfnmsub_vv_d_riscv32 +#define helper_vfnmsub_vv_h helper_vfnmsub_vv_h_riscv32 +#define helper_vfnmsub_vv_w helper_vfnmsub_vv_w_riscv32 +#define helper_vfrdiv_vf_d helper_vfrdiv_vf_d_riscv32 +#define helper_vfrdiv_vf_h helper_vfrdiv_vf_h_riscv32 +#define helper_vfrdiv_vf_w helper_vfrdiv_vf_w_riscv32 +#define helper_vfredmax_vs_d helper_vfredmax_vs_d_riscv32 +#define helper_vfredmax_vs_h helper_vfredmax_vs_h_riscv32 +#define helper_vfredmax_vs_w helper_vfredmax_vs_w_riscv32 +#define helper_vfredmin_vs_d helper_vfredmin_vs_d_riscv32 +#define helper_vfredmin_vs_h helper_vfredmin_vs_h_riscv32 +#define helper_vfredmin_vs_w helper_vfredmin_vs_w_riscv32 +#define helper_vfredsum_vs_d helper_vfredsum_vs_d_riscv32 +#define helper_vfredsum_vs_h helper_vfredsum_vs_h_riscv32 +#define helper_vfredsum_vs_w helper_vfredsum_vs_w_riscv32 +#define helper_vfrsub_vf_d helper_vfrsub_vf_d_riscv32 +#define helper_vfrsub_vf_h helper_vfrsub_vf_h_riscv32 +#define helper_vfrsub_vf_w helper_vfrsub_vf_w_riscv32 +#define helper_vfsgnj_vf_d helper_vfsgnj_vf_d_riscv32 +#define helper_vfsgnj_vf_h helper_vfsgnj_vf_h_riscv32 +#define helper_vfsgnj_vf_w helper_vfsgnj_vf_w_riscv32 +#define helper_vfsgnj_vv_d helper_vfsgnj_vv_d_riscv32 +#define helper_vfsgnj_vv_h helper_vfsgnj_vv_h_riscv32 +#define helper_vfsgnj_vv_w helper_vfsgnj_vv_w_riscv32 +#define helper_vfsgnjn_vf_d 
helper_vfsgnjn_vf_d_riscv32 +#define helper_vfsgnjn_vf_h helper_vfsgnjn_vf_h_riscv32 +#define helper_vfsgnjn_vf_w helper_vfsgnjn_vf_w_riscv32 +#define helper_vfsgnjn_vv_d helper_vfsgnjn_vv_d_riscv32 +#define helper_vfsgnjn_vv_h helper_vfsgnjn_vv_h_riscv32 +#define helper_vfsgnjn_vv_w helper_vfsgnjn_vv_w_riscv32 +#define helper_vfsgnjx_vf_d helper_vfsgnjx_vf_d_riscv32 +#define helper_vfsgnjx_vf_h helper_vfsgnjx_vf_h_riscv32 +#define helper_vfsgnjx_vf_w helper_vfsgnjx_vf_w_riscv32 +#define helper_vfsgnjx_vv_d helper_vfsgnjx_vv_d_riscv32 +#define helper_vfsgnjx_vv_h helper_vfsgnjx_vv_h_riscv32 +#define helper_vfsgnjx_vv_w helper_vfsgnjx_vv_w_riscv32 +#define helper_vfsqrt_v_d helper_vfsqrt_v_d_riscv32 +#define helper_vfsqrt_v_h helper_vfsqrt_v_h_riscv32 +#define helper_vfsqrt_v_w helper_vfsqrt_v_w_riscv32 +#define helper_vfsub_vf_d helper_vfsub_vf_d_riscv32 +#define helper_vfsub_vf_h helper_vfsub_vf_h_riscv32 +#define helper_vfsub_vf_w helper_vfsub_vf_w_riscv32 +#define helper_vfsub_vv_d helper_vfsub_vv_d_riscv32 +#define helper_vfsub_vv_h helper_vfsub_vv_h_riscv32 +#define helper_vfsub_vv_w helper_vfsub_vv_w_riscv32 +#define helper_vfwadd_vf_h helper_vfwadd_vf_h_riscv32 +#define helper_vfwadd_vf_w helper_vfwadd_vf_w_riscv32 +#define helper_vfwadd_vv_h helper_vfwadd_vv_h_riscv32 +#define helper_vfwadd_vv_w helper_vfwadd_vv_w_riscv32 +#define helper_vfwadd_wf_h helper_vfwadd_wf_h_riscv32 +#define helper_vfwadd_wf_w helper_vfwadd_wf_w_riscv32 +#define helper_vfwadd_wv_h helper_vfwadd_wv_h_riscv32 +#define helper_vfwadd_wv_w helper_vfwadd_wv_w_riscv32 +#define helper_vfwcvt_f_f_v_h helper_vfwcvt_f_f_v_h_riscv32 +#define helper_vfwcvt_f_f_v_w helper_vfwcvt_f_f_v_w_riscv32 +#define helper_vfwcvt_f_x_v_h helper_vfwcvt_f_x_v_h_riscv32 +#define helper_vfwcvt_f_x_v_w helper_vfwcvt_f_x_v_w_riscv32 +#define helper_vfwcvt_f_xu_v_h helper_vfwcvt_f_xu_v_h_riscv32 +#define helper_vfwcvt_f_xu_v_w helper_vfwcvt_f_xu_v_w_riscv32 +#define helper_vfwcvt_x_f_v_h helper_vfwcvt_x_f_v_h_riscv32 +#define helper_vfwcvt_x_f_v_w helper_vfwcvt_x_f_v_w_riscv32 +#define helper_vfwcvt_xu_f_v_h helper_vfwcvt_xu_f_v_h_riscv32 +#define helper_vfwcvt_xu_f_v_w helper_vfwcvt_xu_f_v_w_riscv32 +#define helper_vfwmacc_vf_h helper_vfwmacc_vf_h_riscv32 +#define helper_vfwmacc_vf_w helper_vfwmacc_vf_w_riscv32 +#define helper_vfwmacc_vv_h helper_vfwmacc_vv_h_riscv32 +#define helper_vfwmacc_vv_w helper_vfwmacc_vv_w_riscv32 +#define helper_vfwmsac_vf_h helper_vfwmsac_vf_h_riscv32 +#define helper_vfwmsac_vf_w helper_vfwmsac_vf_w_riscv32 +#define helper_vfwmsac_vv_h helper_vfwmsac_vv_h_riscv32 +#define helper_vfwmsac_vv_w helper_vfwmsac_vv_w_riscv32 +#define helper_vfwmul_vf_h helper_vfwmul_vf_h_riscv32 +#define helper_vfwmul_vf_w helper_vfwmul_vf_w_riscv32 +#define helper_vfwmul_vv_h helper_vfwmul_vv_h_riscv32 +#define helper_vfwmul_vv_w helper_vfwmul_vv_w_riscv32 +#define helper_vfwnmacc_vf_h helper_vfwnmacc_vf_h_riscv32 +#define helper_vfwnmacc_vf_w helper_vfwnmacc_vf_w_riscv32 +#define helper_vfwnmacc_vv_h helper_vfwnmacc_vv_h_riscv32 +#define helper_vfwnmacc_vv_w helper_vfwnmacc_vv_w_riscv32 +#define helper_vfwnmsac_vf_h helper_vfwnmsac_vf_h_riscv32 +#define helper_vfwnmsac_vf_w helper_vfwnmsac_vf_w_riscv32 +#define helper_vfwnmsac_vv_h helper_vfwnmsac_vv_h_riscv32 +#define helper_vfwnmsac_vv_w helper_vfwnmsac_vv_w_riscv32 +#define helper_vfwredsum_vs_h helper_vfwredsum_vs_h_riscv32 +#define helper_vfwredsum_vs_w helper_vfwredsum_vs_w_riscv32 +#define helper_vfwsub_vf_h helper_vfwsub_vf_h_riscv32 +#define helper_vfwsub_vf_w 
helper_vfwsub_vf_w_riscv32 +#define helper_vfwsub_vv_h helper_vfwsub_vv_h_riscv32 +#define helper_vfwsub_vv_w helper_vfwsub_vv_w_riscv32 +#define helper_vfwsub_wf_h helper_vfwsub_wf_h_riscv32 +#define helper_vfwsub_wf_w helper_vfwsub_wf_w_riscv32 +#define helper_vfwsub_wv_h helper_vfwsub_wv_h_riscv32 +#define helper_vfwsub_wv_w helper_vfwsub_wv_w_riscv32 +#define helper_vid_v_b helper_vid_v_b_riscv32 +#define helper_vid_v_d helper_vid_v_d_riscv32 +#define helper_vid_v_h helper_vid_v_h_riscv32 +#define helper_vid_v_w helper_vid_v_w_riscv32 +#define helper_viota_m_b helper_viota_m_b_riscv32 +#define helper_viota_m_d helper_viota_m_d_riscv32 +#define helper_viota_m_h helper_viota_m_h_riscv32 +#define helper_viota_m_w helper_viota_m_w_riscv32 +#define helper_vlb_v_b helper_vlb_v_b_riscv32 +#define helper_vlb_v_b_mask helper_vlb_v_b_mask_riscv32 +#define helper_vlb_v_d helper_vlb_v_d_riscv32 +#define helper_vlb_v_d_mask helper_vlb_v_d_mask_riscv32 +#define helper_vlb_v_h helper_vlb_v_h_riscv32 +#define helper_vlb_v_h_mask helper_vlb_v_h_mask_riscv32 +#define helper_vlb_v_w helper_vlb_v_w_riscv32 +#define helper_vlb_v_w_mask helper_vlb_v_w_mask_riscv32 +#define helper_vlbff_v_b helper_vlbff_v_b_riscv32 +#define helper_vlbff_v_d helper_vlbff_v_d_riscv32 +#define helper_vlbff_v_h helper_vlbff_v_h_riscv32 +#define helper_vlbff_v_w helper_vlbff_v_w_riscv32 +#define helper_vlbu_v_b helper_vlbu_v_b_riscv32 +#define helper_vlbu_v_b_mask helper_vlbu_v_b_mask_riscv32 +#define helper_vlbu_v_d helper_vlbu_v_d_riscv32 +#define helper_vlbu_v_d_mask helper_vlbu_v_d_mask_riscv32 +#define helper_vlbu_v_h helper_vlbu_v_h_riscv32 +#define helper_vlbu_v_h_mask helper_vlbu_v_h_mask_riscv32 +#define helper_vlbu_v_w helper_vlbu_v_w_riscv32 +#define helper_vlbu_v_w_mask helper_vlbu_v_w_mask_riscv32 +#define helper_vlbuff_v_b helper_vlbuff_v_b_riscv32 +#define helper_vlbuff_v_d helper_vlbuff_v_d_riscv32 +#define helper_vlbuff_v_h helper_vlbuff_v_h_riscv32 +#define helper_vlbuff_v_w helper_vlbuff_v_w_riscv32 +#define helper_vle_v_b helper_vle_v_b_riscv32 +#define helper_vle_v_b_mask helper_vle_v_b_mask_riscv32 +#define helper_vle_v_d helper_vle_v_d_riscv32 +#define helper_vle_v_d_mask helper_vle_v_d_mask_riscv32 +#define helper_vle_v_h helper_vle_v_h_riscv32 +#define helper_vle_v_h_mask helper_vle_v_h_mask_riscv32 +#define helper_vle_v_w helper_vle_v_w_riscv32 +#define helper_vle_v_w_mask helper_vle_v_w_mask_riscv32 +#define helper_vleff_v_b helper_vleff_v_b_riscv32 +#define helper_vleff_v_d helper_vleff_v_d_riscv32 +#define helper_vleff_v_h helper_vleff_v_h_riscv32 +#define helper_vleff_v_w helper_vleff_v_w_riscv32 +#define helper_vlh_v_d helper_vlh_v_d_riscv32 +#define helper_vlh_v_d_mask helper_vlh_v_d_mask_riscv32 +#define helper_vlh_v_h helper_vlh_v_h_riscv32 +#define helper_vlh_v_h_mask helper_vlh_v_h_mask_riscv32 +#define helper_vlh_v_w helper_vlh_v_w_riscv32 +#define helper_vlh_v_w_mask helper_vlh_v_w_mask_riscv32 +#define helper_vlhff_v_d helper_vlhff_v_d_riscv32 +#define helper_vlhff_v_h helper_vlhff_v_h_riscv32 +#define helper_vlhff_v_w helper_vlhff_v_w_riscv32 +#define helper_vlhu_v_d helper_vlhu_v_d_riscv32 +#define helper_vlhu_v_d_mask helper_vlhu_v_d_mask_riscv32 +#define helper_vlhu_v_h helper_vlhu_v_h_riscv32 +#define helper_vlhu_v_h_mask helper_vlhu_v_h_mask_riscv32 +#define helper_vlhu_v_w helper_vlhu_v_w_riscv32 +#define helper_vlhu_v_w_mask helper_vlhu_v_w_mask_riscv32 +#define helper_vlhuff_v_d helper_vlhuff_v_d_riscv32 +#define helper_vlhuff_v_h helper_vlhuff_v_h_riscv32 +#define helper_vlhuff_v_w 
helper_vlhuff_v_w_riscv32 +#define helper_vlsb_v_b helper_vlsb_v_b_riscv32 +#define helper_vlsb_v_d helper_vlsb_v_d_riscv32 +#define helper_vlsb_v_h helper_vlsb_v_h_riscv32 +#define helper_vlsb_v_w helper_vlsb_v_w_riscv32 +#define helper_vlsbu_v_b helper_vlsbu_v_b_riscv32 +#define helper_vlsbu_v_d helper_vlsbu_v_d_riscv32 +#define helper_vlsbu_v_h helper_vlsbu_v_h_riscv32 +#define helper_vlsbu_v_w helper_vlsbu_v_w_riscv32 +#define helper_vlse_v_b helper_vlse_v_b_riscv32 +#define helper_vlse_v_d helper_vlse_v_d_riscv32 +#define helper_vlse_v_h helper_vlse_v_h_riscv32 +#define helper_vlse_v_w helper_vlse_v_w_riscv32 +#define helper_vlsh_v_d helper_vlsh_v_d_riscv32 +#define helper_vlsh_v_h helper_vlsh_v_h_riscv32 +#define helper_vlsh_v_w helper_vlsh_v_w_riscv32 +#define helper_vlshu_v_d helper_vlshu_v_d_riscv32 +#define helper_vlshu_v_h helper_vlshu_v_h_riscv32 +#define helper_vlshu_v_w helper_vlshu_v_w_riscv32 +#define helper_vlsw_v_d helper_vlsw_v_d_riscv32 +#define helper_vlsw_v_w helper_vlsw_v_w_riscv32 +#define helper_vlswu_v_d helper_vlswu_v_d_riscv32 +#define helper_vlswu_v_w helper_vlswu_v_w_riscv32 +#define helper_vlw_v_d helper_vlw_v_d_riscv32 +#define helper_vlw_v_d_mask helper_vlw_v_d_mask_riscv32 +#define helper_vlw_v_w helper_vlw_v_w_riscv32 +#define helper_vlw_v_w_mask helper_vlw_v_w_mask_riscv32 +#define helper_vlwff_v_d helper_vlwff_v_d_riscv32 +#define helper_vlwff_v_w helper_vlwff_v_w_riscv32 +#define helper_vlwu_v_d helper_vlwu_v_d_riscv32 +#define helper_vlwu_v_d_mask helper_vlwu_v_d_mask_riscv32 +#define helper_vlwu_v_w helper_vlwu_v_w_riscv32 +#define helper_vlwu_v_w_mask helper_vlwu_v_w_mask_riscv32 +#define helper_vlwuff_v_d helper_vlwuff_v_d_riscv32 +#define helper_vlwuff_v_w helper_vlwuff_v_w_riscv32 +#define helper_vlxb_v_b helper_vlxb_v_b_riscv32 +#define helper_vlxb_v_d helper_vlxb_v_d_riscv32 +#define helper_vlxb_v_h helper_vlxb_v_h_riscv32 +#define helper_vlxb_v_w helper_vlxb_v_w_riscv32 +#define helper_vlxbu_v_b helper_vlxbu_v_b_riscv32 +#define helper_vlxbu_v_d helper_vlxbu_v_d_riscv32 +#define helper_vlxbu_v_h helper_vlxbu_v_h_riscv32 +#define helper_vlxbu_v_w helper_vlxbu_v_w_riscv32 +#define helper_vlxe_v_b helper_vlxe_v_b_riscv32 +#define helper_vlxe_v_d helper_vlxe_v_d_riscv32 +#define helper_vlxe_v_h helper_vlxe_v_h_riscv32 +#define helper_vlxe_v_w helper_vlxe_v_w_riscv32 +#define helper_vlxh_v_d helper_vlxh_v_d_riscv32 +#define helper_vlxh_v_h helper_vlxh_v_h_riscv32 +#define helper_vlxh_v_w helper_vlxh_v_w_riscv32 +#define helper_vlxhu_v_d helper_vlxhu_v_d_riscv32 +#define helper_vlxhu_v_h helper_vlxhu_v_h_riscv32 +#define helper_vlxhu_v_w helper_vlxhu_v_w_riscv32 +#define helper_vlxw_v_d helper_vlxw_v_d_riscv32 +#define helper_vlxw_v_w helper_vlxw_v_w_riscv32 +#define helper_vlxwu_v_d helper_vlxwu_v_d_riscv32 +#define helper_vlxwu_v_w helper_vlxwu_v_w_riscv32 +#define helper_vmacc_vv_b helper_vmacc_vv_b_riscv32 +#define helper_vmacc_vv_d helper_vmacc_vv_d_riscv32 +#define helper_vmacc_vv_h helper_vmacc_vv_h_riscv32 +#define helper_vmacc_vv_w helper_vmacc_vv_w_riscv32 +#define helper_vmacc_vx_b helper_vmacc_vx_b_riscv32 +#define helper_vmacc_vx_d helper_vmacc_vx_d_riscv32 +#define helper_vmacc_vx_h helper_vmacc_vx_h_riscv32 +#define helper_vmacc_vx_w helper_vmacc_vx_w_riscv32 +#define helper_vmadc_vvm_b helper_vmadc_vvm_b_riscv32 +#define helper_vmadc_vvm_d helper_vmadc_vvm_d_riscv32 +#define helper_vmadc_vvm_h helper_vmadc_vvm_h_riscv32 +#define helper_vmadc_vvm_w helper_vmadc_vvm_w_riscv32 +#define helper_vmadc_vxm_b helper_vmadc_vxm_b_riscv32 
+#define helper_vmadc_vxm_d helper_vmadc_vxm_d_riscv32 +#define helper_vmadc_vxm_h helper_vmadc_vxm_h_riscv32 +#define helper_vmadc_vxm_w helper_vmadc_vxm_w_riscv32 +#define helper_vmadd_vv_b helper_vmadd_vv_b_riscv32 +#define helper_vmadd_vv_d helper_vmadd_vv_d_riscv32 +#define helper_vmadd_vv_h helper_vmadd_vv_h_riscv32 +#define helper_vmadd_vv_w helper_vmadd_vv_w_riscv32 +#define helper_vmadd_vx_b helper_vmadd_vx_b_riscv32 +#define helper_vmadd_vx_d helper_vmadd_vx_d_riscv32 +#define helper_vmadd_vx_h helper_vmadd_vx_h_riscv32 +#define helper_vmadd_vx_w helper_vmadd_vx_w_riscv32 +#define helper_vmand_mm helper_vmand_mm_riscv32 +#define helper_vmandnot_mm helper_vmandnot_mm_riscv32 +#define helper_vmax_vv_b helper_vmax_vv_b_riscv32 +#define helper_vmax_vv_d helper_vmax_vv_d_riscv32 +#define helper_vmax_vv_h helper_vmax_vv_h_riscv32 +#define helper_vmax_vv_w helper_vmax_vv_w_riscv32 +#define helper_vmax_vx_b helper_vmax_vx_b_riscv32 +#define helper_vmax_vx_d helper_vmax_vx_d_riscv32 +#define helper_vmax_vx_h helper_vmax_vx_h_riscv32 +#define helper_vmax_vx_w helper_vmax_vx_w_riscv32 +#define helper_vmaxu_vv_b helper_vmaxu_vv_b_riscv32 +#define helper_vmaxu_vv_d helper_vmaxu_vv_d_riscv32 +#define helper_vmaxu_vv_h helper_vmaxu_vv_h_riscv32 +#define helper_vmaxu_vv_w helper_vmaxu_vv_w_riscv32 +#define helper_vmaxu_vx_b helper_vmaxu_vx_b_riscv32 +#define helper_vmaxu_vx_d helper_vmaxu_vx_d_riscv32 +#define helper_vmaxu_vx_h helper_vmaxu_vx_h_riscv32 +#define helper_vmaxu_vx_w helper_vmaxu_vx_w_riscv32 +#define helper_vmerge_vvm_b helper_vmerge_vvm_b_riscv32 +#define helper_vmerge_vvm_d helper_vmerge_vvm_d_riscv32 +#define helper_vmerge_vvm_h helper_vmerge_vvm_h_riscv32 +#define helper_vmerge_vvm_w helper_vmerge_vvm_w_riscv32 +#define helper_vmerge_vxm_b helper_vmerge_vxm_b_riscv32 +#define helper_vmerge_vxm_d helper_vmerge_vxm_d_riscv32 +#define helper_vmerge_vxm_h helper_vmerge_vxm_h_riscv32 +#define helper_vmerge_vxm_w helper_vmerge_vxm_w_riscv32 +#define helper_vmfeq_vf_d helper_vmfeq_vf_d_riscv32 +#define helper_vmfeq_vf_h helper_vmfeq_vf_h_riscv32 +#define helper_vmfeq_vf_w helper_vmfeq_vf_w_riscv32 +#define helper_vmfeq_vv_d helper_vmfeq_vv_d_riscv32 +#define helper_vmfeq_vv_h helper_vmfeq_vv_h_riscv32 +#define helper_vmfeq_vv_w helper_vmfeq_vv_w_riscv32 +#define helper_vmfge_vf_d helper_vmfge_vf_d_riscv32 +#define helper_vmfge_vf_h helper_vmfge_vf_h_riscv32 +#define helper_vmfge_vf_w helper_vmfge_vf_w_riscv32 +#define helper_vmfgt_vf_d helper_vmfgt_vf_d_riscv32 +#define helper_vmfgt_vf_h helper_vmfgt_vf_h_riscv32 +#define helper_vmfgt_vf_w helper_vmfgt_vf_w_riscv32 +#define helper_vmfirst_m helper_vmfirst_m_riscv32 +#define helper_vmfle_vf_d helper_vmfle_vf_d_riscv32 +#define helper_vmfle_vf_h helper_vmfle_vf_h_riscv32 +#define helper_vmfle_vf_w helper_vmfle_vf_w_riscv32 +#define helper_vmfle_vv_d helper_vmfle_vv_d_riscv32 +#define helper_vmfle_vv_h helper_vmfle_vv_h_riscv32 +#define helper_vmfle_vv_w helper_vmfle_vv_w_riscv32 +#define helper_vmflt_vf_d helper_vmflt_vf_d_riscv32 +#define helper_vmflt_vf_h helper_vmflt_vf_h_riscv32 +#define helper_vmflt_vf_w helper_vmflt_vf_w_riscv32 +#define helper_vmflt_vv_d helper_vmflt_vv_d_riscv32 +#define helper_vmflt_vv_h helper_vmflt_vv_h_riscv32 +#define helper_vmflt_vv_w helper_vmflt_vv_w_riscv32 +#define helper_vmfne_vf_d helper_vmfne_vf_d_riscv32 +#define helper_vmfne_vf_h helper_vmfne_vf_h_riscv32 +#define helper_vmfne_vf_w helper_vmfne_vf_w_riscv32 +#define helper_vmfne_vv_d helper_vmfne_vv_d_riscv32 +#define helper_vmfne_vv_h 
helper_vmfne_vv_h_riscv32 +#define helper_vmfne_vv_w helper_vmfne_vv_w_riscv32 +#define helper_vmford_vf_d helper_vmford_vf_d_riscv32 +#define helper_vmford_vf_h helper_vmford_vf_h_riscv32 +#define helper_vmford_vf_w helper_vmford_vf_w_riscv32 +#define helper_vmford_vv_d helper_vmford_vv_d_riscv32 +#define helper_vmford_vv_h helper_vmford_vv_h_riscv32 +#define helper_vmford_vv_w helper_vmford_vv_w_riscv32 +#define helper_vmin_vv_b helper_vmin_vv_b_riscv32 +#define helper_vmin_vv_d helper_vmin_vv_d_riscv32 +#define helper_vmin_vv_h helper_vmin_vv_h_riscv32 +#define helper_vmin_vv_w helper_vmin_vv_w_riscv32 +#define helper_vmin_vx_b helper_vmin_vx_b_riscv32 +#define helper_vmin_vx_d helper_vmin_vx_d_riscv32 +#define helper_vmin_vx_h helper_vmin_vx_h_riscv32 +#define helper_vmin_vx_w helper_vmin_vx_w_riscv32 +#define helper_vminu_vv_b helper_vminu_vv_b_riscv32 +#define helper_vminu_vv_d helper_vminu_vv_d_riscv32 +#define helper_vminu_vv_h helper_vminu_vv_h_riscv32 +#define helper_vminu_vv_w helper_vminu_vv_w_riscv32 +#define helper_vminu_vx_b helper_vminu_vx_b_riscv32 +#define helper_vminu_vx_d helper_vminu_vx_d_riscv32 +#define helper_vminu_vx_h helper_vminu_vx_h_riscv32 +#define helper_vminu_vx_w helper_vminu_vx_w_riscv32 +#define helper_vmnand_mm helper_vmnand_mm_riscv32 +#define helper_vmnor_mm helper_vmnor_mm_riscv32 +#define helper_vmor_mm helper_vmor_mm_riscv32 +#define helper_vmornot_mm helper_vmornot_mm_riscv32 +#define helper_vmpopc_m helper_vmpopc_m_riscv32 +#define helper_vmsbc_vvm_b helper_vmsbc_vvm_b_riscv32 +#define helper_vmsbc_vvm_d helper_vmsbc_vvm_d_riscv32 +#define helper_vmsbc_vvm_h helper_vmsbc_vvm_h_riscv32 +#define helper_vmsbc_vvm_w helper_vmsbc_vvm_w_riscv32 +#define helper_vmsbc_vxm_b helper_vmsbc_vxm_b_riscv32 +#define helper_vmsbc_vxm_d helper_vmsbc_vxm_d_riscv32 +#define helper_vmsbc_vxm_h helper_vmsbc_vxm_h_riscv32 +#define helper_vmsbc_vxm_w helper_vmsbc_vxm_w_riscv32 +#define helper_vmsbf_m helper_vmsbf_m_riscv32 +#define helper_vmseq_vv_b helper_vmseq_vv_b_riscv32 +#define helper_vmseq_vv_d helper_vmseq_vv_d_riscv32 +#define helper_vmseq_vv_h helper_vmseq_vv_h_riscv32 +#define helper_vmseq_vv_w helper_vmseq_vv_w_riscv32 +#define helper_vmseq_vx_b helper_vmseq_vx_b_riscv32 +#define helper_vmseq_vx_d helper_vmseq_vx_d_riscv32 +#define helper_vmseq_vx_h helper_vmseq_vx_h_riscv32 +#define helper_vmseq_vx_w helper_vmseq_vx_w_riscv32 +#define helper_vmsgt_vx_b helper_vmsgt_vx_b_riscv32 +#define helper_vmsgt_vx_d helper_vmsgt_vx_d_riscv32 +#define helper_vmsgt_vx_h helper_vmsgt_vx_h_riscv32 +#define helper_vmsgt_vx_w helper_vmsgt_vx_w_riscv32 +#define helper_vmsgtu_vx_b helper_vmsgtu_vx_b_riscv32 +#define helper_vmsgtu_vx_d helper_vmsgtu_vx_d_riscv32 +#define helper_vmsgtu_vx_h helper_vmsgtu_vx_h_riscv32 +#define helper_vmsgtu_vx_w helper_vmsgtu_vx_w_riscv32 +#define helper_vmsif_m helper_vmsif_m_riscv32 +#define helper_vmsle_vv_b helper_vmsle_vv_b_riscv32 +#define helper_vmsle_vv_d helper_vmsle_vv_d_riscv32 +#define helper_vmsle_vv_h helper_vmsle_vv_h_riscv32 +#define helper_vmsle_vv_w helper_vmsle_vv_w_riscv32 +#define helper_vmsle_vx_b helper_vmsle_vx_b_riscv32 +#define helper_vmsle_vx_d helper_vmsle_vx_d_riscv32 +#define helper_vmsle_vx_h helper_vmsle_vx_h_riscv32 +#define helper_vmsle_vx_w helper_vmsle_vx_w_riscv32 +#define helper_vmsleu_vv_b helper_vmsleu_vv_b_riscv32 +#define helper_vmsleu_vv_d helper_vmsleu_vv_d_riscv32 +#define helper_vmsleu_vv_h helper_vmsleu_vv_h_riscv32 +#define helper_vmsleu_vv_w helper_vmsleu_vv_w_riscv32 +#define helper_vmsleu_vx_b 
helper_vmsleu_vx_b_riscv32 +#define helper_vmsleu_vx_d helper_vmsleu_vx_d_riscv32 +#define helper_vmsleu_vx_h helper_vmsleu_vx_h_riscv32 +#define helper_vmsleu_vx_w helper_vmsleu_vx_w_riscv32 +#define helper_vmslt_vv_b helper_vmslt_vv_b_riscv32 +#define helper_vmslt_vv_d helper_vmslt_vv_d_riscv32 +#define helper_vmslt_vv_h helper_vmslt_vv_h_riscv32 +#define helper_vmslt_vv_w helper_vmslt_vv_w_riscv32 +#define helper_vmslt_vx_b helper_vmslt_vx_b_riscv32 +#define helper_vmslt_vx_d helper_vmslt_vx_d_riscv32 +#define helper_vmslt_vx_h helper_vmslt_vx_h_riscv32 +#define helper_vmslt_vx_w helper_vmslt_vx_w_riscv32 +#define helper_vmsltu_vv_b helper_vmsltu_vv_b_riscv32 +#define helper_vmsltu_vv_d helper_vmsltu_vv_d_riscv32 +#define helper_vmsltu_vv_h helper_vmsltu_vv_h_riscv32 +#define helper_vmsltu_vv_w helper_vmsltu_vv_w_riscv32 +#define helper_vmsltu_vx_b helper_vmsltu_vx_b_riscv32 +#define helper_vmsltu_vx_d helper_vmsltu_vx_d_riscv32 +#define helper_vmsltu_vx_h helper_vmsltu_vx_h_riscv32 +#define helper_vmsltu_vx_w helper_vmsltu_vx_w_riscv32 +#define helper_vmsne_vv_b helper_vmsne_vv_b_riscv32 +#define helper_vmsne_vv_d helper_vmsne_vv_d_riscv32 +#define helper_vmsne_vv_h helper_vmsne_vv_h_riscv32 +#define helper_vmsne_vv_w helper_vmsne_vv_w_riscv32 +#define helper_vmsne_vx_b helper_vmsne_vx_b_riscv32 +#define helper_vmsne_vx_d helper_vmsne_vx_d_riscv32 +#define helper_vmsne_vx_h helper_vmsne_vx_h_riscv32 +#define helper_vmsne_vx_w helper_vmsne_vx_w_riscv32 +#define helper_vmsof_m helper_vmsof_m_riscv32 +#define helper_vmul_vv_b helper_vmul_vv_b_riscv32 +#define helper_vmul_vv_d helper_vmul_vv_d_riscv32 +#define helper_vmul_vv_h helper_vmul_vv_h_riscv32 +#define helper_vmul_vv_w helper_vmul_vv_w_riscv32 +#define helper_vmul_vx_b helper_vmul_vx_b_riscv32 +#define helper_vmul_vx_d helper_vmul_vx_d_riscv32 +#define helper_vmul_vx_h helper_vmul_vx_h_riscv32 +#define helper_vmul_vx_w helper_vmul_vx_w_riscv32 +#define helper_vmulh_vv_b helper_vmulh_vv_b_riscv32 +#define helper_vmulh_vv_d helper_vmulh_vv_d_riscv32 +#define helper_vmulh_vv_h helper_vmulh_vv_h_riscv32 +#define helper_vmulh_vv_w helper_vmulh_vv_w_riscv32 +#define helper_vmulh_vx_b helper_vmulh_vx_b_riscv32 +#define helper_vmulh_vx_d helper_vmulh_vx_d_riscv32 +#define helper_vmulh_vx_h helper_vmulh_vx_h_riscv32 +#define helper_vmulh_vx_w helper_vmulh_vx_w_riscv32 +#define helper_vmulhsu_vv_b helper_vmulhsu_vv_b_riscv32 +#define helper_vmulhsu_vv_d helper_vmulhsu_vv_d_riscv32 +#define helper_vmulhsu_vv_h helper_vmulhsu_vv_h_riscv32 +#define helper_vmulhsu_vv_w helper_vmulhsu_vv_w_riscv32 +#define helper_vmulhsu_vx_b helper_vmulhsu_vx_b_riscv32 +#define helper_vmulhsu_vx_d helper_vmulhsu_vx_d_riscv32 +#define helper_vmulhsu_vx_h helper_vmulhsu_vx_h_riscv32 +#define helper_vmulhsu_vx_w helper_vmulhsu_vx_w_riscv32 +#define helper_vmulhu_vv_b helper_vmulhu_vv_b_riscv32 +#define helper_vmulhu_vv_d helper_vmulhu_vv_d_riscv32 +#define helper_vmulhu_vv_h helper_vmulhu_vv_h_riscv32 +#define helper_vmulhu_vv_w helper_vmulhu_vv_w_riscv32 +#define helper_vmulhu_vx_b helper_vmulhu_vx_b_riscv32 +#define helper_vmulhu_vx_d helper_vmulhu_vx_d_riscv32 +#define helper_vmulhu_vx_h helper_vmulhu_vx_h_riscv32 +#define helper_vmulhu_vx_w helper_vmulhu_vx_w_riscv32 +#define helper_vmv_v_v_b helper_vmv_v_v_b_riscv32 +#define helper_vmv_v_v_d helper_vmv_v_v_d_riscv32 +#define helper_vmv_v_v_h helper_vmv_v_v_h_riscv32 +#define helper_vmv_v_v_w helper_vmv_v_v_w_riscv32 +#define helper_vmv_v_x_b helper_vmv_v_x_b_riscv32 +#define helper_vmv_v_x_d 
helper_vmv_v_x_d_riscv32 +#define helper_vmv_v_x_h helper_vmv_v_x_h_riscv32 +#define helper_vmv_v_x_w helper_vmv_v_x_w_riscv32 +#define helper_vmxnor_mm helper_vmxnor_mm_riscv32 +#define helper_vmxor_mm helper_vmxor_mm_riscv32 +#define helper_vnclip_vv_b helper_vnclip_vv_b_riscv32 +#define helper_vnclip_vv_h helper_vnclip_vv_h_riscv32 +#define helper_vnclip_vv_w helper_vnclip_vv_w_riscv32 +#define helper_vnclip_vx_b helper_vnclip_vx_b_riscv32 +#define helper_vnclip_vx_h helper_vnclip_vx_h_riscv32 +#define helper_vnclip_vx_w helper_vnclip_vx_w_riscv32 +#define helper_vnclipu_vv_b helper_vnclipu_vv_b_riscv32 +#define helper_vnclipu_vv_h helper_vnclipu_vv_h_riscv32 +#define helper_vnclipu_vv_w helper_vnclipu_vv_w_riscv32 +#define helper_vnclipu_vx_b helper_vnclipu_vx_b_riscv32 +#define helper_vnclipu_vx_h helper_vnclipu_vx_h_riscv32 +#define helper_vnclipu_vx_w helper_vnclipu_vx_w_riscv32 +#define helper_vnmsac_vv_b helper_vnmsac_vv_b_riscv32 +#define helper_vnmsac_vv_d helper_vnmsac_vv_d_riscv32 +#define helper_vnmsac_vv_h helper_vnmsac_vv_h_riscv32 +#define helper_vnmsac_vv_w helper_vnmsac_vv_w_riscv32 +#define helper_vnmsac_vx_b helper_vnmsac_vx_b_riscv32 +#define helper_vnmsac_vx_d helper_vnmsac_vx_d_riscv32 +#define helper_vnmsac_vx_h helper_vnmsac_vx_h_riscv32 +#define helper_vnmsac_vx_w helper_vnmsac_vx_w_riscv32 +#define helper_vnmsub_vv_b helper_vnmsub_vv_b_riscv32 +#define helper_vnmsub_vv_d helper_vnmsub_vv_d_riscv32 +#define helper_vnmsub_vv_h helper_vnmsub_vv_h_riscv32 +#define helper_vnmsub_vv_w helper_vnmsub_vv_w_riscv32 +#define helper_vnmsub_vx_b helper_vnmsub_vx_b_riscv32 +#define helper_vnmsub_vx_d helper_vnmsub_vx_d_riscv32 +#define helper_vnmsub_vx_h helper_vnmsub_vx_h_riscv32 +#define helper_vnmsub_vx_w helper_vnmsub_vx_w_riscv32 +#define helper_vnsra_vv_b helper_vnsra_vv_b_riscv32 +#define helper_vnsra_vv_h helper_vnsra_vv_h_riscv32 +#define helper_vnsra_vv_w helper_vnsra_vv_w_riscv32 +#define helper_vnsra_vx_b helper_vnsra_vx_b_riscv32 +#define helper_vnsra_vx_h helper_vnsra_vx_h_riscv32 +#define helper_vnsra_vx_w helper_vnsra_vx_w_riscv32 +#define helper_vnsrl_vv_b helper_vnsrl_vv_b_riscv32 +#define helper_vnsrl_vv_h helper_vnsrl_vv_h_riscv32 +#define helper_vnsrl_vv_w helper_vnsrl_vv_w_riscv32 +#define helper_vnsrl_vx_b helper_vnsrl_vx_b_riscv32 +#define helper_vnsrl_vx_h helper_vnsrl_vx_h_riscv32 +#define helper_vnsrl_vx_w helper_vnsrl_vx_w_riscv32 +#define helper_vor_vv_b helper_vor_vv_b_riscv32 +#define helper_vor_vv_d helper_vor_vv_d_riscv32 +#define helper_vor_vv_h helper_vor_vv_h_riscv32 +#define helper_vor_vv_w helper_vor_vv_w_riscv32 +#define helper_vor_vx_b helper_vor_vx_b_riscv32 +#define helper_vor_vx_d helper_vor_vx_d_riscv32 +#define helper_vor_vx_h helper_vor_vx_h_riscv32 +#define helper_vor_vx_w helper_vor_vx_w_riscv32 +#define helper_vredand_vs_b helper_vredand_vs_b_riscv32 +#define helper_vredand_vs_d helper_vredand_vs_d_riscv32 +#define helper_vredand_vs_h helper_vredand_vs_h_riscv32 +#define helper_vredand_vs_w helper_vredand_vs_w_riscv32 +#define helper_vredmax_vs_b helper_vredmax_vs_b_riscv32 +#define helper_vredmax_vs_d helper_vredmax_vs_d_riscv32 +#define helper_vredmax_vs_h helper_vredmax_vs_h_riscv32 +#define helper_vredmax_vs_w helper_vredmax_vs_w_riscv32 +#define helper_vredmaxu_vs_b helper_vredmaxu_vs_b_riscv32 +#define helper_vredmaxu_vs_d helper_vredmaxu_vs_d_riscv32 +#define helper_vredmaxu_vs_h helper_vredmaxu_vs_h_riscv32 +#define helper_vredmaxu_vs_w helper_vredmaxu_vs_w_riscv32 +#define helper_vredmin_vs_b 
helper_vredmin_vs_b_riscv32 +#define helper_vredmin_vs_d helper_vredmin_vs_d_riscv32 +#define helper_vredmin_vs_h helper_vredmin_vs_h_riscv32 +#define helper_vredmin_vs_w helper_vredmin_vs_w_riscv32 +#define helper_vredminu_vs_b helper_vredminu_vs_b_riscv32 +#define helper_vredminu_vs_d helper_vredminu_vs_d_riscv32 +#define helper_vredminu_vs_h helper_vredminu_vs_h_riscv32 +#define helper_vredminu_vs_w helper_vredminu_vs_w_riscv32 +#define helper_vredor_vs_b helper_vredor_vs_b_riscv32 +#define helper_vredor_vs_d helper_vredor_vs_d_riscv32 +#define helper_vredor_vs_h helper_vredor_vs_h_riscv32 +#define helper_vredor_vs_w helper_vredor_vs_w_riscv32 +#define helper_vredsum_vs_b helper_vredsum_vs_b_riscv32 +#define helper_vredsum_vs_d helper_vredsum_vs_d_riscv32 +#define helper_vredsum_vs_h helper_vredsum_vs_h_riscv32 +#define helper_vredsum_vs_w helper_vredsum_vs_w_riscv32 +#define helper_vredxor_vs_b helper_vredxor_vs_b_riscv32 +#define helper_vredxor_vs_d helper_vredxor_vs_d_riscv32 +#define helper_vredxor_vs_h helper_vredxor_vs_h_riscv32 +#define helper_vredxor_vs_w helper_vredxor_vs_w_riscv32 +#define helper_vrem_vv_b helper_vrem_vv_b_riscv32 +#define helper_vrem_vv_d helper_vrem_vv_d_riscv32 +#define helper_vrem_vv_h helper_vrem_vv_h_riscv32 +#define helper_vrem_vv_w helper_vrem_vv_w_riscv32 +#define helper_vrem_vx_b helper_vrem_vx_b_riscv32 +#define helper_vrem_vx_d helper_vrem_vx_d_riscv32 +#define helper_vrem_vx_h helper_vrem_vx_h_riscv32 +#define helper_vrem_vx_w helper_vrem_vx_w_riscv32 +#define helper_vremu_vv_b helper_vremu_vv_b_riscv32 +#define helper_vremu_vv_d helper_vremu_vv_d_riscv32 +#define helper_vremu_vv_h helper_vremu_vv_h_riscv32 +#define helper_vremu_vv_w helper_vremu_vv_w_riscv32 +#define helper_vremu_vx_b helper_vremu_vx_b_riscv32 +#define helper_vremu_vx_d helper_vremu_vx_d_riscv32 +#define helper_vremu_vx_h helper_vremu_vx_h_riscv32 +#define helper_vremu_vx_w helper_vremu_vx_w_riscv32 +#define helper_vrgather_vv_b helper_vrgather_vv_b_riscv32 +#define helper_vrgather_vv_d helper_vrgather_vv_d_riscv32 +#define helper_vrgather_vv_h helper_vrgather_vv_h_riscv32 +#define helper_vrgather_vv_w helper_vrgather_vv_w_riscv32 +#define helper_vrgather_vx_b helper_vrgather_vx_b_riscv32 +#define helper_vrgather_vx_d helper_vrgather_vx_d_riscv32 +#define helper_vrgather_vx_h helper_vrgather_vx_h_riscv32 +#define helper_vrgather_vx_w helper_vrgather_vx_w_riscv32 +#define helper_vrsub_vx_b helper_vrsub_vx_b_riscv32 +#define helper_vrsub_vx_d helper_vrsub_vx_d_riscv32 +#define helper_vrsub_vx_h helper_vrsub_vx_h_riscv32 +#define helper_vrsub_vx_w helper_vrsub_vx_w_riscv32 +#define helper_vsadd_vv_b helper_vsadd_vv_b_riscv32 +#define helper_vsadd_vv_d helper_vsadd_vv_d_riscv32 +#define helper_vsadd_vv_h helper_vsadd_vv_h_riscv32 +#define helper_vsadd_vv_w helper_vsadd_vv_w_riscv32 +#define helper_vsadd_vx_b helper_vsadd_vx_b_riscv32 +#define helper_vsadd_vx_d helper_vsadd_vx_d_riscv32 +#define helper_vsadd_vx_h helper_vsadd_vx_h_riscv32 +#define helper_vsadd_vx_w helper_vsadd_vx_w_riscv32 +#define helper_vsaddu_vv_b helper_vsaddu_vv_b_riscv32 +#define helper_vsaddu_vv_d helper_vsaddu_vv_d_riscv32 +#define helper_vsaddu_vv_h helper_vsaddu_vv_h_riscv32 +#define helper_vsaddu_vv_w helper_vsaddu_vv_w_riscv32 +#define helper_vsaddu_vx_b helper_vsaddu_vx_b_riscv32 +#define helper_vsaddu_vx_d helper_vsaddu_vx_d_riscv32 +#define helper_vsaddu_vx_h helper_vsaddu_vx_h_riscv32 +#define helper_vsaddu_vx_w helper_vsaddu_vx_w_riscv32 +#define helper_vsb_v_b helper_vsb_v_b_riscv32 +#define 
helper_vsb_v_b_mask helper_vsb_v_b_mask_riscv32 +#define helper_vsb_v_d helper_vsb_v_d_riscv32 +#define helper_vsb_v_d_mask helper_vsb_v_d_mask_riscv32 +#define helper_vsb_v_h helper_vsb_v_h_riscv32 +#define helper_vsb_v_h_mask helper_vsb_v_h_mask_riscv32 +#define helper_vsb_v_w helper_vsb_v_w_riscv32 +#define helper_vsb_v_w_mask helper_vsb_v_w_mask_riscv32 +#define helper_vsbc_vvm_b helper_vsbc_vvm_b_riscv32 +#define helper_vsbc_vvm_d helper_vsbc_vvm_d_riscv32 +#define helper_vsbc_vvm_h helper_vsbc_vvm_h_riscv32 +#define helper_vsbc_vvm_w helper_vsbc_vvm_w_riscv32 +#define helper_vsbc_vxm_b helper_vsbc_vxm_b_riscv32 +#define helper_vsbc_vxm_d helper_vsbc_vxm_d_riscv32 +#define helper_vsbc_vxm_h helper_vsbc_vxm_h_riscv32 +#define helper_vsbc_vxm_w helper_vsbc_vxm_w_riscv32 +#define helper_vse_v_b helper_vse_v_b_riscv32 +#define helper_vse_v_b_mask helper_vse_v_b_mask_riscv32 +#define helper_vse_v_d helper_vse_v_d_riscv32 +#define helper_vse_v_d_mask helper_vse_v_d_mask_riscv32 +#define helper_vse_v_h helper_vse_v_h_riscv32 +#define helper_vse_v_h_mask helper_vse_v_h_mask_riscv32 +#define helper_vse_v_w helper_vse_v_w_riscv32 +#define helper_vse_v_w_mask helper_vse_v_w_mask_riscv32 +#define helper_vsetvl helper_vsetvl_riscv32 +#define helper_vsh_v_d helper_vsh_v_d_riscv32 +#define helper_vsh_v_d_mask helper_vsh_v_d_mask_riscv32 +#define helper_vsh_v_h helper_vsh_v_h_riscv32 +#define helper_vsh_v_h_mask helper_vsh_v_h_mask_riscv32 +#define helper_vsh_v_w helper_vsh_v_w_riscv32 +#define helper_vsh_v_w_mask helper_vsh_v_w_mask_riscv32 +#define helper_vslide1down_vx_b helper_vslide1down_vx_b_riscv32 +#define helper_vslide1down_vx_d helper_vslide1down_vx_d_riscv32 +#define helper_vslide1down_vx_h helper_vslide1down_vx_h_riscv32 +#define helper_vslide1down_vx_w helper_vslide1down_vx_w_riscv32 +#define helper_vslide1up_vx_b helper_vslide1up_vx_b_riscv32 +#define helper_vslide1up_vx_d helper_vslide1up_vx_d_riscv32 +#define helper_vslide1up_vx_h helper_vslide1up_vx_h_riscv32 +#define helper_vslide1up_vx_w helper_vslide1up_vx_w_riscv32 +#define helper_vslidedown_vx_b helper_vslidedown_vx_b_riscv32 +#define helper_vslidedown_vx_d helper_vslidedown_vx_d_riscv32 +#define helper_vslidedown_vx_h helper_vslidedown_vx_h_riscv32 +#define helper_vslidedown_vx_w helper_vslidedown_vx_w_riscv32 +#define helper_vslideup_vx_b helper_vslideup_vx_b_riscv32 +#define helper_vslideup_vx_d helper_vslideup_vx_d_riscv32 +#define helper_vslideup_vx_h helper_vslideup_vx_h_riscv32 +#define helper_vslideup_vx_w helper_vslideup_vx_w_riscv32 +#define helper_vsll_vv_b helper_vsll_vv_b_riscv32 +#define helper_vsll_vv_d helper_vsll_vv_d_riscv32 +#define helper_vsll_vv_h helper_vsll_vv_h_riscv32 +#define helper_vsll_vv_w helper_vsll_vv_w_riscv32 +#define helper_vsll_vx_b helper_vsll_vx_b_riscv32 +#define helper_vsll_vx_d helper_vsll_vx_d_riscv32 +#define helper_vsll_vx_h helper_vsll_vx_h_riscv32 +#define helper_vsll_vx_w helper_vsll_vx_w_riscv32 +#define helper_vsmul_vv_b helper_vsmul_vv_b_riscv32 +#define helper_vsmul_vv_d helper_vsmul_vv_d_riscv32 +#define helper_vsmul_vv_h helper_vsmul_vv_h_riscv32 +#define helper_vsmul_vv_w helper_vsmul_vv_w_riscv32 +#define helper_vsmul_vx_b helper_vsmul_vx_b_riscv32 +#define helper_vsmul_vx_d helper_vsmul_vx_d_riscv32 +#define helper_vsmul_vx_h helper_vsmul_vx_h_riscv32 +#define helper_vsmul_vx_w helper_vsmul_vx_w_riscv32 +#define helper_vsra_vv_b helper_vsra_vv_b_riscv32 +#define helper_vsra_vv_d helper_vsra_vv_d_riscv32 +#define helper_vsra_vv_h helper_vsra_vv_h_riscv32 +#define 
helper_vsra_vv_w helper_vsra_vv_w_riscv32 +#define helper_vsra_vx_b helper_vsra_vx_b_riscv32 +#define helper_vsra_vx_d helper_vsra_vx_d_riscv32 +#define helper_vsra_vx_h helper_vsra_vx_h_riscv32 +#define helper_vsra_vx_w helper_vsra_vx_w_riscv32 +#define helper_vsrl_vv_b helper_vsrl_vv_b_riscv32 +#define helper_vsrl_vv_d helper_vsrl_vv_d_riscv32 +#define helper_vsrl_vv_h helper_vsrl_vv_h_riscv32 +#define helper_vsrl_vv_w helper_vsrl_vv_w_riscv32 +#define helper_vsrl_vx_b helper_vsrl_vx_b_riscv32 +#define helper_vsrl_vx_d helper_vsrl_vx_d_riscv32 +#define helper_vsrl_vx_h helper_vsrl_vx_h_riscv32 +#define helper_vsrl_vx_w helper_vsrl_vx_w_riscv32 +#define helper_vssb_v_b helper_vssb_v_b_riscv32 +#define helper_vssb_v_d helper_vssb_v_d_riscv32 +#define helper_vssb_v_h helper_vssb_v_h_riscv32 +#define helper_vssb_v_w helper_vssb_v_w_riscv32 +#define helper_vsse_v_b helper_vsse_v_b_riscv32 +#define helper_vsse_v_d helper_vsse_v_d_riscv32 +#define helper_vsse_v_h helper_vsse_v_h_riscv32 +#define helper_vsse_v_w helper_vsse_v_w_riscv32 +#define helper_vssh_v_d helper_vssh_v_d_riscv32 +#define helper_vssh_v_h helper_vssh_v_h_riscv32 +#define helper_vssh_v_w helper_vssh_v_w_riscv32 +#define helper_vssra_vv_b helper_vssra_vv_b_riscv32 +#define helper_vssra_vv_d helper_vssra_vv_d_riscv32 +#define helper_vssra_vv_h helper_vssra_vv_h_riscv32 +#define helper_vssra_vv_w helper_vssra_vv_w_riscv32 +#define helper_vssra_vx_b helper_vssra_vx_b_riscv32 +#define helper_vssra_vx_d helper_vssra_vx_d_riscv32 +#define helper_vssra_vx_h helper_vssra_vx_h_riscv32 +#define helper_vssra_vx_w helper_vssra_vx_w_riscv32 +#define helper_vssrl_vv_b helper_vssrl_vv_b_riscv32 +#define helper_vssrl_vv_d helper_vssrl_vv_d_riscv32 +#define helper_vssrl_vv_h helper_vssrl_vv_h_riscv32 +#define helper_vssrl_vv_w helper_vssrl_vv_w_riscv32 +#define helper_vssrl_vx_b helper_vssrl_vx_b_riscv32 +#define helper_vssrl_vx_d helper_vssrl_vx_d_riscv32 +#define helper_vssrl_vx_h helper_vssrl_vx_h_riscv32 +#define helper_vssrl_vx_w helper_vssrl_vx_w_riscv32 +#define helper_vssub_vv_b helper_vssub_vv_b_riscv32 +#define helper_vssub_vv_d helper_vssub_vv_d_riscv32 +#define helper_vssub_vv_h helper_vssub_vv_h_riscv32 +#define helper_vssub_vv_w helper_vssub_vv_w_riscv32 +#define helper_vssub_vx_b helper_vssub_vx_b_riscv32 +#define helper_vssub_vx_d helper_vssub_vx_d_riscv32 +#define helper_vssub_vx_h helper_vssub_vx_h_riscv32 +#define helper_vssub_vx_w helper_vssub_vx_w_riscv32 +#define helper_vssubu_vv_b helper_vssubu_vv_b_riscv32 +#define helper_vssubu_vv_d helper_vssubu_vv_d_riscv32 +#define helper_vssubu_vv_h helper_vssubu_vv_h_riscv32 +#define helper_vssubu_vv_w helper_vssubu_vv_w_riscv32 +#define helper_vssubu_vx_b helper_vssubu_vx_b_riscv32 +#define helper_vssubu_vx_d helper_vssubu_vx_d_riscv32 +#define helper_vssubu_vx_h helper_vssubu_vx_h_riscv32 +#define helper_vssubu_vx_w helper_vssubu_vx_w_riscv32 +#define helper_vssw_v_d helper_vssw_v_d_riscv32 +#define helper_vssw_v_w helper_vssw_v_w_riscv32 +#define helper_vsub_vv_b helper_vsub_vv_b_riscv32 +#define helper_vsub_vv_d helper_vsub_vv_d_riscv32 +#define helper_vsub_vv_h helper_vsub_vv_h_riscv32 +#define helper_vsub_vv_w helper_vsub_vv_w_riscv32 +#define helper_vsub_vx_b helper_vsub_vx_b_riscv32 +#define helper_vsub_vx_d helper_vsub_vx_d_riscv32 +#define helper_vsub_vx_h helper_vsub_vx_h_riscv32 +#define helper_vsub_vx_w helper_vsub_vx_w_riscv32 +#define helper_vsw_v_d helper_vsw_v_d_riscv32 +#define helper_vsw_v_d_mask helper_vsw_v_d_mask_riscv32 +#define helper_vsw_v_w 
helper_vsw_v_w_riscv32 +#define helper_vsw_v_w_mask helper_vsw_v_w_mask_riscv32 +#define helper_vsxb_v_b helper_vsxb_v_b_riscv32 +#define helper_vsxb_v_d helper_vsxb_v_d_riscv32 +#define helper_vsxb_v_h helper_vsxb_v_h_riscv32 +#define helper_vsxb_v_w helper_vsxb_v_w_riscv32 +#define helper_vsxe_v_b helper_vsxe_v_b_riscv32 +#define helper_vsxe_v_d helper_vsxe_v_d_riscv32 +#define helper_vsxe_v_h helper_vsxe_v_h_riscv32 +#define helper_vsxe_v_w helper_vsxe_v_w_riscv32 +#define helper_vsxh_v_d helper_vsxh_v_d_riscv32 +#define helper_vsxh_v_h helper_vsxh_v_h_riscv32 +#define helper_vsxh_v_w helper_vsxh_v_w_riscv32 +#define helper_vsxw_v_d helper_vsxw_v_d_riscv32 +#define helper_vsxw_v_w helper_vsxw_v_w_riscv32 +#define helper_vwadd_vv_b helper_vwadd_vv_b_riscv32 +#define helper_vwadd_vv_h helper_vwadd_vv_h_riscv32 +#define helper_vwadd_vv_w helper_vwadd_vv_w_riscv32 +#define helper_vwadd_vx_b helper_vwadd_vx_b_riscv32 +#define helper_vwadd_vx_h helper_vwadd_vx_h_riscv32 +#define helper_vwadd_vx_w helper_vwadd_vx_w_riscv32 +#define helper_vwadd_wv_b helper_vwadd_wv_b_riscv32 +#define helper_vwadd_wv_h helper_vwadd_wv_h_riscv32 +#define helper_vwadd_wv_w helper_vwadd_wv_w_riscv32 +#define helper_vwadd_wx_b helper_vwadd_wx_b_riscv32 +#define helper_vwadd_wx_h helper_vwadd_wx_h_riscv32 +#define helper_vwadd_wx_w helper_vwadd_wx_w_riscv32 +#define helper_vwaddu_vv_b helper_vwaddu_vv_b_riscv32 +#define helper_vwaddu_vv_h helper_vwaddu_vv_h_riscv32 +#define helper_vwaddu_vv_w helper_vwaddu_vv_w_riscv32 +#define helper_vwaddu_vx_b helper_vwaddu_vx_b_riscv32 +#define helper_vwaddu_vx_h helper_vwaddu_vx_h_riscv32 +#define helper_vwaddu_vx_w helper_vwaddu_vx_w_riscv32 +#define helper_vwaddu_wv_b helper_vwaddu_wv_b_riscv32 +#define helper_vwaddu_wv_h helper_vwaddu_wv_h_riscv32 +#define helper_vwaddu_wv_w helper_vwaddu_wv_w_riscv32 +#define helper_vwaddu_wx_b helper_vwaddu_wx_b_riscv32 +#define helper_vwaddu_wx_h helper_vwaddu_wx_h_riscv32 +#define helper_vwaddu_wx_w helper_vwaddu_wx_w_riscv32 +#define helper_vwmacc_vv_b helper_vwmacc_vv_b_riscv32 +#define helper_vwmacc_vv_h helper_vwmacc_vv_h_riscv32 +#define helper_vwmacc_vv_w helper_vwmacc_vv_w_riscv32 +#define helper_vwmacc_vx_b helper_vwmacc_vx_b_riscv32 +#define helper_vwmacc_vx_h helper_vwmacc_vx_h_riscv32 +#define helper_vwmacc_vx_w helper_vwmacc_vx_w_riscv32 +#define helper_vwmaccsu_vv_b helper_vwmaccsu_vv_b_riscv32 +#define helper_vwmaccsu_vv_h helper_vwmaccsu_vv_h_riscv32 +#define helper_vwmaccsu_vv_w helper_vwmaccsu_vv_w_riscv32 +#define helper_vwmaccsu_vx_b helper_vwmaccsu_vx_b_riscv32 +#define helper_vwmaccsu_vx_h helper_vwmaccsu_vx_h_riscv32 +#define helper_vwmaccsu_vx_w helper_vwmaccsu_vx_w_riscv32 +#define helper_vwmaccu_vv_b helper_vwmaccu_vv_b_riscv32 +#define helper_vwmaccu_vv_h helper_vwmaccu_vv_h_riscv32 +#define helper_vwmaccu_vv_w helper_vwmaccu_vv_w_riscv32 +#define helper_vwmaccu_vx_b helper_vwmaccu_vx_b_riscv32 +#define helper_vwmaccu_vx_h helper_vwmaccu_vx_h_riscv32 +#define helper_vwmaccu_vx_w helper_vwmaccu_vx_w_riscv32 +#define helper_vwmaccus_vx_b helper_vwmaccus_vx_b_riscv32 +#define helper_vwmaccus_vx_h helper_vwmaccus_vx_h_riscv32 +#define helper_vwmaccus_vx_w helper_vwmaccus_vx_w_riscv32 +#define helper_vwmul_vv_b helper_vwmul_vv_b_riscv32 +#define helper_vwmul_vv_h helper_vwmul_vv_h_riscv32 +#define helper_vwmul_vv_w helper_vwmul_vv_w_riscv32 +#define helper_vwmul_vx_b helper_vwmul_vx_b_riscv32 +#define helper_vwmul_vx_h helper_vwmul_vx_h_riscv32 +#define helper_vwmul_vx_w helper_vwmul_vx_w_riscv32 +#define 
helper_vwmulsu_vv_b helper_vwmulsu_vv_b_riscv32 +#define helper_vwmulsu_vv_h helper_vwmulsu_vv_h_riscv32 +#define helper_vwmulsu_vv_w helper_vwmulsu_vv_w_riscv32 +#define helper_vwmulsu_vx_b helper_vwmulsu_vx_b_riscv32 +#define helper_vwmulsu_vx_h helper_vwmulsu_vx_h_riscv32 +#define helper_vwmulsu_vx_w helper_vwmulsu_vx_w_riscv32 +#define helper_vwmulu_vv_b helper_vwmulu_vv_b_riscv32 +#define helper_vwmulu_vv_h helper_vwmulu_vv_h_riscv32 +#define helper_vwmulu_vv_w helper_vwmulu_vv_w_riscv32 +#define helper_vwmulu_vx_b helper_vwmulu_vx_b_riscv32 +#define helper_vwmulu_vx_h helper_vwmulu_vx_h_riscv32 +#define helper_vwmulu_vx_w helper_vwmulu_vx_w_riscv32 +#define helper_vwredsum_vs_b helper_vwredsum_vs_b_riscv32 +#define helper_vwredsum_vs_h helper_vwredsum_vs_h_riscv32 +#define helper_vwredsum_vs_w helper_vwredsum_vs_w_riscv32 +#define helper_vwredsumu_vs_b helper_vwredsumu_vs_b_riscv32 +#define helper_vwredsumu_vs_h helper_vwredsumu_vs_h_riscv32 +#define helper_vwredsumu_vs_w helper_vwredsumu_vs_w_riscv32 +#define helper_vwsmacc_vv_b helper_vwsmacc_vv_b_riscv32 +#define helper_vwsmacc_vv_h helper_vwsmacc_vv_h_riscv32 +#define helper_vwsmacc_vv_w helper_vwsmacc_vv_w_riscv32 +#define helper_vwsmacc_vx_b helper_vwsmacc_vx_b_riscv32 +#define helper_vwsmacc_vx_h helper_vwsmacc_vx_h_riscv32 +#define helper_vwsmacc_vx_w helper_vwsmacc_vx_w_riscv32 +#define helper_vwsmaccsu_vv_b helper_vwsmaccsu_vv_b_riscv32 +#define helper_vwsmaccsu_vv_h helper_vwsmaccsu_vv_h_riscv32 +#define helper_vwsmaccsu_vv_w helper_vwsmaccsu_vv_w_riscv32 +#define helper_vwsmaccsu_vx_b helper_vwsmaccsu_vx_b_riscv32 +#define helper_vwsmaccsu_vx_h helper_vwsmaccsu_vx_h_riscv32 +#define helper_vwsmaccsu_vx_w helper_vwsmaccsu_vx_w_riscv32 +#define helper_vwsmaccu_vv_b helper_vwsmaccu_vv_b_riscv32 +#define helper_vwsmaccu_vv_h helper_vwsmaccu_vv_h_riscv32 +#define helper_vwsmaccu_vv_w helper_vwsmaccu_vv_w_riscv32 +#define helper_vwsmaccu_vx_b helper_vwsmaccu_vx_b_riscv32 +#define helper_vwsmaccu_vx_h helper_vwsmaccu_vx_h_riscv32 +#define helper_vwsmaccu_vx_w helper_vwsmaccu_vx_w_riscv32 +#define helper_vwsmaccus_vx_b helper_vwsmaccus_vx_b_riscv32 +#define helper_vwsmaccus_vx_h helper_vwsmaccus_vx_h_riscv32 +#define helper_vwsmaccus_vx_w helper_vwsmaccus_vx_w_riscv32 +#define helper_vwsub_vv_b helper_vwsub_vv_b_riscv32 +#define helper_vwsub_vv_h helper_vwsub_vv_h_riscv32 +#define helper_vwsub_vv_w helper_vwsub_vv_w_riscv32 +#define helper_vwsub_vx_b helper_vwsub_vx_b_riscv32 +#define helper_vwsub_vx_h helper_vwsub_vx_h_riscv32 +#define helper_vwsub_vx_w helper_vwsub_vx_w_riscv32 +#define helper_vwsub_wv_b helper_vwsub_wv_b_riscv32 +#define helper_vwsub_wv_h helper_vwsub_wv_h_riscv32 +#define helper_vwsub_wv_w helper_vwsub_wv_w_riscv32 +#define helper_vwsub_wx_b helper_vwsub_wx_b_riscv32 +#define helper_vwsub_wx_h helper_vwsub_wx_h_riscv32 +#define helper_vwsub_wx_w helper_vwsub_wx_w_riscv32 +#define helper_vwsubu_vv_b helper_vwsubu_vv_b_riscv32 +#define helper_vwsubu_vv_h helper_vwsubu_vv_h_riscv32 +#define helper_vwsubu_vv_w helper_vwsubu_vv_w_riscv32 +#define helper_vwsubu_vx_b helper_vwsubu_vx_b_riscv32 +#define helper_vwsubu_vx_h helper_vwsubu_vx_h_riscv32 +#define helper_vwsubu_vx_w helper_vwsubu_vx_w_riscv32 +#define helper_vwsubu_wv_b helper_vwsubu_wv_b_riscv32 +#define helper_vwsubu_wv_h helper_vwsubu_wv_h_riscv32 +#define helper_vwsubu_wv_w helper_vwsubu_wv_w_riscv32 +#define helper_vwsubu_wx_b helper_vwsubu_wx_b_riscv32 +#define helper_vwsubu_wx_h helper_vwsubu_wx_h_riscv32 +#define helper_vwsubu_wx_w 
helper_vwsubu_wx_w_riscv32 +#define helper_vxor_vv_b helper_vxor_vv_b_riscv32 +#define helper_vxor_vv_d helper_vxor_vv_d_riscv32 +#define helper_vxor_vv_h helper_vxor_vv_h_riscv32 +#define helper_vxor_vv_w helper_vxor_vv_w_riscv32 +#define helper_vxor_vx_b helper_vxor_vx_b_riscv32 +#define helper_vxor_vx_d helper_vxor_vx_d_riscv32 +#define helper_vxor_vx_h helper_vxor_vx_h_riscv32 +#define helper_vxor_vx_w helper_vxor_vx_w_riscv32 #endif diff --git a/qemu/riscv64.h b/qemu/riscv64.h index 1bb119334e..2f0870dc20 100644 --- a/qemu/riscv64.h +++ b/qemu/riscv64.h @@ -42,7 +42,10 @@ #define tcg_gen_shl_i64 tcg_gen_shl_i64_riscv64 #define tcg_gen_shr_i64 tcg_gen_shr_i64_riscv64 #define tcg_gen_st_i64 tcg_gen_st_i64_riscv64 +#define tcg_gen_add_i64 tcg_gen_add_i64_riscv64 +#define tcg_gen_sub_i64 tcg_gen_sub_i64_riscv64 #define tcg_gen_xor_i64 tcg_gen_xor_i64_riscv64 +#define tcg_gen_neg_i64 tcg_gen_neg_i64_riscv64 #define cpu_icount_to_ns cpu_icount_to_ns_riscv64 #define cpu_is_stopped cpu_is_stopped_riscv64 #define cpu_get_ticks cpu_get_ticks_riscv64 @@ -374,6 +377,8 @@ #define floatx80_sub floatx80_sub_riscv64 #define floatx80_mul floatx80_mul_riscv64 #define floatx80_div floatx80_div_riscv64 +#define floatx80_modrem floatx80_modrem_riscv64 +#define floatx80_mod floatx80_mod_riscv64 #define floatx80_rem floatx80_rem_riscv64 #define floatx80_sqrt floatx80_sqrt_riscv64 #define floatx80_eq floatx80_eq_riscv64 @@ -648,6 +653,7 @@ #define tcg_gen_gvec_dup_i32 tcg_gen_gvec_dup_i32_riscv64 #define tcg_gen_gvec_dup_i64 tcg_gen_gvec_dup_i64_riscv64 #define tcg_gen_gvec_dup_mem tcg_gen_gvec_dup_mem_riscv64 +#define tcg_gen_gvec_dup_imm tcg_gen_gvec_dup_imm_riscv64 #define tcg_gen_gvec_dup64i tcg_gen_gvec_dup64i_riscv64 #define tcg_gen_gvec_dup32i tcg_gen_gvec_dup32i_riscv64 #define tcg_gen_gvec_dup16i tcg_gen_gvec_dup16i_riscv64 @@ -702,13 +708,20 @@ #define tcg_gen_gvec_shri tcg_gen_gvec_shri_riscv64 #define tcg_gen_vec_sar8i_i64 tcg_gen_vec_sar8i_i64_riscv64 #define tcg_gen_vec_sar16i_i64 tcg_gen_vec_sar16i_i64_riscv64 +#define tcg_gen_vec_rotl8i_i64 tcg_gen_vec_rotl8i_i64_riscv64 +#define tcg_gen_vec_rotl16i_i64 tcg_gen_vec_rotl16i_i64_riscv64 #define tcg_gen_gvec_sari tcg_gen_gvec_sari_riscv64 +#define tcg_gen_gvec_rotli tcg_gen_gvec_rotli_riscv64 +#define tcg_gen_gvec_rotri tcg_gen_gvec_rotri_riscv64 #define tcg_gen_gvec_shls tcg_gen_gvec_shls_riscv64 #define tcg_gen_gvec_shrs tcg_gen_gvec_shrs_riscv64 #define tcg_gen_gvec_sars tcg_gen_gvec_sars_riscv64 +#define tcg_gen_gvec_rotls tcg_gen_gvec_rotls_riscv64 #define tcg_gen_gvec_shlv tcg_gen_gvec_shlv_riscv64 #define tcg_gen_gvec_shrv tcg_gen_gvec_shrv_riscv64 #define tcg_gen_gvec_sarv tcg_gen_gvec_sarv_riscv64 +#define tcg_gen_gvec_rotlv tcg_gen_gvec_rotlv_riscv64 +#define tcg_gen_gvec_rotrv tcg_gen_gvec_rotrv_riscv64 #define tcg_gen_gvec_cmp tcg_gen_gvec_cmp_riscv64 #define tcg_gen_gvec_bitsel tcg_gen_gvec_bitsel_riscv64 #define tcg_can_emit_vecop_list tcg_can_emit_vecop_list_riscv64 @@ -745,6 +758,8 @@ #define tcg_gen_shli_vec tcg_gen_shli_vec_riscv64 #define tcg_gen_shri_vec tcg_gen_shri_vec_riscv64 #define tcg_gen_sari_vec tcg_gen_sari_vec_riscv64 +#define tcg_gen_rotli_vec tcg_gen_rotli_vec_riscv64 +#define tcg_gen_rotri_vec tcg_gen_rotri_vec_riscv64 #define tcg_gen_cmp_vec tcg_gen_cmp_vec_riscv64 #define tcg_gen_add_vec tcg_gen_add_vec_riscv64 #define tcg_gen_sub_vec tcg_gen_sub_vec_riscv64 @@ -760,9 +775,12 @@ #define tcg_gen_shlv_vec tcg_gen_shlv_vec_riscv64 #define tcg_gen_shrv_vec tcg_gen_shrv_vec_riscv64 #define tcg_gen_sarv_vec 
tcg_gen_sarv_vec_riscv64 +#define tcg_gen_rotlv_vec tcg_gen_rotlv_vec_riscv64 +#define tcg_gen_rotrv_vec tcg_gen_rotrv_vec_riscv64 #define tcg_gen_shls_vec tcg_gen_shls_vec_riscv64 #define tcg_gen_shrs_vec tcg_gen_shrs_vec_riscv64 #define tcg_gen_sars_vec tcg_gen_sars_vec_riscv64 +#define tcg_gen_rotls_vec tcg_gen_rotls_vec_riscv64 #define tcg_gen_bitsel_vec tcg_gen_bitsel_vec_riscv64 #define tcg_gen_cmpsel_vec tcg_gen_cmpsel_vec_riscv64 #define tb_htable_lookup tb_htable_lookup_riscv64 @@ -774,6 +792,7 @@ #define cpu_loop_exit_restore cpu_loop_exit_restore_riscv64 #define cpu_loop_exit_atomic cpu_loop_exit_atomic_riscv64 #define tlb_init tlb_init_riscv64 +#define tlb_destroy tlb_destroy_riscv64 #define tlb_flush_by_mmuidx tlb_flush_by_mmuidx_riscv64 #define tlb_flush tlb_flush_riscv64 #define tlb_flush_by_mmuidx_all_cpus tlb_flush_by_mmuidx_all_cpus_riscv64 @@ -794,6 +813,7 @@ #define tlb_set_page tlb_set_page_riscv64 #define get_page_addr_code_hostp get_page_addr_code_hostp_riscv64 #define get_page_addr_code get_page_addr_code_riscv64 +#define probe_access_flags probe_access_flags_riscv64 #define probe_access probe_access_riscv64 #define tlb_vaddr_to_host tlb_vaddr_to_host_riscv64 #define helper_ret_ldub_mmu helper_ret_ldub_mmu_riscv64 @@ -810,22 +830,34 @@ #define helper_be_ldsl_mmu helper_be_ldsl_mmu_riscv64 #define cpu_ldub_mmuidx_ra cpu_ldub_mmuidx_ra_riscv64 #define cpu_ldsb_mmuidx_ra cpu_ldsb_mmuidx_ra_riscv64 -#define cpu_lduw_mmuidx_ra cpu_lduw_mmuidx_ra_riscv64 -#define cpu_ldsw_mmuidx_ra cpu_ldsw_mmuidx_ra_riscv64 -#define cpu_ldl_mmuidx_ra cpu_ldl_mmuidx_ra_riscv64 -#define cpu_ldq_mmuidx_ra cpu_ldq_mmuidx_ra_riscv64 +#define cpu_lduw_be_mmuidx_ra cpu_lduw_be_mmuidx_ra_riscv64 +#define cpu_lduw_le_mmuidx_ra cpu_lduw_le_mmuidx_ra_riscv64 +#define cpu_ldsw_be_mmuidx_ra cpu_ldsw_be_mmuidx_ra_riscv64 +#define cpu_ldsw_le_mmuidx_ra cpu_ldsw_le_mmuidx_ra_riscv64 +#define cpu_ldl_be_mmuidx_ra cpu_ldl_be_mmuidx_ra_riscv64 +#define cpu_ldl_le_mmuidx_ra cpu_ldl_le_mmuidx_ra_riscv64 +#define cpu_ldq_be_mmuidx_ra cpu_ldq_be_mmuidx_ra_riscv64 +#define cpu_ldq_le_mmuidx_ra cpu_ldq_le_mmuidx_ra_riscv64 #define cpu_ldub_data_ra cpu_ldub_data_ra_riscv64 #define cpu_ldsb_data_ra cpu_ldsb_data_ra_riscv64 -#define cpu_lduw_data_ra cpu_lduw_data_ra_riscv64 -#define cpu_ldsw_data_ra cpu_ldsw_data_ra_riscv64 -#define cpu_ldl_data_ra cpu_ldl_data_ra_riscv64 -#define cpu_ldq_data_ra cpu_ldq_data_ra_riscv64 +#define cpu_lduw_be_data_ra cpu_lduw_be_data_ra_riscv64 +#define cpu_lduw_le_data_ra cpu_lduw_le_data_ra_riscv64 +#define cpu_ldsw_be_data_ra cpu_ldsw_be_data_ra_riscv64 +#define cpu_ldsw_le_data_ra cpu_ldsw_le_data_ra_riscv64 +#define cpu_ldl_be_data_ra cpu_ldl_be_data_ra_riscv64 +#define cpu_ldl_le_data_ra cpu_ldl_le_data_ra_riscv64 +#define cpu_ldq_be_data_ra cpu_ldq_be_data_ra_riscv64 +#define cpu_ldq_le_data_ra cpu_ldq_le_data_ra_riscv64 #define cpu_ldub_data cpu_ldub_data_riscv64 #define cpu_ldsb_data cpu_ldsb_data_riscv64 -#define cpu_lduw_data cpu_lduw_data_riscv64 -#define cpu_ldsw_data cpu_ldsw_data_riscv64 -#define cpu_ldl_data cpu_ldl_data_riscv64 -#define cpu_ldq_data cpu_ldq_data_riscv64 +#define cpu_lduw_be_data cpu_lduw_be_data_riscv64 +#define cpu_lduw_le_data cpu_lduw_le_data_riscv64 +#define cpu_ldsw_be_data cpu_ldsw_be_data_riscv64 +#define cpu_ldsw_le_data cpu_ldsw_le_data_riscv64 +#define cpu_ldl_be_data cpu_ldl_be_data_riscv64 +#define cpu_ldl_le_data cpu_ldl_le_data_riscv64 +#define cpu_ldq_le_data cpu_ldq_le_data_riscv64 +#define cpu_ldq_be_data cpu_ldq_be_data_riscv64 
#define helper_ret_stb_mmu helper_ret_stb_mmu_riscv64 #define helper_le_stw_mmu helper_le_stw_mmu_riscv64 #define helper_be_stw_mmu helper_be_stw_mmu_riscv64 @@ -834,17 +866,26 @@ #define helper_le_stq_mmu helper_le_stq_mmu_riscv64 #define helper_be_stq_mmu helper_be_stq_mmu_riscv64 #define cpu_stb_mmuidx_ra cpu_stb_mmuidx_ra_riscv64 -#define cpu_stw_mmuidx_ra cpu_stw_mmuidx_ra_riscv64 -#define cpu_stl_mmuidx_ra cpu_stl_mmuidx_ra_riscv64 -#define cpu_stq_mmuidx_ra cpu_stq_mmuidx_ra_riscv64 +#define cpu_stw_be_mmuidx_ra cpu_stw_be_mmuidx_ra_riscv64 +#define cpu_stw_le_mmuidx_ra cpu_stw_le_mmuidx_ra_riscv64 +#define cpu_stl_be_mmuidx_ra cpu_stl_be_mmuidx_ra_riscv64 +#define cpu_stl_le_mmuidx_ra cpu_stl_le_mmuidx_ra_riscv64 +#define cpu_stq_be_mmuidx_ra cpu_stq_be_mmuidx_ra_riscv64 +#define cpu_stq_le_mmuidx_ra cpu_stq_le_mmuidx_ra_riscv64 #define cpu_stb_data_ra cpu_stb_data_ra_riscv64 -#define cpu_stw_data_ra cpu_stw_data_ra_riscv64 -#define cpu_stl_data_ra cpu_stl_data_ra_riscv64 -#define cpu_stq_data_ra cpu_stq_data_ra_riscv64 +#define cpu_stw_be_data_ra cpu_stw_be_data_ra_riscv64 +#define cpu_stw_le_data_ra cpu_stw_le_data_ra_riscv64 +#define cpu_stl_be_data_ra cpu_stl_be_data_ra_riscv64 +#define cpu_stl_le_data_ra cpu_stl_le_data_ra_riscv64 +#define cpu_stq_be_data_ra cpu_stq_be_data_ra_riscv64 +#define cpu_stq_le_data_ra cpu_stq_le_data_ra_riscv64 #define cpu_stb_data cpu_stb_data_riscv64 -#define cpu_stw_data cpu_stw_data_riscv64 -#define cpu_stl_data cpu_stl_data_riscv64 -#define cpu_stq_data cpu_stq_data_riscv64 +#define cpu_stw_be_data cpu_stw_be_data_riscv64 +#define cpu_stw_le_data cpu_stw_le_data_riscv64 +#define cpu_stl_be_data cpu_stl_be_data_riscv64 +#define cpu_stl_le_data cpu_stl_le_data_riscv64 +#define cpu_stq_be_data cpu_stq_be_data_riscv64 +#define cpu_stq_le_data cpu_stq_le_data_riscv64 #define helper_atomic_cmpxchgb_mmu helper_atomic_cmpxchgb_mmu_riscv64 #define helper_atomic_xchgb_mmu helper_atomic_xchgb_mmu_riscv64 #define helper_atomic_fetch_addb_mmu helper_atomic_fetch_addb_mmu_riscv64 @@ -1101,6 +1142,7 @@ #define cpu_lduw_code cpu_lduw_code_riscv64 #define cpu_ldl_code cpu_ldl_code_riscv64 #define cpu_ldq_code cpu_ldq_code_riscv64 +#define cpu_interrupt_handler cpu_interrupt_handler_riscv64 #define helper_div_i32 helper_div_i32_riscv64 #define helper_rem_i32 helper_rem_i32_riscv64 #define helper_divu_i32 helper_divu_i32_riscv64 @@ -1185,6 +1227,10 @@ #define helper_gvec_sar16i helper_gvec_sar16i_riscv64 #define helper_gvec_sar32i helper_gvec_sar32i_riscv64 #define helper_gvec_sar64i helper_gvec_sar64i_riscv64 +#define helper_gvec_rotl8i helper_gvec_rotl8i_riscv64 +#define helper_gvec_rotl16i helper_gvec_rotl16i_riscv64 +#define helper_gvec_rotl32i helper_gvec_rotl32i_riscv64 +#define helper_gvec_rotl64i helper_gvec_rotl64i_riscv64 #define helper_gvec_shl8v helper_gvec_shl8v_riscv64 #define helper_gvec_shl16v helper_gvec_shl16v_riscv64 #define helper_gvec_shl32v helper_gvec_shl32v_riscv64 @@ -1197,6 +1243,14 @@ #define helper_gvec_sar16v helper_gvec_sar16v_riscv64 #define helper_gvec_sar32v helper_gvec_sar32v_riscv64 #define helper_gvec_sar64v helper_gvec_sar64v_riscv64 +#define helper_gvec_rotl8v helper_gvec_rotl8v_riscv64 +#define helper_gvec_rotl16v helper_gvec_rotl16v_riscv64 +#define helper_gvec_rotl32v helper_gvec_rotl32v_riscv64 +#define helper_gvec_rotl64v helper_gvec_rotl64v_riscv64 +#define helper_gvec_rotr8v helper_gvec_rotr8v_riscv64 +#define helper_gvec_rotr16v helper_gvec_rotr16v_riscv64 +#define helper_gvec_rotr32v helper_gvec_rotr32v_riscv64 
+#define helper_gvec_rotr64v helper_gvec_rotr64v_riscv64 #define helper_gvec_eq8 helper_gvec_eq8_riscv64 #define helper_gvec_ne8 helper_gvec_ne8_riscv64 #define helper_gvec_lt8 helper_gvec_lt8_riscv64 @@ -1366,6 +1420,7 @@ #define helper_mret helper_mret_riscv64 #define helper_wfi helper_wfi_riscv64 #define helper_tlb_flush helper_tlb_flush_riscv64 +#define helper_hyp_tlb_flush helper_hyp_tlb_flush_riscv64 #define pmp_hart_has_privs pmp_hart_has_privs_riscv64 #define pmpcfg_csr_write pmpcfg_csr_write_riscv64 #define pmpcfg_csr_read pmpcfg_csr_read_riscv64 @@ -1386,4 +1441,1006 @@ #define gen_helper_tlb_flush gen_helper_tlb_flush_riscv64 #define riscv_fpr_regnames riscv_fpr_regnames_riscv64 #define riscv_int_regnames riscv_int_regnames_riscv64 +#define fclass_d fclass_d_riscv64 +#define fclass_h fclass_h_riscv64 +#define fclass_s fclass_s_riscv64 +#define helper_vaadd_vv_b helper_vaadd_vv_b_riscv64 +#define helper_vaadd_vv_d helper_vaadd_vv_d_riscv64 +#define helper_vaadd_vv_h helper_vaadd_vv_h_riscv64 +#define helper_vaadd_vv_w helper_vaadd_vv_w_riscv64 +#define helper_vaadd_vx_b helper_vaadd_vx_b_riscv64 +#define helper_vaadd_vx_d helper_vaadd_vx_d_riscv64 +#define helper_vaadd_vx_h helper_vaadd_vx_h_riscv64 +#define helper_vaadd_vx_w helper_vaadd_vx_w_riscv64 +#define helper_vadc_vvm_b helper_vadc_vvm_b_riscv64 +#define helper_vadc_vvm_d helper_vadc_vvm_d_riscv64 +#define helper_vadc_vvm_h helper_vadc_vvm_h_riscv64 +#define helper_vadc_vvm_w helper_vadc_vvm_w_riscv64 +#define helper_vadc_vxm_b helper_vadc_vxm_b_riscv64 +#define helper_vadc_vxm_d helper_vadc_vxm_d_riscv64 +#define helper_vadc_vxm_h helper_vadc_vxm_h_riscv64 +#define helper_vadc_vxm_w helper_vadc_vxm_w_riscv64 +#define helper_vadd_vv_b helper_vadd_vv_b_riscv64 +#define helper_vadd_vv_d helper_vadd_vv_d_riscv64 +#define helper_vadd_vv_h helper_vadd_vv_h_riscv64 +#define helper_vadd_vv_w helper_vadd_vv_w_riscv64 +#define helper_vadd_vx_b helper_vadd_vx_b_riscv64 +#define helper_vadd_vx_d helper_vadd_vx_d_riscv64 +#define helper_vadd_vx_h helper_vadd_vx_h_riscv64 +#define helper_vadd_vx_w helper_vadd_vx_w_riscv64 +#define helper_vamoaddw_v_w helper_vamoaddw_v_w_riscv64 +#define helper_vamoandw_v_w helper_vamoandw_v_w_riscv64 +#define helper_vamomaxuw_v_w helper_vamomaxuw_v_w_riscv64 +#define helper_vamomaxw_v_w helper_vamomaxw_v_w_riscv64 +#define helper_vamominuw_v_w helper_vamominuw_v_w_riscv64 +#define helper_vamominw_v_w helper_vamominw_v_w_riscv64 +#define helper_vamoorw_v_w helper_vamoorw_v_w_riscv64 +#define helper_vamoswapw_v_w helper_vamoswapw_v_w_riscv64 +#define helper_vamoxorw_v_w helper_vamoxorw_v_w_riscv64 +#define helper_vand_vv_b helper_vand_vv_b_riscv64 +#define helper_vand_vv_d helper_vand_vv_d_riscv64 +#define helper_vand_vv_h helper_vand_vv_h_riscv64 +#define helper_vand_vv_w helper_vand_vv_w_riscv64 +#define helper_vand_vx_b helper_vand_vx_b_riscv64 +#define helper_vand_vx_d helper_vand_vx_d_riscv64 +#define helper_vand_vx_h helper_vand_vx_h_riscv64 +#define helper_vand_vx_w helper_vand_vx_w_riscv64 +#define helper_vasub_vv_b helper_vasub_vv_b_riscv64 +#define helper_vasub_vv_d helper_vasub_vv_d_riscv64 +#define helper_vasub_vv_h helper_vasub_vv_h_riscv64 +#define helper_vasub_vv_w helper_vasub_vv_w_riscv64 +#define helper_vasub_vx_b helper_vasub_vx_b_riscv64 +#define helper_vasub_vx_d helper_vasub_vx_d_riscv64 +#define helper_vasub_vx_h helper_vasub_vx_h_riscv64 +#define helper_vasub_vx_w helper_vasub_vx_w_riscv64 +#define helper_vcompress_vm_b helper_vcompress_vm_b_riscv64 +#define helper_vcompress_vm_d 
helper_vcompress_vm_d_riscv64 +#define helper_vcompress_vm_h helper_vcompress_vm_h_riscv64 +#define helper_vcompress_vm_w helper_vcompress_vm_w_riscv64 +#define helper_vdiv_vv_b helper_vdiv_vv_b_riscv64 +#define helper_vdiv_vv_d helper_vdiv_vv_d_riscv64 +#define helper_vdiv_vv_h helper_vdiv_vv_h_riscv64 +#define helper_vdiv_vv_w helper_vdiv_vv_w_riscv64 +#define helper_vdiv_vx_b helper_vdiv_vx_b_riscv64 +#define helper_vdiv_vx_d helper_vdiv_vx_d_riscv64 +#define helper_vdiv_vx_h helper_vdiv_vx_h_riscv64 +#define helper_vdiv_vx_w helper_vdiv_vx_w_riscv64 +#define helper_vdivu_vv_b helper_vdivu_vv_b_riscv64 +#define helper_vdivu_vv_d helper_vdivu_vv_d_riscv64 +#define helper_vdivu_vv_h helper_vdivu_vv_h_riscv64 +#define helper_vdivu_vv_w helper_vdivu_vv_w_riscv64 +#define helper_vdivu_vx_b helper_vdivu_vx_b_riscv64 +#define helper_vdivu_vx_d helper_vdivu_vx_d_riscv64 +#define helper_vdivu_vx_h helper_vdivu_vx_h_riscv64 +#define helper_vdivu_vx_w helper_vdivu_vx_w_riscv64 +#define helper_vec_rsubs16 helper_vec_rsubs16_riscv64 +#define helper_vec_rsubs32 helper_vec_rsubs32_riscv64 +#define helper_vec_rsubs64 helper_vec_rsubs64_riscv64 +#define helper_vec_rsubs8 helper_vec_rsubs8_riscv64 +#define helper_vfadd_vf_d helper_vfadd_vf_d_riscv64 +#define helper_vfadd_vf_h helper_vfadd_vf_h_riscv64 +#define helper_vfadd_vf_w helper_vfadd_vf_w_riscv64 +#define helper_vfadd_vv_d helper_vfadd_vv_d_riscv64 +#define helper_vfadd_vv_h helper_vfadd_vv_h_riscv64 +#define helper_vfadd_vv_w helper_vfadd_vv_w_riscv64 +#define helper_vfclass_v_d helper_vfclass_v_d_riscv64 +#define helper_vfclass_v_h helper_vfclass_v_h_riscv64 +#define helper_vfclass_v_w helper_vfclass_v_w_riscv64 +#define helper_vfcvt_f_x_v_d helper_vfcvt_f_x_v_d_riscv64 +#define helper_vfcvt_f_x_v_h helper_vfcvt_f_x_v_h_riscv64 +#define helper_vfcvt_f_x_v_w helper_vfcvt_f_x_v_w_riscv64 +#define helper_vfcvt_f_xu_v_d helper_vfcvt_f_xu_v_d_riscv64 +#define helper_vfcvt_f_xu_v_h helper_vfcvt_f_xu_v_h_riscv64 +#define helper_vfcvt_f_xu_v_w helper_vfcvt_f_xu_v_w_riscv64 +#define helper_vfcvt_x_f_v_d helper_vfcvt_x_f_v_d_riscv64 +#define helper_vfcvt_x_f_v_h helper_vfcvt_x_f_v_h_riscv64 +#define helper_vfcvt_x_f_v_w helper_vfcvt_x_f_v_w_riscv64 +#define helper_vfcvt_xu_f_v_d helper_vfcvt_xu_f_v_d_riscv64 +#define helper_vfcvt_xu_f_v_h helper_vfcvt_xu_f_v_h_riscv64 +#define helper_vfcvt_xu_f_v_w helper_vfcvt_xu_f_v_w_riscv64 +#define helper_vfdiv_vf_d helper_vfdiv_vf_d_riscv64 +#define helper_vfdiv_vf_h helper_vfdiv_vf_h_riscv64 +#define helper_vfdiv_vf_w helper_vfdiv_vf_w_riscv64 +#define helper_vfdiv_vv_d helper_vfdiv_vv_d_riscv64 +#define helper_vfdiv_vv_h helper_vfdiv_vv_h_riscv64 +#define helper_vfdiv_vv_w helper_vfdiv_vv_w_riscv64 +#define helper_vfmacc_vf_d helper_vfmacc_vf_d_riscv64 +#define helper_vfmacc_vf_h helper_vfmacc_vf_h_riscv64 +#define helper_vfmacc_vf_w helper_vfmacc_vf_w_riscv64 +#define helper_vfmacc_vv_d helper_vfmacc_vv_d_riscv64 +#define helper_vfmacc_vv_h helper_vfmacc_vv_h_riscv64 +#define helper_vfmacc_vv_w helper_vfmacc_vv_w_riscv64 +#define helper_vfmadd_vf_d helper_vfmadd_vf_d_riscv64 +#define helper_vfmadd_vf_h helper_vfmadd_vf_h_riscv64 +#define helper_vfmadd_vf_w helper_vfmadd_vf_w_riscv64 +#define helper_vfmadd_vv_d helper_vfmadd_vv_d_riscv64 +#define helper_vfmadd_vv_h helper_vfmadd_vv_h_riscv64 +#define helper_vfmadd_vv_w helper_vfmadd_vv_w_riscv64 +#define helper_vfmax_vf_d helper_vfmax_vf_d_riscv64 +#define helper_vfmax_vf_h helper_vfmax_vf_h_riscv64 +#define helper_vfmax_vf_w helper_vfmax_vf_w_riscv64 +#define 
helper_vfmax_vv_d helper_vfmax_vv_d_riscv64 +#define helper_vfmax_vv_h helper_vfmax_vv_h_riscv64 +#define helper_vfmax_vv_w helper_vfmax_vv_w_riscv64 +#define helper_vfmerge_vfm_d helper_vfmerge_vfm_d_riscv64 +#define helper_vfmerge_vfm_h helper_vfmerge_vfm_h_riscv64 +#define helper_vfmerge_vfm_w helper_vfmerge_vfm_w_riscv64 +#define helper_vfmin_vf_d helper_vfmin_vf_d_riscv64 +#define helper_vfmin_vf_h helper_vfmin_vf_h_riscv64 +#define helper_vfmin_vf_w helper_vfmin_vf_w_riscv64 +#define helper_vfmin_vv_d helper_vfmin_vv_d_riscv64 +#define helper_vfmin_vv_h helper_vfmin_vv_h_riscv64 +#define helper_vfmin_vv_w helper_vfmin_vv_w_riscv64 +#define helper_vfmsac_vf_d helper_vfmsac_vf_d_riscv64 +#define helper_vfmsac_vf_h helper_vfmsac_vf_h_riscv64 +#define helper_vfmsac_vf_w helper_vfmsac_vf_w_riscv64 +#define helper_vfmsac_vv_d helper_vfmsac_vv_d_riscv64 +#define helper_vfmsac_vv_h helper_vfmsac_vv_h_riscv64 +#define helper_vfmsac_vv_w helper_vfmsac_vv_w_riscv64 +#define helper_vfmsub_vf_d helper_vfmsub_vf_d_riscv64 +#define helper_vfmsub_vf_h helper_vfmsub_vf_h_riscv64 +#define helper_vfmsub_vf_w helper_vfmsub_vf_w_riscv64 +#define helper_vfmsub_vv_d helper_vfmsub_vv_d_riscv64 +#define helper_vfmsub_vv_h helper_vfmsub_vv_h_riscv64 +#define helper_vfmsub_vv_w helper_vfmsub_vv_w_riscv64 +#define helper_vfmul_vf_d helper_vfmul_vf_d_riscv64 +#define helper_vfmul_vf_h helper_vfmul_vf_h_riscv64 +#define helper_vfmul_vf_w helper_vfmul_vf_w_riscv64 +#define helper_vfmul_vv_d helper_vfmul_vv_d_riscv64 +#define helper_vfmul_vv_h helper_vfmul_vv_h_riscv64 +#define helper_vfmul_vv_w helper_vfmul_vv_w_riscv64 +#define helper_vfncvt_f_f_v_h helper_vfncvt_f_f_v_h_riscv64 +#define helper_vfncvt_f_f_v_w helper_vfncvt_f_f_v_w_riscv64 +#define helper_vfncvt_f_x_v_h helper_vfncvt_f_x_v_h_riscv64 +#define helper_vfncvt_f_x_v_w helper_vfncvt_f_x_v_w_riscv64 +#define helper_vfncvt_f_xu_v_h helper_vfncvt_f_xu_v_h_riscv64 +#define helper_vfncvt_f_xu_v_w helper_vfncvt_f_xu_v_w_riscv64 +#define helper_vfncvt_x_f_v_h helper_vfncvt_x_f_v_h_riscv64 +#define helper_vfncvt_x_f_v_w helper_vfncvt_x_f_v_w_riscv64 +#define helper_vfncvt_xu_f_v_h helper_vfncvt_xu_f_v_h_riscv64 +#define helper_vfncvt_xu_f_v_w helper_vfncvt_xu_f_v_w_riscv64 +#define helper_vfnmacc_vf_d helper_vfnmacc_vf_d_riscv64 +#define helper_vfnmacc_vf_h helper_vfnmacc_vf_h_riscv64 +#define helper_vfnmacc_vf_w helper_vfnmacc_vf_w_riscv64 +#define helper_vfnmacc_vv_d helper_vfnmacc_vv_d_riscv64 +#define helper_vfnmacc_vv_h helper_vfnmacc_vv_h_riscv64 +#define helper_vfnmacc_vv_w helper_vfnmacc_vv_w_riscv64 +#define helper_vfnmadd_vf_d helper_vfnmadd_vf_d_riscv64 +#define helper_vfnmadd_vf_h helper_vfnmadd_vf_h_riscv64 +#define helper_vfnmadd_vf_w helper_vfnmadd_vf_w_riscv64 +#define helper_vfnmadd_vv_d helper_vfnmadd_vv_d_riscv64 +#define helper_vfnmadd_vv_h helper_vfnmadd_vv_h_riscv64 +#define helper_vfnmadd_vv_w helper_vfnmadd_vv_w_riscv64 +#define helper_vfnmsac_vf_d helper_vfnmsac_vf_d_riscv64 +#define helper_vfnmsac_vf_h helper_vfnmsac_vf_h_riscv64 +#define helper_vfnmsac_vf_w helper_vfnmsac_vf_w_riscv64 +#define helper_vfnmsac_vv_d helper_vfnmsac_vv_d_riscv64 +#define helper_vfnmsac_vv_h helper_vfnmsac_vv_h_riscv64 +#define helper_vfnmsac_vv_w helper_vfnmsac_vv_w_riscv64 +#define helper_vfnmsub_vf_d helper_vfnmsub_vf_d_riscv64 +#define helper_vfnmsub_vf_h helper_vfnmsub_vf_h_riscv64 +#define helper_vfnmsub_vf_w helper_vfnmsub_vf_w_riscv64 +#define helper_vfnmsub_vv_d helper_vfnmsub_vv_d_riscv64 +#define helper_vfnmsub_vv_h helper_vfnmsub_vv_h_riscv64 
+#define helper_vfnmsub_vv_w helper_vfnmsub_vv_w_riscv64 +#define helper_vfrdiv_vf_d helper_vfrdiv_vf_d_riscv64 +#define helper_vfrdiv_vf_h helper_vfrdiv_vf_h_riscv64 +#define helper_vfrdiv_vf_w helper_vfrdiv_vf_w_riscv64 +#define helper_vfredmax_vs_d helper_vfredmax_vs_d_riscv64 +#define helper_vfredmax_vs_h helper_vfredmax_vs_h_riscv64 +#define helper_vfredmax_vs_w helper_vfredmax_vs_w_riscv64 +#define helper_vfredmin_vs_d helper_vfredmin_vs_d_riscv64 +#define helper_vfredmin_vs_h helper_vfredmin_vs_h_riscv64 +#define helper_vfredmin_vs_w helper_vfredmin_vs_w_riscv64 +#define helper_vfredsum_vs_d helper_vfredsum_vs_d_riscv64 +#define helper_vfredsum_vs_h helper_vfredsum_vs_h_riscv64 +#define helper_vfredsum_vs_w helper_vfredsum_vs_w_riscv64 +#define helper_vfrsub_vf_d helper_vfrsub_vf_d_riscv64 +#define helper_vfrsub_vf_h helper_vfrsub_vf_h_riscv64 +#define helper_vfrsub_vf_w helper_vfrsub_vf_w_riscv64 +#define helper_vfsgnj_vf_d helper_vfsgnj_vf_d_riscv64 +#define helper_vfsgnj_vf_h helper_vfsgnj_vf_h_riscv64 +#define helper_vfsgnj_vf_w helper_vfsgnj_vf_w_riscv64 +#define helper_vfsgnj_vv_d helper_vfsgnj_vv_d_riscv64 +#define helper_vfsgnj_vv_h helper_vfsgnj_vv_h_riscv64 +#define helper_vfsgnj_vv_w helper_vfsgnj_vv_w_riscv64 +#define helper_vfsgnjn_vf_d helper_vfsgnjn_vf_d_riscv64 +#define helper_vfsgnjn_vf_h helper_vfsgnjn_vf_h_riscv64 +#define helper_vfsgnjn_vf_w helper_vfsgnjn_vf_w_riscv64 +#define helper_vfsgnjn_vv_d helper_vfsgnjn_vv_d_riscv64 +#define helper_vfsgnjn_vv_h helper_vfsgnjn_vv_h_riscv64 +#define helper_vfsgnjn_vv_w helper_vfsgnjn_vv_w_riscv64 +#define helper_vfsgnjx_vf_d helper_vfsgnjx_vf_d_riscv64 +#define helper_vfsgnjx_vf_h helper_vfsgnjx_vf_h_riscv64 +#define helper_vfsgnjx_vf_w helper_vfsgnjx_vf_w_riscv64 +#define helper_vfsgnjx_vv_d helper_vfsgnjx_vv_d_riscv64 +#define helper_vfsgnjx_vv_h helper_vfsgnjx_vv_h_riscv64 +#define helper_vfsgnjx_vv_w helper_vfsgnjx_vv_w_riscv64 +#define helper_vfsqrt_v_d helper_vfsqrt_v_d_riscv64 +#define helper_vfsqrt_v_h helper_vfsqrt_v_h_riscv64 +#define helper_vfsqrt_v_w helper_vfsqrt_v_w_riscv64 +#define helper_vfsub_vf_d helper_vfsub_vf_d_riscv64 +#define helper_vfsub_vf_h helper_vfsub_vf_h_riscv64 +#define helper_vfsub_vf_w helper_vfsub_vf_w_riscv64 +#define helper_vfsub_vv_d helper_vfsub_vv_d_riscv64 +#define helper_vfsub_vv_h helper_vfsub_vv_h_riscv64 +#define helper_vfsub_vv_w helper_vfsub_vv_w_riscv64 +#define helper_vfwadd_vf_h helper_vfwadd_vf_h_riscv64 +#define helper_vfwadd_vf_w helper_vfwadd_vf_w_riscv64 +#define helper_vfwadd_vv_h helper_vfwadd_vv_h_riscv64 +#define helper_vfwadd_vv_w helper_vfwadd_vv_w_riscv64 +#define helper_vfwadd_wf_h helper_vfwadd_wf_h_riscv64 +#define helper_vfwadd_wf_w helper_vfwadd_wf_w_riscv64 +#define helper_vfwadd_wv_h helper_vfwadd_wv_h_riscv64 +#define helper_vfwadd_wv_w helper_vfwadd_wv_w_riscv64 +#define helper_vfwcvt_f_f_v_h helper_vfwcvt_f_f_v_h_riscv64 +#define helper_vfwcvt_f_f_v_w helper_vfwcvt_f_f_v_w_riscv64 +#define helper_vfwcvt_f_x_v_h helper_vfwcvt_f_x_v_h_riscv64 +#define helper_vfwcvt_f_x_v_w helper_vfwcvt_f_x_v_w_riscv64 +#define helper_vfwcvt_f_xu_v_h helper_vfwcvt_f_xu_v_h_riscv64 +#define helper_vfwcvt_f_xu_v_w helper_vfwcvt_f_xu_v_w_riscv64 +#define helper_vfwcvt_x_f_v_h helper_vfwcvt_x_f_v_h_riscv64 +#define helper_vfwcvt_x_f_v_w helper_vfwcvt_x_f_v_w_riscv64 +#define helper_vfwcvt_xu_f_v_h helper_vfwcvt_xu_f_v_h_riscv64 +#define helper_vfwcvt_xu_f_v_w helper_vfwcvt_xu_f_v_w_riscv64 +#define helper_vfwmacc_vf_h helper_vfwmacc_vf_h_riscv64 +#define helper_vfwmacc_vf_w 
helper_vfwmacc_vf_w_riscv64 +#define helper_vfwmacc_vv_h helper_vfwmacc_vv_h_riscv64 +#define helper_vfwmacc_vv_w helper_vfwmacc_vv_w_riscv64 +#define helper_vfwmsac_vf_h helper_vfwmsac_vf_h_riscv64 +#define helper_vfwmsac_vf_w helper_vfwmsac_vf_w_riscv64 +#define helper_vfwmsac_vv_h helper_vfwmsac_vv_h_riscv64 +#define helper_vfwmsac_vv_w helper_vfwmsac_vv_w_riscv64 +#define helper_vfwmul_vf_h helper_vfwmul_vf_h_riscv64 +#define helper_vfwmul_vf_w helper_vfwmul_vf_w_riscv64 +#define helper_vfwmul_vv_h helper_vfwmul_vv_h_riscv64 +#define helper_vfwmul_vv_w helper_vfwmul_vv_w_riscv64 +#define helper_vfwnmacc_vf_h helper_vfwnmacc_vf_h_riscv64 +#define helper_vfwnmacc_vf_w helper_vfwnmacc_vf_w_riscv64 +#define helper_vfwnmacc_vv_h helper_vfwnmacc_vv_h_riscv64 +#define helper_vfwnmacc_vv_w helper_vfwnmacc_vv_w_riscv64 +#define helper_vfwnmsac_vf_h helper_vfwnmsac_vf_h_riscv64 +#define helper_vfwnmsac_vf_w helper_vfwnmsac_vf_w_riscv64 +#define helper_vfwnmsac_vv_h helper_vfwnmsac_vv_h_riscv64 +#define helper_vfwnmsac_vv_w helper_vfwnmsac_vv_w_riscv64 +#define helper_vfwredsum_vs_h helper_vfwredsum_vs_h_riscv64 +#define helper_vfwredsum_vs_w helper_vfwredsum_vs_w_riscv64 +#define helper_vfwsub_vf_h helper_vfwsub_vf_h_riscv64 +#define helper_vfwsub_vf_w helper_vfwsub_vf_w_riscv64 +#define helper_vfwsub_vv_h helper_vfwsub_vv_h_riscv64 +#define helper_vfwsub_vv_w helper_vfwsub_vv_w_riscv64 +#define helper_vfwsub_wf_h helper_vfwsub_wf_h_riscv64 +#define helper_vfwsub_wf_w helper_vfwsub_wf_w_riscv64 +#define helper_vfwsub_wv_h helper_vfwsub_wv_h_riscv64 +#define helper_vfwsub_wv_w helper_vfwsub_wv_w_riscv64 +#define helper_vid_v_b helper_vid_v_b_riscv64 +#define helper_vid_v_d helper_vid_v_d_riscv64 +#define helper_vid_v_h helper_vid_v_h_riscv64 +#define helper_vid_v_w helper_vid_v_w_riscv64 +#define helper_viota_m_b helper_viota_m_b_riscv64 +#define helper_viota_m_d helper_viota_m_d_riscv64 +#define helper_viota_m_h helper_viota_m_h_riscv64 +#define helper_viota_m_w helper_viota_m_w_riscv64 +#define helper_vlb_v_b helper_vlb_v_b_riscv64 +#define helper_vlb_v_b_mask helper_vlb_v_b_mask_riscv64 +#define helper_vlb_v_d helper_vlb_v_d_riscv64 +#define helper_vlb_v_d_mask helper_vlb_v_d_mask_riscv64 +#define helper_vlb_v_h helper_vlb_v_h_riscv64 +#define helper_vlb_v_h_mask helper_vlb_v_h_mask_riscv64 +#define helper_vlb_v_w helper_vlb_v_w_riscv64 +#define helper_vlb_v_w_mask helper_vlb_v_w_mask_riscv64 +#define helper_vlbff_v_b helper_vlbff_v_b_riscv64 +#define helper_vlbff_v_d helper_vlbff_v_d_riscv64 +#define helper_vlbff_v_h helper_vlbff_v_h_riscv64 +#define helper_vlbff_v_w helper_vlbff_v_w_riscv64 +#define helper_vlbu_v_b helper_vlbu_v_b_riscv64 +#define helper_vlbu_v_b_mask helper_vlbu_v_b_mask_riscv64 +#define helper_vlbu_v_d helper_vlbu_v_d_riscv64 +#define helper_vlbu_v_d_mask helper_vlbu_v_d_mask_riscv64 +#define helper_vlbu_v_h helper_vlbu_v_h_riscv64 +#define helper_vlbu_v_h_mask helper_vlbu_v_h_mask_riscv64 +#define helper_vlbu_v_w helper_vlbu_v_w_riscv64 +#define helper_vlbu_v_w_mask helper_vlbu_v_w_mask_riscv64 +#define helper_vlbuff_v_b helper_vlbuff_v_b_riscv64 +#define helper_vlbuff_v_d helper_vlbuff_v_d_riscv64 +#define helper_vlbuff_v_h helper_vlbuff_v_h_riscv64 +#define helper_vlbuff_v_w helper_vlbuff_v_w_riscv64 +#define helper_vle_v_b helper_vle_v_b_riscv64 +#define helper_vle_v_b_mask helper_vle_v_b_mask_riscv64 +#define helper_vle_v_d helper_vle_v_d_riscv64 +#define helper_vle_v_d_mask helper_vle_v_d_mask_riscv64 +#define helper_vle_v_h helper_vle_v_h_riscv64 +#define 
helper_vle_v_h_mask helper_vle_v_h_mask_riscv64 +#define helper_vle_v_w helper_vle_v_w_riscv64 +#define helper_vle_v_w_mask helper_vle_v_w_mask_riscv64 +#define helper_vleff_v_b helper_vleff_v_b_riscv64 +#define helper_vleff_v_d helper_vleff_v_d_riscv64 +#define helper_vleff_v_h helper_vleff_v_h_riscv64 +#define helper_vleff_v_w helper_vleff_v_w_riscv64 +#define helper_vlh_v_d helper_vlh_v_d_riscv64 +#define helper_vlh_v_d_mask helper_vlh_v_d_mask_riscv64 +#define helper_vlh_v_h helper_vlh_v_h_riscv64 +#define helper_vlh_v_h_mask helper_vlh_v_h_mask_riscv64 +#define helper_vlh_v_w helper_vlh_v_w_riscv64 +#define helper_vlh_v_w_mask helper_vlh_v_w_mask_riscv64 +#define helper_vlhff_v_d helper_vlhff_v_d_riscv64 +#define helper_vlhff_v_h helper_vlhff_v_h_riscv64 +#define helper_vlhff_v_w helper_vlhff_v_w_riscv64 +#define helper_vlhu_v_d helper_vlhu_v_d_riscv64 +#define helper_vlhu_v_d_mask helper_vlhu_v_d_mask_riscv64 +#define helper_vlhu_v_h helper_vlhu_v_h_riscv64 +#define helper_vlhu_v_h_mask helper_vlhu_v_h_mask_riscv64 +#define helper_vlhu_v_w helper_vlhu_v_w_riscv64 +#define helper_vlhu_v_w_mask helper_vlhu_v_w_mask_riscv64 +#define helper_vlhuff_v_d helper_vlhuff_v_d_riscv64 +#define helper_vlhuff_v_h helper_vlhuff_v_h_riscv64 +#define helper_vlhuff_v_w helper_vlhuff_v_w_riscv64 +#define helper_vlsb_v_b helper_vlsb_v_b_riscv64 +#define helper_vlsb_v_d helper_vlsb_v_d_riscv64 +#define helper_vlsb_v_h helper_vlsb_v_h_riscv64 +#define helper_vlsb_v_w helper_vlsb_v_w_riscv64 +#define helper_vlsbu_v_b helper_vlsbu_v_b_riscv64 +#define helper_vlsbu_v_d helper_vlsbu_v_d_riscv64 +#define helper_vlsbu_v_h helper_vlsbu_v_h_riscv64 +#define helper_vlsbu_v_w helper_vlsbu_v_w_riscv64 +#define helper_vlse_v_b helper_vlse_v_b_riscv64 +#define helper_vlse_v_d helper_vlse_v_d_riscv64 +#define helper_vlse_v_h helper_vlse_v_h_riscv64 +#define helper_vlse_v_w helper_vlse_v_w_riscv64 +#define helper_vlsh_v_d helper_vlsh_v_d_riscv64 +#define helper_vlsh_v_h helper_vlsh_v_h_riscv64 +#define helper_vlsh_v_w helper_vlsh_v_w_riscv64 +#define helper_vlshu_v_d helper_vlshu_v_d_riscv64 +#define helper_vlshu_v_h helper_vlshu_v_h_riscv64 +#define helper_vlshu_v_w helper_vlshu_v_w_riscv64 +#define helper_vlsw_v_d helper_vlsw_v_d_riscv64 +#define helper_vlsw_v_w helper_vlsw_v_w_riscv64 +#define helper_vlswu_v_d helper_vlswu_v_d_riscv64 +#define helper_vlswu_v_w helper_vlswu_v_w_riscv64 +#define helper_vlw_v_d helper_vlw_v_d_riscv64 +#define helper_vlw_v_d_mask helper_vlw_v_d_mask_riscv64 +#define helper_vlw_v_w helper_vlw_v_w_riscv64 +#define helper_vlw_v_w_mask helper_vlw_v_w_mask_riscv64 +#define helper_vlwff_v_d helper_vlwff_v_d_riscv64 +#define helper_vlwff_v_w helper_vlwff_v_w_riscv64 +#define helper_vlwu_v_d helper_vlwu_v_d_riscv64 +#define helper_vlwu_v_d_mask helper_vlwu_v_d_mask_riscv64 +#define helper_vlwu_v_w helper_vlwu_v_w_riscv64 +#define helper_vlwu_v_w_mask helper_vlwu_v_w_mask_riscv64 +#define helper_vlwuff_v_d helper_vlwuff_v_d_riscv64 +#define helper_vlwuff_v_w helper_vlwuff_v_w_riscv64 +#define helper_vlxb_v_b helper_vlxb_v_b_riscv64 +#define helper_vlxb_v_d helper_vlxb_v_d_riscv64 +#define helper_vlxb_v_h helper_vlxb_v_h_riscv64 +#define helper_vlxb_v_w helper_vlxb_v_w_riscv64 +#define helper_vlxbu_v_b helper_vlxbu_v_b_riscv64 +#define helper_vlxbu_v_d helper_vlxbu_v_d_riscv64 +#define helper_vlxbu_v_h helper_vlxbu_v_h_riscv64 +#define helper_vlxbu_v_w helper_vlxbu_v_w_riscv64 +#define helper_vlxe_v_b helper_vlxe_v_b_riscv64 +#define helper_vlxe_v_d helper_vlxe_v_d_riscv64 +#define helper_vlxe_v_h 
helper_vlxe_v_h_riscv64 +#define helper_vlxe_v_w helper_vlxe_v_w_riscv64 +#define helper_vlxh_v_d helper_vlxh_v_d_riscv64 +#define helper_vlxh_v_h helper_vlxh_v_h_riscv64 +#define helper_vlxh_v_w helper_vlxh_v_w_riscv64 +#define helper_vlxhu_v_d helper_vlxhu_v_d_riscv64 +#define helper_vlxhu_v_h helper_vlxhu_v_h_riscv64 +#define helper_vlxhu_v_w helper_vlxhu_v_w_riscv64 +#define helper_vlxw_v_d helper_vlxw_v_d_riscv64 +#define helper_vlxw_v_w helper_vlxw_v_w_riscv64 +#define helper_vlxwu_v_d helper_vlxwu_v_d_riscv64 +#define helper_vlxwu_v_w helper_vlxwu_v_w_riscv64 +#define helper_vmacc_vv_b helper_vmacc_vv_b_riscv64 +#define helper_vmacc_vv_d helper_vmacc_vv_d_riscv64 +#define helper_vmacc_vv_h helper_vmacc_vv_h_riscv64 +#define helper_vmacc_vv_w helper_vmacc_vv_w_riscv64 +#define helper_vmacc_vx_b helper_vmacc_vx_b_riscv64 +#define helper_vmacc_vx_d helper_vmacc_vx_d_riscv64 +#define helper_vmacc_vx_h helper_vmacc_vx_h_riscv64 +#define helper_vmacc_vx_w helper_vmacc_vx_w_riscv64 +#define helper_vmadc_vvm_b helper_vmadc_vvm_b_riscv64 +#define helper_vmadc_vvm_d helper_vmadc_vvm_d_riscv64 +#define helper_vmadc_vvm_h helper_vmadc_vvm_h_riscv64 +#define helper_vmadc_vvm_w helper_vmadc_vvm_w_riscv64 +#define helper_vmadc_vxm_b helper_vmadc_vxm_b_riscv64 +#define helper_vmadc_vxm_d helper_vmadc_vxm_d_riscv64 +#define helper_vmadc_vxm_h helper_vmadc_vxm_h_riscv64 +#define helper_vmadc_vxm_w helper_vmadc_vxm_w_riscv64 +#define helper_vmadd_vv_b helper_vmadd_vv_b_riscv64 +#define helper_vmadd_vv_d helper_vmadd_vv_d_riscv64 +#define helper_vmadd_vv_h helper_vmadd_vv_h_riscv64 +#define helper_vmadd_vv_w helper_vmadd_vv_w_riscv64 +#define helper_vmadd_vx_b helper_vmadd_vx_b_riscv64 +#define helper_vmadd_vx_d helper_vmadd_vx_d_riscv64 +#define helper_vmadd_vx_h helper_vmadd_vx_h_riscv64 +#define helper_vmadd_vx_w helper_vmadd_vx_w_riscv64 +#define helper_vmand_mm helper_vmand_mm_riscv64 +#define helper_vmandnot_mm helper_vmandnot_mm_riscv64 +#define helper_vmax_vv_b helper_vmax_vv_b_riscv64 +#define helper_vmax_vv_d helper_vmax_vv_d_riscv64 +#define helper_vmax_vv_h helper_vmax_vv_h_riscv64 +#define helper_vmax_vv_w helper_vmax_vv_w_riscv64 +#define helper_vmax_vx_b helper_vmax_vx_b_riscv64 +#define helper_vmax_vx_d helper_vmax_vx_d_riscv64 +#define helper_vmax_vx_h helper_vmax_vx_h_riscv64 +#define helper_vmax_vx_w helper_vmax_vx_w_riscv64 +#define helper_vmaxu_vv_b helper_vmaxu_vv_b_riscv64 +#define helper_vmaxu_vv_d helper_vmaxu_vv_d_riscv64 +#define helper_vmaxu_vv_h helper_vmaxu_vv_h_riscv64 +#define helper_vmaxu_vv_w helper_vmaxu_vv_w_riscv64 +#define helper_vmaxu_vx_b helper_vmaxu_vx_b_riscv64 +#define helper_vmaxu_vx_d helper_vmaxu_vx_d_riscv64 +#define helper_vmaxu_vx_h helper_vmaxu_vx_h_riscv64 +#define helper_vmaxu_vx_w helper_vmaxu_vx_w_riscv64 +#define helper_vmerge_vvm_b helper_vmerge_vvm_b_riscv64 +#define helper_vmerge_vvm_d helper_vmerge_vvm_d_riscv64 +#define helper_vmerge_vvm_h helper_vmerge_vvm_h_riscv64 +#define helper_vmerge_vvm_w helper_vmerge_vvm_w_riscv64 +#define helper_vmerge_vxm_b helper_vmerge_vxm_b_riscv64 +#define helper_vmerge_vxm_d helper_vmerge_vxm_d_riscv64 +#define helper_vmerge_vxm_h helper_vmerge_vxm_h_riscv64 +#define helper_vmerge_vxm_w helper_vmerge_vxm_w_riscv64 +#define helper_vmfeq_vf_d helper_vmfeq_vf_d_riscv64 +#define helper_vmfeq_vf_h helper_vmfeq_vf_h_riscv64 +#define helper_vmfeq_vf_w helper_vmfeq_vf_w_riscv64 +#define helper_vmfeq_vv_d helper_vmfeq_vv_d_riscv64 +#define helper_vmfeq_vv_h helper_vmfeq_vv_h_riscv64 +#define helper_vmfeq_vv_w 
helper_vmfeq_vv_w_riscv64 +#define helper_vmfge_vf_d helper_vmfge_vf_d_riscv64 +#define helper_vmfge_vf_h helper_vmfge_vf_h_riscv64 +#define helper_vmfge_vf_w helper_vmfge_vf_w_riscv64 +#define helper_vmfgt_vf_d helper_vmfgt_vf_d_riscv64 +#define helper_vmfgt_vf_h helper_vmfgt_vf_h_riscv64 +#define helper_vmfgt_vf_w helper_vmfgt_vf_w_riscv64 +#define helper_vmfirst_m helper_vmfirst_m_riscv64 +#define helper_vmfle_vf_d helper_vmfle_vf_d_riscv64 +#define helper_vmfle_vf_h helper_vmfle_vf_h_riscv64 +#define helper_vmfle_vf_w helper_vmfle_vf_w_riscv64 +#define helper_vmfle_vv_d helper_vmfle_vv_d_riscv64 +#define helper_vmfle_vv_h helper_vmfle_vv_h_riscv64 +#define helper_vmfle_vv_w helper_vmfle_vv_w_riscv64 +#define helper_vmflt_vf_d helper_vmflt_vf_d_riscv64 +#define helper_vmflt_vf_h helper_vmflt_vf_h_riscv64 +#define helper_vmflt_vf_w helper_vmflt_vf_w_riscv64 +#define helper_vmflt_vv_d helper_vmflt_vv_d_riscv64 +#define helper_vmflt_vv_h helper_vmflt_vv_h_riscv64 +#define helper_vmflt_vv_w helper_vmflt_vv_w_riscv64 +#define helper_vmfne_vf_d helper_vmfne_vf_d_riscv64 +#define helper_vmfne_vf_h helper_vmfne_vf_h_riscv64 +#define helper_vmfne_vf_w helper_vmfne_vf_w_riscv64 +#define helper_vmfne_vv_d helper_vmfne_vv_d_riscv64 +#define helper_vmfne_vv_h helper_vmfne_vv_h_riscv64 +#define helper_vmfne_vv_w helper_vmfne_vv_w_riscv64 +#define helper_vmford_vf_d helper_vmford_vf_d_riscv64 +#define helper_vmford_vf_h helper_vmford_vf_h_riscv64 +#define helper_vmford_vf_w helper_vmford_vf_w_riscv64 +#define helper_vmford_vv_d helper_vmford_vv_d_riscv64 +#define helper_vmford_vv_h helper_vmford_vv_h_riscv64 +#define helper_vmford_vv_w helper_vmford_vv_w_riscv64 +#define helper_vmin_vv_b helper_vmin_vv_b_riscv64 +#define helper_vmin_vv_d helper_vmin_vv_d_riscv64 +#define helper_vmin_vv_h helper_vmin_vv_h_riscv64 +#define helper_vmin_vv_w helper_vmin_vv_w_riscv64 +#define helper_vmin_vx_b helper_vmin_vx_b_riscv64 +#define helper_vmin_vx_d helper_vmin_vx_d_riscv64 +#define helper_vmin_vx_h helper_vmin_vx_h_riscv64 +#define helper_vmin_vx_w helper_vmin_vx_w_riscv64 +#define helper_vminu_vv_b helper_vminu_vv_b_riscv64 +#define helper_vminu_vv_d helper_vminu_vv_d_riscv64 +#define helper_vminu_vv_h helper_vminu_vv_h_riscv64 +#define helper_vminu_vv_w helper_vminu_vv_w_riscv64 +#define helper_vminu_vx_b helper_vminu_vx_b_riscv64 +#define helper_vminu_vx_d helper_vminu_vx_d_riscv64 +#define helper_vminu_vx_h helper_vminu_vx_h_riscv64 +#define helper_vminu_vx_w helper_vminu_vx_w_riscv64 +#define helper_vmnand_mm helper_vmnand_mm_riscv64 +#define helper_vmnor_mm helper_vmnor_mm_riscv64 +#define helper_vmor_mm helper_vmor_mm_riscv64 +#define helper_vmornot_mm helper_vmornot_mm_riscv64 +#define helper_vmpopc_m helper_vmpopc_m_riscv64 +#define helper_vmsbc_vvm_b helper_vmsbc_vvm_b_riscv64 +#define helper_vmsbc_vvm_d helper_vmsbc_vvm_d_riscv64 +#define helper_vmsbc_vvm_h helper_vmsbc_vvm_h_riscv64 +#define helper_vmsbc_vvm_w helper_vmsbc_vvm_w_riscv64 +#define helper_vmsbc_vxm_b helper_vmsbc_vxm_b_riscv64 +#define helper_vmsbc_vxm_d helper_vmsbc_vxm_d_riscv64 +#define helper_vmsbc_vxm_h helper_vmsbc_vxm_h_riscv64 +#define helper_vmsbc_vxm_w helper_vmsbc_vxm_w_riscv64 +#define helper_vmsbf_m helper_vmsbf_m_riscv64 +#define helper_vmseq_vv_b helper_vmseq_vv_b_riscv64 +#define helper_vmseq_vv_d helper_vmseq_vv_d_riscv64 +#define helper_vmseq_vv_h helper_vmseq_vv_h_riscv64 +#define helper_vmseq_vv_w helper_vmseq_vv_w_riscv64 +#define helper_vmseq_vx_b helper_vmseq_vx_b_riscv64 +#define helper_vmseq_vx_d 
helper_vmseq_vx_d_riscv64 +#define helper_vmseq_vx_h helper_vmseq_vx_h_riscv64 +#define helper_vmseq_vx_w helper_vmseq_vx_w_riscv64 +#define helper_vmsgt_vx_b helper_vmsgt_vx_b_riscv64 +#define helper_vmsgt_vx_d helper_vmsgt_vx_d_riscv64 +#define helper_vmsgt_vx_h helper_vmsgt_vx_h_riscv64 +#define helper_vmsgt_vx_w helper_vmsgt_vx_w_riscv64 +#define helper_vmsgtu_vx_b helper_vmsgtu_vx_b_riscv64 +#define helper_vmsgtu_vx_d helper_vmsgtu_vx_d_riscv64 +#define helper_vmsgtu_vx_h helper_vmsgtu_vx_h_riscv64 +#define helper_vmsgtu_vx_w helper_vmsgtu_vx_w_riscv64 +#define helper_vmsif_m helper_vmsif_m_riscv64 +#define helper_vmsle_vv_b helper_vmsle_vv_b_riscv64 +#define helper_vmsle_vv_d helper_vmsle_vv_d_riscv64 +#define helper_vmsle_vv_h helper_vmsle_vv_h_riscv64 +#define helper_vmsle_vv_w helper_vmsle_vv_w_riscv64 +#define helper_vmsle_vx_b helper_vmsle_vx_b_riscv64 +#define helper_vmsle_vx_d helper_vmsle_vx_d_riscv64 +#define helper_vmsle_vx_h helper_vmsle_vx_h_riscv64 +#define helper_vmsle_vx_w helper_vmsle_vx_w_riscv64 +#define helper_vmsleu_vv_b helper_vmsleu_vv_b_riscv64 +#define helper_vmsleu_vv_d helper_vmsleu_vv_d_riscv64 +#define helper_vmsleu_vv_h helper_vmsleu_vv_h_riscv64 +#define helper_vmsleu_vv_w helper_vmsleu_vv_w_riscv64 +#define helper_vmsleu_vx_b helper_vmsleu_vx_b_riscv64 +#define helper_vmsleu_vx_d helper_vmsleu_vx_d_riscv64 +#define helper_vmsleu_vx_h helper_vmsleu_vx_h_riscv64 +#define helper_vmsleu_vx_w helper_vmsleu_vx_w_riscv64 +#define helper_vmslt_vv_b helper_vmslt_vv_b_riscv64 +#define helper_vmslt_vv_d helper_vmslt_vv_d_riscv64 +#define helper_vmslt_vv_h helper_vmslt_vv_h_riscv64 +#define helper_vmslt_vv_w helper_vmslt_vv_w_riscv64 +#define helper_vmslt_vx_b helper_vmslt_vx_b_riscv64 +#define helper_vmslt_vx_d helper_vmslt_vx_d_riscv64 +#define helper_vmslt_vx_h helper_vmslt_vx_h_riscv64 +#define helper_vmslt_vx_w helper_vmslt_vx_w_riscv64 +#define helper_vmsltu_vv_b helper_vmsltu_vv_b_riscv64 +#define helper_vmsltu_vv_d helper_vmsltu_vv_d_riscv64 +#define helper_vmsltu_vv_h helper_vmsltu_vv_h_riscv64 +#define helper_vmsltu_vv_w helper_vmsltu_vv_w_riscv64 +#define helper_vmsltu_vx_b helper_vmsltu_vx_b_riscv64 +#define helper_vmsltu_vx_d helper_vmsltu_vx_d_riscv64 +#define helper_vmsltu_vx_h helper_vmsltu_vx_h_riscv64 +#define helper_vmsltu_vx_w helper_vmsltu_vx_w_riscv64 +#define helper_vmsne_vv_b helper_vmsne_vv_b_riscv64 +#define helper_vmsne_vv_d helper_vmsne_vv_d_riscv64 +#define helper_vmsne_vv_h helper_vmsne_vv_h_riscv64 +#define helper_vmsne_vv_w helper_vmsne_vv_w_riscv64 +#define helper_vmsne_vx_b helper_vmsne_vx_b_riscv64 +#define helper_vmsne_vx_d helper_vmsne_vx_d_riscv64 +#define helper_vmsne_vx_h helper_vmsne_vx_h_riscv64 +#define helper_vmsne_vx_w helper_vmsne_vx_w_riscv64 +#define helper_vmsof_m helper_vmsof_m_riscv64 +#define helper_vmul_vv_b helper_vmul_vv_b_riscv64 +#define helper_vmul_vv_d helper_vmul_vv_d_riscv64 +#define helper_vmul_vv_h helper_vmul_vv_h_riscv64 +#define helper_vmul_vv_w helper_vmul_vv_w_riscv64 +#define helper_vmul_vx_b helper_vmul_vx_b_riscv64 +#define helper_vmul_vx_d helper_vmul_vx_d_riscv64 +#define helper_vmul_vx_h helper_vmul_vx_h_riscv64 +#define helper_vmul_vx_w helper_vmul_vx_w_riscv64 +#define helper_vmulh_vv_b helper_vmulh_vv_b_riscv64 +#define helper_vmulh_vv_d helper_vmulh_vv_d_riscv64 +#define helper_vmulh_vv_h helper_vmulh_vv_h_riscv64 +#define helper_vmulh_vv_w helper_vmulh_vv_w_riscv64 +#define helper_vmulh_vx_b helper_vmulh_vx_b_riscv64 +#define helper_vmulh_vx_d helper_vmulh_vx_d_riscv64 +#define 
helper_vmulh_vx_h helper_vmulh_vx_h_riscv64 +#define helper_vmulh_vx_w helper_vmulh_vx_w_riscv64 +#define helper_vmulhsu_vv_b helper_vmulhsu_vv_b_riscv64 +#define helper_vmulhsu_vv_d helper_vmulhsu_vv_d_riscv64 +#define helper_vmulhsu_vv_h helper_vmulhsu_vv_h_riscv64 +#define helper_vmulhsu_vv_w helper_vmulhsu_vv_w_riscv64 +#define helper_vmulhsu_vx_b helper_vmulhsu_vx_b_riscv64 +#define helper_vmulhsu_vx_d helper_vmulhsu_vx_d_riscv64 +#define helper_vmulhsu_vx_h helper_vmulhsu_vx_h_riscv64 +#define helper_vmulhsu_vx_w helper_vmulhsu_vx_w_riscv64 +#define helper_vmulhu_vv_b helper_vmulhu_vv_b_riscv64 +#define helper_vmulhu_vv_d helper_vmulhu_vv_d_riscv64 +#define helper_vmulhu_vv_h helper_vmulhu_vv_h_riscv64 +#define helper_vmulhu_vv_w helper_vmulhu_vv_w_riscv64 +#define helper_vmulhu_vx_b helper_vmulhu_vx_b_riscv64 +#define helper_vmulhu_vx_d helper_vmulhu_vx_d_riscv64 +#define helper_vmulhu_vx_h helper_vmulhu_vx_h_riscv64 +#define helper_vmulhu_vx_w helper_vmulhu_vx_w_riscv64 +#define helper_vmv_v_v_b helper_vmv_v_v_b_riscv64 +#define helper_vmv_v_v_d helper_vmv_v_v_d_riscv64 +#define helper_vmv_v_v_h helper_vmv_v_v_h_riscv64 +#define helper_vmv_v_v_w helper_vmv_v_v_w_riscv64 +#define helper_vmv_v_x_b helper_vmv_v_x_b_riscv64 +#define helper_vmv_v_x_d helper_vmv_v_x_d_riscv64 +#define helper_vmv_v_x_h helper_vmv_v_x_h_riscv64 +#define helper_vmv_v_x_w helper_vmv_v_x_w_riscv64 +#define helper_vmxnor_mm helper_vmxnor_mm_riscv64 +#define helper_vmxor_mm helper_vmxor_mm_riscv64 +#define helper_vnclip_vv_b helper_vnclip_vv_b_riscv64 +#define helper_vnclip_vv_h helper_vnclip_vv_h_riscv64 +#define helper_vnclip_vv_w helper_vnclip_vv_w_riscv64 +#define helper_vnclip_vx_b helper_vnclip_vx_b_riscv64 +#define helper_vnclip_vx_h helper_vnclip_vx_h_riscv64 +#define helper_vnclip_vx_w helper_vnclip_vx_w_riscv64 +#define helper_vnclipu_vv_b helper_vnclipu_vv_b_riscv64 +#define helper_vnclipu_vv_h helper_vnclipu_vv_h_riscv64 +#define helper_vnclipu_vv_w helper_vnclipu_vv_w_riscv64 +#define helper_vnclipu_vx_b helper_vnclipu_vx_b_riscv64 +#define helper_vnclipu_vx_h helper_vnclipu_vx_h_riscv64 +#define helper_vnclipu_vx_w helper_vnclipu_vx_w_riscv64 +#define helper_vnmsac_vv_b helper_vnmsac_vv_b_riscv64 +#define helper_vnmsac_vv_d helper_vnmsac_vv_d_riscv64 +#define helper_vnmsac_vv_h helper_vnmsac_vv_h_riscv64 +#define helper_vnmsac_vv_w helper_vnmsac_vv_w_riscv64 +#define helper_vnmsac_vx_b helper_vnmsac_vx_b_riscv64 +#define helper_vnmsac_vx_d helper_vnmsac_vx_d_riscv64 +#define helper_vnmsac_vx_h helper_vnmsac_vx_h_riscv64 +#define helper_vnmsac_vx_w helper_vnmsac_vx_w_riscv64 +#define helper_vnmsub_vv_b helper_vnmsub_vv_b_riscv64 +#define helper_vnmsub_vv_d helper_vnmsub_vv_d_riscv64 +#define helper_vnmsub_vv_h helper_vnmsub_vv_h_riscv64 +#define helper_vnmsub_vv_w helper_vnmsub_vv_w_riscv64 +#define helper_vnmsub_vx_b helper_vnmsub_vx_b_riscv64 +#define helper_vnmsub_vx_d helper_vnmsub_vx_d_riscv64 +#define helper_vnmsub_vx_h helper_vnmsub_vx_h_riscv64 +#define helper_vnmsub_vx_w helper_vnmsub_vx_w_riscv64 +#define helper_vnsra_vv_b helper_vnsra_vv_b_riscv64 +#define helper_vnsra_vv_h helper_vnsra_vv_h_riscv64 +#define helper_vnsra_vv_w helper_vnsra_vv_w_riscv64 +#define helper_vnsra_vx_b helper_vnsra_vx_b_riscv64 +#define helper_vnsra_vx_h helper_vnsra_vx_h_riscv64 +#define helper_vnsra_vx_w helper_vnsra_vx_w_riscv64 +#define helper_vnsrl_vv_b helper_vnsrl_vv_b_riscv64 +#define helper_vnsrl_vv_h helper_vnsrl_vv_h_riscv64 +#define helper_vnsrl_vv_w helper_vnsrl_vv_w_riscv64 +#define 
helper_vnsrl_vx_b helper_vnsrl_vx_b_riscv64 +#define helper_vnsrl_vx_h helper_vnsrl_vx_h_riscv64 +#define helper_vnsrl_vx_w helper_vnsrl_vx_w_riscv64 +#define helper_vor_vv_b helper_vor_vv_b_riscv64 +#define helper_vor_vv_d helper_vor_vv_d_riscv64 +#define helper_vor_vv_h helper_vor_vv_h_riscv64 +#define helper_vor_vv_w helper_vor_vv_w_riscv64 +#define helper_vor_vx_b helper_vor_vx_b_riscv64 +#define helper_vor_vx_d helper_vor_vx_d_riscv64 +#define helper_vor_vx_h helper_vor_vx_h_riscv64 +#define helper_vor_vx_w helper_vor_vx_w_riscv64 +#define helper_vredand_vs_b helper_vredand_vs_b_riscv64 +#define helper_vredand_vs_d helper_vredand_vs_d_riscv64 +#define helper_vredand_vs_h helper_vredand_vs_h_riscv64 +#define helper_vredand_vs_w helper_vredand_vs_w_riscv64 +#define helper_vredmax_vs_b helper_vredmax_vs_b_riscv64 +#define helper_vredmax_vs_d helper_vredmax_vs_d_riscv64 +#define helper_vredmax_vs_h helper_vredmax_vs_h_riscv64 +#define helper_vredmax_vs_w helper_vredmax_vs_w_riscv64 +#define helper_vredmaxu_vs_b helper_vredmaxu_vs_b_riscv64 +#define helper_vredmaxu_vs_d helper_vredmaxu_vs_d_riscv64 +#define helper_vredmaxu_vs_h helper_vredmaxu_vs_h_riscv64 +#define helper_vredmaxu_vs_w helper_vredmaxu_vs_w_riscv64 +#define helper_vredmin_vs_b helper_vredmin_vs_b_riscv64 +#define helper_vredmin_vs_d helper_vredmin_vs_d_riscv64 +#define helper_vredmin_vs_h helper_vredmin_vs_h_riscv64 +#define helper_vredmin_vs_w helper_vredmin_vs_w_riscv64 +#define helper_vredminu_vs_b helper_vredminu_vs_b_riscv64 +#define helper_vredminu_vs_d helper_vredminu_vs_d_riscv64 +#define helper_vredminu_vs_h helper_vredminu_vs_h_riscv64 +#define helper_vredminu_vs_w helper_vredminu_vs_w_riscv64 +#define helper_vredor_vs_b helper_vredor_vs_b_riscv64 +#define helper_vredor_vs_d helper_vredor_vs_d_riscv64 +#define helper_vredor_vs_h helper_vredor_vs_h_riscv64 +#define helper_vredor_vs_w helper_vredor_vs_w_riscv64 +#define helper_vredsum_vs_b helper_vredsum_vs_b_riscv64 +#define helper_vredsum_vs_d helper_vredsum_vs_d_riscv64 +#define helper_vredsum_vs_h helper_vredsum_vs_h_riscv64 +#define helper_vredsum_vs_w helper_vredsum_vs_w_riscv64 +#define helper_vredxor_vs_b helper_vredxor_vs_b_riscv64 +#define helper_vredxor_vs_d helper_vredxor_vs_d_riscv64 +#define helper_vredxor_vs_h helper_vredxor_vs_h_riscv64 +#define helper_vredxor_vs_w helper_vredxor_vs_w_riscv64 +#define helper_vrem_vv_b helper_vrem_vv_b_riscv64 +#define helper_vrem_vv_d helper_vrem_vv_d_riscv64 +#define helper_vrem_vv_h helper_vrem_vv_h_riscv64 +#define helper_vrem_vv_w helper_vrem_vv_w_riscv64 +#define helper_vrem_vx_b helper_vrem_vx_b_riscv64 +#define helper_vrem_vx_d helper_vrem_vx_d_riscv64 +#define helper_vrem_vx_h helper_vrem_vx_h_riscv64 +#define helper_vrem_vx_w helper_vrem_vx_w_riscv64 +#define helper_vremu_vv_b helper_vremu_vv_b_riscv64 +#define helper_vremu_vv_d helper_vremu_vv_d_riscv64 +#define helper_vremu_vv_h helper_vremu_vv_h_riscv64 +#define helper_vremu_vv_w helper_vremu_vv_w_riscv64 +#define helper_vremu_vx_b helper_vremu_vx_b_riscv64 +#define helper_vremu_vx_d helper_vremu_vx_d_riscv64 +#define helper_vremu_vx_h helper_vremu_vx_h_riscv64 +#define helper_vremu_vx_w helper_vremu_vx_w_riscv64 +#define helper_vrgather_vv_b helper_vrgather_vv_b_riscv64 +#define helper_vrgather_vv_d helper_vrgather_vv_d_riscv64 +#define helper_vrgather_vv_h helper_vrgather_vv_h_riscv64 +#define helper_vrgather_vv_w helper_vrgather_vv_w_riscv64 +#define helper_vrgather_vx_b helper_vrgather_vx_b_riscv64 +#define helper_vrgather_vx_d 
helper_vrgather_vx_d_riscv64 +#define helper_vrgather_vx_h helper_vrgather_vx_h_riscv64 +#define helper_vrgather_vx_w helper_vrgather_vx_w_riscv64 +#define helper_vrsub_vx_b helper_vrsub_vx_b_riscv64 +#define helper_vrsub_vx_d helper_vrsub_vx_d_riscv64 +#define helper_vrsub_vx_h helper_vrsub_vx_h_riscv64 +#define helper_vrsub_vx_w helper_vrsub_vx_w_riscv64 +#define helper_vsadd_vv_b helper_vsadd_vv_b_riscv64 +#define helper_vsadd_vv_d helper_vsadd_vv_d_riscv64 +#define helper_vsadd_vv_h helper_vsadd_vv_h_riscv64 +#define helper_vsadd_vv_w helper_vsadd_vv_w_riscv64 +#define helper_vsadd_vx_b helper_vsadd_vx_b_riscv64 +#define helper_vsadd_vx_d helper_vsadd_vx_d_riscv64 +#define helper_vsadd_vx_h helper_vsadd_vx_h_riscv64 +#define helper_vsadd_vx_w helper_vsadd_vx_w_riscv64 +#define helper_vsaddu_vv_b helper_vsaddu_vv_b_riscv64 +#define helper_vsaddu_vv_d helper_vsaddu_vv_d_riscv64 +#define helper_vsaddu_vv_h helper_vsaddu_vv_h_riscv64 +#define helper_vsaddu_vv_w helper_vsaddu_vv_w_riscv64 +#define helper_vsaddu_vx_b helper_vsaddu_vx_b_riscv64 +#define helper_vsaddu_vx_d helper_vsaddu_vx_d_riscv64 +#define helper_vsaddu_vx_h helper_vsaddu_vx_h_riscv64 +#define helper_vsaddu_vx_w helper_vsaddu_vx_w_riscv64 +#define helper_vsb_v_b helper_vsb_v_b_riscv64 +#define helper_vsb_v_b_mask helper_vsb_v_b_mask_riscv64 +#define helper_vsb_v_d helper_vsb_v_d_riscv64 +#define helper_vsb_v_d_mask helper_vsb_v_d_mask_riscv64 +#define helper_vsb_v_h helper_vsb_v_h_riscv64 +#define helper_vsb_v_h_mask helper_vsb_v_h_mask_riscv64 +#define helper_vsb_v_w helper_vsb_v_w_riscv64 +#define helper_vsb_v_w_mask helper_vsb_v_w_mask_riscv64 +#define helper_vsbc_vvm_b helper_vsbc_vvm_b_riscv64 +#define helper_vsbc_vvm_d helper_vsbc_vvm_d_riscv64 +#define helper_vsbc_vvm_h helper_vsbc_vvm_h_riscv64 +#define helper_vsbc_vvm_w helper_vsbc_vvm_w_riscv64 +#define helper_vsbc_vxm_b helper_vsbc_vxm_b_riscv64 +#define helper_vsbc_vxm_d helper_vsbc_vxm_d_riscv64 +#define helper_vsbc_vxm_h helper_vsbc_vxm_h_riscv64 +#define helper_vsbc_vxm_w helper_vsbc_vxm_w_riscv64 +#define helper_vse_v_b helper_vse_v_b_riscv64 +#define helper_vse_v_b_mask helper_vse_v_b_mask_riscv64 +#define helper_vse_v_d helper_vse_v_d_riscv64 +#define helper_vse_v_d_mask helper_vse_v_d_mask_riscv64 +#define helper_vse_v_h helper_vse_v_h_riscv64 +#define helper_vse_v_h_mask helper_vse_v_h_mask_riscv64 +#define helper_vse_v_w helper_vse_v_w_riscv64 +#define helper_vse_v_w_mask helper_vse_v_w_mask_riscv64 +#define helper_vsetvl helper_vsetvl_riscv64 +#define helper_vsh_v_d helper_vsh_v_d_riscv64 +#define helper_vsh_v_d_mask helper_vsh_v_d_mask_riscv64 +#define helper_vsh_v_h helper_vsh_v_h_riscv64 +#define helper_vsh_v_h_mask helper_vsh_v_h_mask_riscv64 +#define helper_vsh_v_w helper_vsh_v_w_riscv64 +#define helper_vsh_v_w_mask helper_vsh_v_w_mask_riscv64 +#define helper_vslide1down_vx_b helper_vslide1down_vx_b_riscv64 +#define helper_vslide1down_vx_d helper_vslide1down_vx_d_riscv64 +#define helper_vslide1down_vx_h helper_vslide1down_vx_h_riscv64 +#define helper_vslide1down_vx_w helper_vslide1down_vx_w_riscv64 +#define helper_vslide1up_vx_b helper_vslide1up_vx_b_riscv64 +#define helper_vslide1up_vx_d helper_vslide1up_vx_d_riscv64 +#define helper_vslide1up_vx_h helper_vslide1up_vx_h_riscv64 +#define helper_vslide1up_vx_w helper_vslide1up_vx_w_riscv64 +#define helper_vslidedown_vx_b helper_vslidedown_vx_b_riscv64 +#define helper_vslidedown_vx_d helper_vslidedown_vx_d_riscv64 +#define helper_vslidedown_vx_h helper_vslidedown_vx_h_riscv64 +#define 
helper_vslidedown_vx_w helper_vslidedown_vx_w_riscv64 +#define helper_vslideup_vx_b helper_vslideup_vx_b_riscv64 +#define helper_vslideup_vx_d helper_vslideup_vx_d_riscv64 +#define helper_vslideup_vx_h helper_vslideup_vx_h_riscv64 +#define helper_vslideup_vx_w helper_vslideup_vx_w_riscv64 +#define helper_vsll_vv_b helper_vsll_vv_b_riscv64 +#define helper_vsll_vv_d helper_vsll_vv_d_riscv64 +#define helper_vsll_vv_h helper_vsll_vv_h_riscv64 +#define helper_vsll_vv_w helper_vsll_vv_w_riscv64 +#define helper_vsll_vx_b helper_vsll_vx_b_riscv64 +#define helper_vsll_vx_d helper_vsll_vx_d_riscv64 +#define helper_vsll_vx_h helper_vsll_vx_h_riscv64 +#define helper_vsll_vx_w helper_vsll_vx_w_riscv64 +#define helper_vsmul_vv_b helper_vsmul_vv_b_riscv64 +#define helper_vsmul_vv_d helper_vsmul_vv_d_riscv64 +#define helper_vsmul_vv_h helper_vsmul_vv_h_riscv64 +#define helper_vsmul_vv_w helper_vsmul_vv_w_riscv64 +#define helper_vsmul_vx_b helper_vsmul_vx_b_riscv64 +#define helper_vsmul_vx_d helper_vsmul_vx_d_riscv64 +#define helper_vsmul_vx_h helper_vsmul_vx_h_riscv64 +#define helper_vsmul_vx_w helper_vsmul_vx_w_riscv64 +#define helper_vsra_vv_b helper_vsra_vv_b_riscv64 +#define helper_vsra_vv_d helper_vsra_vv_d_riscv64 +#define helper_vsra_vv_h helper_vsra_vv_h_riscv64 +#define helper_vsra_vv_w helper_vsra_vv_w_riscv64 +#define helper_vsra_vx_b helper_vsra_vx_b_riscv64 +#define helper_vsra_vx_d helper_vsra_vx_d_riscv64 +#define helper_vsra_vx_h helper_vsra_vx_h_riscv64 +#define helper_vsra_vx_w helper_vsra_vx_w_riscv64 +#define helper_vsrl_vv_b helper_vsrl_vv_b_riscv64 +#define helper_vsrl_vv_d helper_vsrl_vv_d_riscv64 +#define helper_vsrl_vv_h helper_vsrl_vv_h_riscv64 +#define helper_vsrl_vv_w helper_vsrl_vv_w_riscv64 +#define helper_vsrl_vx_b helper_vsrl_vx_b_riscv64 +#define helper_vsrl_vx_d helper_vsrl_vx_d_riscv64 +#define helper_vsrl_vx_h helper_vsrl_vx_h_riscv64 +#define helper_vsrl_vx_w helper_vsrl_vx_w_riscv64 +#define helper_vssb_v_b helper_vssb_v_b_riscv64 +#define helper_vssb_v_d helper_vssb_v_d_riscv64 +#define helper_vssb_v_h helper_vssb_v_h_riscv64 +#define helper_vssb_v_w helper_vssb_v_w_riscv64 +#define helper_vsse_v_b helper_vsse_v_b_riscv64 +#define helper_vsse_v_d helper_vsse_v_d_riscv64 +#define helper_vsse_v_h helper_vsse_v_h_riscv64 +#define helper_vsse_v_w helper_vsse_v_w_riscv64 +#define helper_vssh_v_d helper_vssh_v_d_riscv64 +#define helper_vssh_v_h helper_vssh_v_h_riscv64 +#define helper_vssh_v_w helper_vssh_v_w_riscv64 +#define helper_vssra_vv_b helper_vssra_vv_b_riscv64 +#define helper_vssra_vv_d helper_vssra_vv_d_riscv64 +#define helper_vssra_vv_h helper_vssra_vv_h_riscv64 +#define helper_vssra_vv_w helper_vssra_vv_w_riscv64 +#define helper_vssra_vx_b helper_vssra_vx_b_riscv64 +#define helper_vssra_vx_d helper_vssra_vx_d_riscv64 +#define helper_vssra_vx_h helper_vssra_vx_h_riscv64 +#define helper_vssra_vx_w helper_vssra_vx_w_riscv64 +#define helper_vssrl_vv_b helper_vssrl_vv_b_riscv64 +#define helper_vssrl_vv_d helper_vssrl_vv_d_riscv64 +#define helper_vssrl_vv_h helper_vssrl_vv_h_riscv64 +#define helper_vssrl_vv_w helper_vssrl_vv_w_riscv64 +#define helper_vssrl_vx_b helper_vssrl_vx_b_riscv64 +#define helper_vssrl_vx_d helper_vssrl_vx_d_riscv64 +#define helper_vssrl_vx_h helper_vssrl_vx_h_riscv64 +#define helper_vssrl_vx_w helper_vssrl_vx_w_riscv64 +#define helper_vssub_vv_b helper_vssub_vv_b_riscv64 +#define helper_vssub_vv_d helper_vssub_vv_d_riscv64 +#define helper_vssub_vv_h helper_vssub_vv_h_riscv64 +#define helper_vssub_vv_w helper_vssub_vv_w_riscv64 +#define 
helper_vssub_vx_b helper_vssub_vx_b_riscv64 +#define helper_vssub_vx_d helper_vssub_vx_d_riscv64 +#define helper_vssub_vx_h helper_vssub_vx_h_riscv64 +#define helper_vssub_vx_w helper_vssub_vx_w_riscv64 +#define helper_vssubu_vv_b helper_vssubu_vv_b_riscv64 +#define helper_vssubu_vv_d helper_vssubu_vv_d_riscv64 +#define helper_vssubu_vv_h helper_vssubu_vv_h_riscv64 +#define helper_vssubu_vv_w helper_vssubu_vv_w_riscv64 +#define helper_vssubu_vx_b helper_vssubu_vx_b_riscv64 +#define helper_vssubu_vx_d helper_vssubu_vx_d_riscv64 +#define helper_vssubu_vx_h helper_vssubu_vx_h_riscv64 +#define helper_vssubu_vx_w helper_vssubu_vx_w_riscv64 +#define helper_vssw_v_d helper_vssw_v_d_riscv64 +#define helper_vssw_v_w helper_vssw_v_w_riscv64 +#define helper_vsub_vv_b helper_vsub_vv_b_riscv64 +#define helper_vsub_vv_d helper_vsub_vv_d_riscv64 +#define helper_vsub_vv_h helper_vsub_vv_h_riscv64 +#define helper_vsub_vv_w helper_vsub_vv_w_riscv64 +#define helper_vsub_vx_b helper_vsub_vx_b_riscv64 +#define helper_vsub_vx_d helper_vsub_vx_d_riscv64 +#define helper_vsub_vx_h helper_vsub_vx_h_riscv64 +#define helper_vsub_vx_w helper_vsub_vx_w_riscv64 +#define helper_vsw_v_d helper_vsw_v_d_riscv64 +#define helper_vsw_v_d_mask helper_vsw_v_d_mask_riscv64 +#define helper_vsw_v_w helper_vsw_v_w_riscv64 +#define helper_vsw_v_w_mask helper_vsw_v_w_mask_riscv64 +#define helper_vsxb_v_b helper_vsxb_v_b_riscv64 +#define helper_vsxb_v_d helper_vsxb_v_d_riscv64 +#define helper_vsxb_v_h helper_vsxb_v_h_riscv64 +#define helper_vsxb_v_w helper_vsxb_v_w_riscv64 +#define helper_vsxe_v_b helper_vsxe_v_b_riscv64 +#define helper_vsxe_v_d helper_vsxe_v_d_riscv64 +#define helper_vsxe_v_h helper_vsxe_v_h_riscv64 +#define helper_vsxe_v_w helper_vsxe_v_w_riscv64 +#define helper_vsxh_v_d helper_vsxh_v_d_riscv64 +#define helper_vsxh_v_h helper_vsxh_v_h_riscv64 +#define helper_vsxh_v_w helper_vsxh_v_w_riscv64 +#define helper_vsxw_v_d helper_vsxw_v_d_riscv64 +#define helper_vsxw_v_w helper_vsxw_v_w_riscv64 +#define helper_vwadd_vv_b helper_vwadd_vv_b_riscv64 +#define helper_vwadd_vv_h helper_vwadd_vv_h_riscv64 +#define helper_vwadd_vv_w helper_vwadd_vv_w_riscv64 +#define helper_vwadd_vx_b helper_vwadd_vx_b_riscv64 +#define helper_vwadd_vx_h helper_vwadd_vx_h_riscv64 +#define helper_vwadd_vx_w helper_vwadd_vx_w_riscv64 +#define helper_vwadd_wv_b helper_vwadd_wv_b_riscv64 +#define helper_vwadd_wv_h helper_vwadd_wv_h_riscv64 +#define helper_vwadd_wv_w helper_vwadd_wv_w_riscv64 +#define helper_vwadd_wx_b helper_vwadd_wx_b_riscv64 +#define helper_vwadd_wx_h helper_vwadd_wx_h_riscv64 +#define helper_vwadd_wx_w helper_vwadd_wx_w_riscv64 +#define helper_vwaddu_vv_b helper_vwaddu_vv_b_riscv64 +#define helper_vwaddu_vv_h helper_vwaddu_vv_h_riscv64 +#define helper_vwaddu_vv_w helper_vwaddu_vv_w_riscv64 +#define helper_vwaddu_vx_b helper_vwaddu_vx_b_riscv64 +#define helper_vwaddu_vx_h helper_vwaddu_vx_h_riscv64 +#define helper_vwaddu_vx_w helper_vwaddu_vx_w_riscv64 +#define helper_vwaddu_wv_b helper_vwaddu_wv_b_riscv64 +#define helper_vwaddu_wv_h helper_vwaddu_wv_h_riscv64 +#define helper_vwaddu_wv_w helper_vwaddu_wv_w_riscv64 +#define helper_vwaddu_wx_b helper_vwaddu_wx_b_riscv64 +#define helper_vwaddu_wx_h helper_vwaddu_wx_h_riscv64 +#define helper_vwaddu_wx_w helper_vwaddu_wx_w_riscv64 +#define helper_vwmacc_vv_b helper_vwmacc_vv_b_riscv64 +#define helper_vwmacc_vv_h helper_vwmacc_vv_h_riscv64 +#define helper_vwmacc_vv_w helper_vwmacc_vv_w_riscv64 +#define helper_vwmacc_vx_b helper_vwmacc_vx_b_riscv64 +#define helper_vwmacc_vx_h 
helper_vwmacc_vx_h_riscv64 +#define helper_vwmacc_vx_w helper_vwmacc_vx_w_riscv64 +#define helper_vwmaccsu_vv_b helper_vwmaccsu_vv_b_riscv64 +#define helper_vwmaccsu_vv_h helper_vwmaccsu_vv_h_riscv64 +#define helper_vwmaccsu_vv_w helper_vwmaccsu_vv_w_riscv64 +#define helper_vwmaccsu_vx_b helper_vwmaccsu_vx_b_riscv64 +#define helper_vwmaccsu_vx_h helper_vwmaccsu_vx_h_riscv64 +#define helper_vwmaccsu_vx_w helper_vwmaccsu_vx_w_riscv64 +#define helper_vwmaccu_vv_b helper_vwmaccu_vv_b_riscv64 +#define helper_vwmaccu_vv_h helper_vwmaccu_vv_h_riscv64 +#define helper_vwmaccu_vv_w helper_vwmaccu_vv_w_riscv64 +#define helper_vwmaccu_vx_b helper_vwmaccu_vx_b_riscv64 +#define helper_vwmaccu_vx_h helper_vwmaccu_vx_h_riscv64 +#define helper_vwmaccu_vx_w helper_vwmaccu_vx_w_riscv64 +#define helper_vwmaccus_vx_b helper_vwmaccus_vx_b_riscv64 +#define helper_vwmaccus_vx_h helper_vwmaccus_vx_h_riscv64 +#define helper_vwmaccus_vx_w helper_vwmaccus_vx_w_riscv64 +#define helper_vwmul_vv_b helper_vwmul_vv_b_riscv64 +#define helper_vwmul_vv_h helper_vwmul_vv_h_riscv64 +#define helper_vwmul_vv_w helper_vwmul_vv_w_riscv64 +#define helper_vwmul_vx_b helper_vwmul_vx_b_riscv64 +#define helper_vwmul_vx_h helper_vwmul_vx_h_riscv64 +#define helper_vwmul_vx_w helper_vwmul_vx_w_riscv64 +#define helper_vwmulsu_vv_b helper_vwmulsu_vv_b_riscv64 +#define helper_vwmulsu_vv_h helper_vwmulsu_vv_h_riscv64 +#define helper_vwmulsu_vv_w helper_vwmulsu_vv_w_riscv64 +#define helper_vwmulsu_vx_b helper_vwmulsu_vx_b_riscv64 +#define helper_vwmulsu_vx_h helper_vwmulsu_vx_h_riscv64 +#define helper_vwmulsu_vx_w helper_vwmulsu_vx_w_riscv64 +#define helper_vwmulu_vv_b helper_vwmulu_vv_b_riscv64 +#define helper_vwmulu_vv_h helper_vwmulu_vv_h_riscv64 +#define helper_vwmulu_vv_w helper_vwmulu_vv_w_riscv64 +#define helper_vwmulu_vx_b helper_vwmulu_vx_b_riscv64 +#define helper_vwmulu_vx_h helper_vwmulu_vx_h_riscv64 +#define helper_vwmulu_vx_w helper_vwmulu_vx_w_riscv64 +#define helper_vwredsum_vs_b helper_vwredsum_vs_b_riscv64 +#define helper_vwredsum_vs_h helper_vwredsum_vs_h_riscv64 +#define helper_vwredsum_vs_w helper_vwredsum_vs_w_riscv64 +#define helper_vwredsumu_vs_b helper_vwredsumu_vs_b_riscv64 +#define helper_vwredsumu_vs_h helper_vwredsumu_vs_h_riscv64 +#define helper_vwredsumu_vs_w helper_vwredsumu_vs_w_riscv64 +#define helper_vwsmacc_vv_b helper_vwsmacc_vv_b_riscv64 +#define helper_vwsmacc_vv_h helper_vwsmacc_vv_h_riscv64 +#define helper_vwsmacc_vv_w helper_vwsmacc_vv_w_riscv64 +#define helper_vwsmacc_vx_b helper_vwsmacc_vx_b_riscv64 +#define helper_vwsmacc_vx_h helper_vwsmacc_vx_h_riscv64 +#define helper_vwsmacc_vx_w helper_vwsmacc_vx_w_riscv64 +#define helper_vwsmaccsu_vv_b helper_vwsmaccsu_vv_b_riscv64 +#define helper_vwsmaccsu_vv_h helper_vwsmaccsu_vv_h_riscv64 +#define helper_vwsmaccsu_vv_w helper_vwsmaccsu_vv_w_riscv64 +#define helper_vwsmaccsu_vx_b helper_vwsmaccsu_vx_b_riscv64 +#define helper_vwsmaccsu_vx_h helper_vwsmaccsu_vx_h_riscv64 +#define helper_vwsmaccsu_vx_w helper_vwsmaccsu_vx_w_riscv64 +#define helper_vwsmaccu_vv_b helper_vwsmaccu_vv_b_riscv64 +#define helper_vwsmaccu_vv_h helper_vwsmaccu_vv_h_riscv64 +#define helper_vwsmaccu_vv_w helper_vwsmaccu_vv_w_riscv64 +#define helper_vwsmaccu_vx_b helper_vwsmaccu_vx_b_riscv64 +#define helper_vwsmaccu_vx_h helper_vwsmaccu_vx_h_riscv64 +#define helper_vwsmaccu_vx_w helper_vwsmaccu_vx_w_riscv64 +#define helper_vwsmaccus_vx_b helper_vwsmaccus_vx_b_riscv64 +#define helper_vwsmaccus_vx_h helper_vwsmaccus_vx_h_riscv64 +#define helper_vwsmaccus_vx_w helper_vwsmaccus_vx_w_riscv64 
+#define helper_vwsub_vv_b helper_vwsub_vv_b_riscv64 +#define helper_vwsub_vv_h helper_vwsub_vv_h_riscv64 +#define helper_vwsub_vv_w helper_vwsub_vv_w_riscv64 +#define helper_vwsub_vx_b helper_vwsub_vx_b_riscv64 +#define helper_vwsub_vx_h helper_vwsub_vx_h_riscv64 +#define helper_vwsub_vx_w helper_vwsub_vx_w_riscv64 +#define helper_vwsub_wv_b helper_vwsub_wv_b_riscv64 +#define helper_vwsub_wv_h helper_vwsub_wv_h_riscv64 +#define helper_vwsub_wv_w helper_vwsub_wv_w_riscv64 +#define helper_vwsub_wx_b helper_vwsub_wx_b_riscv64 +#define helper_vwsub_wx_h helper_vwsub_wx_h_riscv64 +#define helper_vwsub_wx_w helper_vwsub_wx_w_riscv64 +#define helper_vwsubu_vv_b helper_vwsubu_vv_b_riscv64 +#define helper_vwsubu_vv_h helper_vwsubu_vv_h_riscv64 +#define helper_vwsubu_vv_w helper_vwsubu_vv_w_riscv64 +#define helper_vwsubu_vx_b helper_vwsubu_vx_b_riscv64 +#define helper_vwsubu_vx_h helper_vwsubu_vx_h_riscv64 +#define helper_vwsubu_vx_w helper_vwsubu_vx_w_riscv64 +#define helper_vwsubu_wv_b helper_vwsubu_wv_b_riscv64 +#define helper_vwsubu_wv_h helper_vwsubu_wv_h_riscv64 +#define helper_vwsubu_wv_w helper_vwsubu_wv_w_riscv64 +#define helper_vwsubu_wx_b helper_vwsubu_wx_b_riscv64 +#define helper_vwsubu_wx_h helper_vwsubu_wx_h_riscv64 +#define helper_vwsubu_wx_w helper_vwsubu_wx_w_riscv64 +#define helper_vxor_vv_b helper_vxor_vv_b_riscv64 +#define helper_vxor_vv_d helper_vxor_vv_d_riscv64 +#define helper_vxor_vv_h helper_vxor_vv_h_riscv64 +#define helper_vxor_vv_w helper_vxor_vv_w_riscv64 +#define helper_vxor_vx_b helper_vxor_vx_b_riscv64 +#define helper_vxor_vx_d helper_vxor_vx_d_riscv64 +#define helper_vxor_vx_h helper_vxor_vx_h_riscv64 +#define helper_vxor_vx_w helper_vxor_vx_w_riscv64 #endif diff --git a/qemu/s390x.h b/qemu/s390x.h index 1906872bc6..d777300ed6 100644 --- a/qemu/s390x.h +++ b/qemu/s390x.h @@ -42,7 +42,10 @@ #define tcg_gen_shl_i64 tcg_gen_shl_i64_s390x #define tcg_gen_shr_i64 tcg_gen_shr_i64_s390x #define tcg_gen_st_i64 tcg_gen_st_i64_s390x +#define tcg_gen_add_i64 tcg_gen_add_i64_s390x +#define tcg_gen_sub_i64 tcg_gen_sub_i64_s390x #define tcg_gen_xor_i64 tcg_gen_xor_i64_s390x +#define tcg_gen_neg_i64 tcg_gen_neg_i64_s390x #define cpu_icount_to_ns cpu_icount_to_ns_s390x #define cpu_is_stopped cpu_is_stopped_s390x #define cpu_get_ticks cpu_get_ticks_s390x @@ -374,6 +377,8 @@ #define floatx80_sub floatx80_sub_s390x #define floatx80_mul floatx80_mul_s390x #define floatx80_div floatx80_div_s390x +#define floatx80_modrem floatx80_modrem_s390x +#define floatx80_mod floatx80_mod_s390x #define floatx80_rem floatx80_rem_s390x #define floatx80_sqrt floatx80_sqrt_s390x #define floatx80_eq floatx80_eq_s390x @@ -648,6 +653,7 @@ #define tcg_gen_gvec_dup_i32 tcg_gen_gvec_dup_i32_s390x #define tcg_gen_gvec_dup_i64 tcg_gen_gvec_dup_i64_s390x #define tcg_gen_gvec_dup_mem tcg_gen_gvec_dup_mem_s390x +#define tcg_gen_gvec_dup_imm tcg_gen_gvec_dup_imm_s390x #define tcg_gen_gvec_dup64i tcg_gen_gvec_dup64i_s390x #define tcg_gen_gvec_dup32i tcg_gen_gvec_dup32i_s390x #define tcg_gen_gvec_dup16i tcg_gen_gvec_dup16i_s390x @@ -702,13 +708,20 @@ #define tcg_gen_gvec_shri tcg_gen_gvec_shri_s390x #define tcg_gen_vec_sar8i_i64 tcg_gen_vec_sar8i_i64_s390x #define tcg_gen_vec_sar16i_i64 tcg_gen_vec_sar16i_i64_s390x +#define tcg_gen_vec_rotl8i_i64 tcg_gen_vec_rotl8i_i64_s390x +#define tcg_gen_vec_rotl16i_i64 tcg_gen_vec_rotl16i_i64_s390x #define tcg_gen_gvec_sari tcg_gen_gvec_sari_s390x +#define tcg_gen_gvec_rotli tcg_gen_gvec_rotli_s390x +#define tcg_gen_gvec_rotri tcg_gen_gvec_rotri_s390x #define tcg_gen_gvec_shls 
tcg_gen_gvec_shls_s390x #define tcg_gen_gvec_shrs tcg_gen_gvec_shrs_s390x #define tcg_gen_gvec_sars tcg_gen_gvec_sars_s390x +#define tcg_gen_gvec_rotls tcg_gen_gvec_rotls_s390x #define tcg_gen_gvec_shlv tcg_gen_gvec_shlv_s390x #define tcg_gen_gvec_shrv tcg_gen_gvec_shrv_s390x #define tcg_gen_gvec_sarv tcg_gen_gvec_sarv_s390x +#define tcg_gen_gvec_rotlv tcg_gen_gvec_rotlv_s390x +#define tcg_gen_gvec_rotrv tcg_gen_gvec_rotrv_s390x #define tcg_gen_gvec_cmp tcg_gen_gvec_cmp_s390x #define tcg_gen_gvec_bitsel tcg_gen_gvec_bitsel_s390x #define tcg_can_emit_vecop_list tcg_can_emit_vecop_list_s390x @@ -745,6 +758,8 @@ #define tcg_gen_shli_vec tcg_gen_shli_vec_s390x #define tcg_gen_shri_vec tcg_gen_shri_vec_s390x #define tcg_gen_sari_vec tcg_gen_sari_vec_s390x +#define tcg_gen_rotli_vec tcg_gen_rotli_vec_s390x +#define tcg_gen_rotri_vec tcg_gen_rotri_vec_s390x #define tcg_gen_cmp_vec tcg_gen_cmp_vec_s390x #define tcg_gen_add_vec tcg_gen_add_vec_s390x #define tcg_gen_sub_vec tcg_gen_sub_vec_s390x @@ -760,9 +775,12 @@ #define tcg_gen_shlv_vec tcg_gen_shlv_vec_s390x #define tcg_gen_shrv_vec tcg_gen_shrv_vec_s390x #define tcg_gen_sarv_vec tcg_gen_sarv_vec_s390x +#define tcg_gen_rotlv_vec tcg_gen_rotlv_vec_s390x +#define tcg_gen_rotrv_vec tcg_gen_rotrv_vec_s390x #define tcg_gen_shls_vec tcg_gen_shls_vec_s390x #define tcg_gen_shrs_vec tcg_gen_shrs_vec_s390x #define tcg_gen_sars_vec tcg_gen_sars_vec_s390x +#define tcg_gen_rotls_vec tcg_gen_rotls_vec_s390x #define tcg_gen_bitsel_vec tcg_gen_bitsel_vec_s390x #define tcg_gen_cmpsel_vec tcg_gen_cmpsel_vec_s390x #define tb_htable_lookup tb_htable_lookup_s390x @@ -774,6 +792,7 @@ #define cpu_loop_exit_restore cpu_loop_exit_restore_s390x #define cpu_loop_exit_atomic cpu_loop_exit_atomic_s390x #define tlb_init tlb_init_s390x +#define tlb_destroy tlb_destroy_s390x #define tlb_flush_by_mmuidx tlb_flush_by_mmuidx_s390x #define tlb_flush tlb_flush_s390x #define tlb_flush_by_mmuidx_all_cpus tlb_flush_by_mmuidx_all_cpus_s390x @@ -794,6 +813,7 @@ #define tlb_set_page tlb_set_page_s390x #define get_page_addr_code_hostp get_page_addr_code_hostp_s390x #define get_page_addr_code get_page_addr_code_s390x +#define probe_access_flags probe_access_flags_s390x #define probe_access probe_access_s390x #define tlb_vaddr_to_host tlb_vaddr_to_host_s390x #define helper_ret_ldub_mmu helper_ret_ldub_mmu_s390x @@ -810,22 +830,34 @@ #define helper_be_ldsl_mmu helper_be_ldsl_mmu_s390x #define cpu_ldub_mmuidx_ra cpu_ldub_mmuidx_ra_s390x #define cpu_ldsb_mmuidx_ra cpu_ldsb_mmuidx_ra_s390x -#define cpu_lduw_mmuidx_ra cpu_lduw_mmuidx_ra_s390x -#define cpu_ldsw_mmuidx_ra cpu_ldsw_mmuidx_ra_s390x -#define cpu_ldl_mmuidx_ra cpu_ldl_mmuidx_ra_s390x -#define cpu_ldq_mmuidx_ra cpu_ldq_mmuidx_ra_s390x +#define cpu_lduw_be_mmuidx_ra cpu_lduw_be_mmuidx_ra_s390x +#define cpu_lduw_le_mmuidx_ra cpu_lduw_le_mmuidx_ra_s390x +#define cpu_ldsw_be_mmuidx_ra cpu_ldsw_be_mmuidx_ra_s390x +#define cpu_ldsw_le_mmuidx_ra cpu_ldsw_le_mmuidx_ra_s390x +#define cpu_ldl_be_mmuidx_ra cpu_ldl_be_mmuidx_ra_s390x +#define cpu_ldl_le_mmuidx_ra cpu_ldl_le_mmuidx_ra_s390x +#define cpu_ldq_be_mmuidx_ra cpu_ldq_be_mmuidx_ra_s390x +#define cpu_ldq_le_mmuidx_ra cpu_ldq_le_mmuidx_ra_s390x #define cpu_ldub_data_ra cpu_ldub_data_ra_s390x #define cpu_ldsb_data_ra cpu_ldsb_data_ra_s390x -#define cpu_lduw_data_ra cpu_lduw_data_ra_s390x -#define cpu_ldsw_data_ra cpu_ldsw_data_ra_s390x -#define cpu_ldl_data_ra cpu_ldl_data_ra_s390x -#define cpu_ldq_data_ra cpu_ldq_data_ra_s390x +#define cpu_lduw_be_data_ra cpu_lduw_be_data_ra_s390x +#define 
cpu_lduw_le_data_ra cpu_lduw_le_data_ra_s390x +#define cpu_ldsw_be_data_ra cpu_ldsw_be_data_ra_s390x +#define cpu_ldsw_le_data_ra cpu_ldsw_le_data_ra_s390x +#define cpu_ldl_be_data_ra cpu_ldl_be_data_ra_s390x +#define cpu_ldl_le_data_ra cpu_ldl_le_data_ra_s390x +#define cpu_ldq_be_data_ra cpu_ldq_be_data_ra_s390x +#define cpu_ldq_le_data_ra cpu_ldq_le_data_ra_s390x #define cpu_ldub_data cpu_ldub_data_s390x #define cpu_ldsb_data cpu_ldsb_data_s390x -#define cpu_lduw_data cpu_lduw_data_s390x -#define cpu_ldsw_data cpu_ldsw_data_s390x -#define cpu_ldl_data cpu_ldl_data_s390x -#define cpu_ldq_data cpu_ldq_data_s390x +#define cpu_lduw_be_data cpu_lduw_be_data_s390x +#define cpu_lduw_le_data cpu_lduw_le_data_s390x +#define cpu_ldsw_be_data cpu_ldsw_be_data_s390x +#define cpu_ldsw_le_data cpu_ldsw_le_data_s390x +#define cpu_ldl_be_data cpu_ldl_be_data_s390x +#define cpu_ldl_le_data cpu_ldl_le_data_s390x +#define cpu_ldq_le_data cpu_ldq_le_data_s390x +#define cpu_ldq_be_data cpu_ldq_be_data_s390x #define helper_ret_stb_mmu helper_ret_stb_mmu_s390x #define helper_le_stw_mmu helper_le_stw_mmu_s390x #define helper_be_stw_mmu helper_be_stw_mmu_s390x @@ -834,17 +866,26 @@ #define helper_le_stq_mmu helper_le_stq_mmu_s390x #define helper_be_stq_mmu helper_be_stq_mmu_s390x #define cpu_stb_mmuidx_ra cpu_stb_mmuidx_ra_s390x -#define cpu_stw_mmuidx_ra cpu_stw_mmuidx_ra_s390x -#define cpu_stl_mmuidx_ra cpu_stl_mmuidx_ra_s390x -#define cpu_stq_mmuidx_ra cpu_stq_mmuidx_ra_s390x +#define cpu_stw_be_mmuidx_ra cpu_stw_be_mmuidx_ra_s390x +#define cpu_stw_le_mmuidx_ra cpu_stw_le_mmuidx_ra_s390x +#define cpu_stl_be_mmuidx_ra cpu_stl_be_mmuidx_ra_s390x +#define cpu_stl_le_mmuidx_ra cpu_stl_le_mmuidx_ra_s390x +#define cpu_stq_be_mmuidx_ra cpu_stq_be_mmuidx_ra_s390x +#define cpu_stq_le_mmuidx_ra cpu_stq_le_mmuidx_ra_s390x #define cpu_stb_data_ra cpu_stb_data_ra_s390x -#define cpu_stw_data_ra cpu_stw_data_ra_s390x -#define cpu_stl_data_ra cpu_stl_data_ra_s390x -#define cpu_stq_data_ra cpu_stq_data_ra_s390x +#define cpu_stw_be_data_ra cpu_stw_be_data_ra_s390x +#define cpu_stw_le_data_ra cpu_stw_le_data_ra_s390x +#define cpu_stl_be_data_ra cpu_stl_be_data_ra_s390x +#define cpu_stl_le_data_ra cpu_stl_le_data_ra_s390x +#define cpu_stq_be_data_ra cpu_stq_be_data_ra_s390x +#define cpu_stq_le_data_ra cpu_stq_le_data_ra_s390x #define cpu_stb_data cpu_stb_data_s390x -#define cpu_stw_data cpu_stw_data_s390x -#define cpu_stl_data cpu_stl_data_s390x -#define cpu_stq_data cpu_stq_data_s390x +#define cpu_stw_be_data cpu_stw_be_data_s390x +#define cpu_stw_le_data cpu_stw_le_data_s390x +#define cpu_stl_be_data cpu_stl_be_data_s390x +#define cpu_stl_le_data cpu_stl_le_data_s390x +#define cpu_stq_be_data cpu_stq_be_data_s390x +#define cpu_stq_le_data cpu_stq_le_data_s390x #define helper_atomic_cmpxchgb_mmu helper_atomic_cmpxchgb_mmu_s390x #define helper_atomic_xchgb_mmu helper_atomic_xchgb_mmu_s390x #define helper_atomic_fetch_addb_mmu helper_atomic_fetch_addb_mmu_s390x @@ -1101,6 +1142,7 @@ #define cpu_lduw_code cpu_lduw_code_s390x #define cpu_ldl_code cpu_ldl_code_s390x #define cpu_ldq_code cpu_ldq_code_s390x +#define cpu_interrupt_handler cpu_interrupt_handler_s390x #define helper_div_i32 helper_div_i32_s390x #define helper_rem_i32 helper_rem_i32_s390x #define helper_divu_i32 helper_divu_i32_s390x @@ -1185,6 +1227,10 @@ #define helper_gvec_sar16i helper_gvec_sar16i_s390x #define helper_gvec_sar32i helper_gvec_sar32i_s390x #define helper_gvec_sar64i helper_gvec_sar64i_s390x +#define helper_gvec_rotl8i helper_gvec_rotl8i_s390x +#define 
helper_gvec_rotl16i helper_gvec_rotl16i_s390x +#define helper_gvec_rotl32i helper_gvec_rotl32i_s390x +#define helper_gvec_rotl64i helper_gvec_rotl64i_s390x #define helper_gvec_shl8v helper_gvec_shl8v_s390x #define helper_gvec_shl16v helper_gvec_shl16v_s390x #define helper_gvec_shl32v helper_gvec_shl32v_s390x @@ -1197,6 +1243,14 @@ #define helper_gvec_sar16v helper_gvec_sar16v_s390x #define helper_gvec_sar32v helper_gvec_sar32v_s390x #define helper_gvec_sar64v helper_gvec_sar64v_s390x +#define helper_gvec_rotl8v helper_gvec_rotl8v_s390x +#define helper_gvec_rotl16v helper_gvec_rotl16v_s390x +#define helper_gvec_rotl32v helper_gvec_rotl32v_s390x +#define helper_gvec_rotl64v helper_gvec_rotl64v_s390x +#define helper_gvec_rotr8v helper_gvec_rotr8v_s390x +#define helper_gvec_rotr16v helper_gvec_rotr16v_s390x +#define helper_gvec_rotr32v helper_gvec_rotr32v_s390x +#define helper_gvec_rotr64v helper_gvec_rotr64v_s390x #define helper_gvec_eq8 helper_gvec_eq8_s390x #define helper_gvec_ne8 helper_gvec_ne8_s390x #define helper_gvec_lt8 helper_gvec_lt8_s390x diff --git a/qemu/sparc.h b/qemu/sparc.h index 32be40ab68..aeee045594 100644 --- a/qemu/sparc.h +++ b/qemu/sparc.h @@ -42,7 +42,10 @@ #define tcg_gen_shl_i64 tcg_gen_shl_i64_sparc #define tcg_gen_shr_i64 tcg_gen_shr_i64_sparc #define tcg_gen_st_i64 tcg_gen_st_i64_sparc +#define tcg_gen_add_i64 tcg_gen_add_i64_sparc +#define tcg_gen_sub_i64 tcg_gen_sub_i64_sparc #define tcg_gen_xor_i64 tcg_gen_xor_i64_sparc +#define tcg_gen_neg_i64 tcg_gen_neg_i64_sparc #define cpu_icount_to_ns cpu_icount_to_ns_sparc #define cpu_is_stopped cpu_is_stopped_sparc #define cpu_get_ticks cpu_get_ticks_sparc @@ -374,6 +377,8 @@ #define floatx80_sub floatx80_sub_sparc #define floatx80_mul floatx80_mul_sparc #define floatx80_div floatx80_div_sparc +#define floatx80_modrem floatx80_modrem_sparc +#define floatx80_mod floatx80_mod_sparc #define floatx80_rem floatx80_rem_sparc #define floatx80_sqrt floatx80_sqrt_sparc #define floatx80_eq floatx80_eq_sparc @@ -648,6 +653,7 @@ #define tcg_gen_gvec_dup_i32 tcg_gen_gvec_dup_i32_sparc #define tcg_gen_gvec_dup_i64 tcg_gen_gvec_dup_i64_sparc #define tcg_gen_gvec_dup_mem tcg_gen_gvec_dup_mem_sparc +#define tcg_gen_gvec_dup_imm tcg_gen_gvec_dup_imm_sparc #define tcg_gen_gvec_dup64i tcg_gen_gvec_dup64i_sparc #define tcg_gen_gvec_dup32i tcg_gen_gvec_dup32i_sparc #define tcg_gen_gvec_dup16i tcg_gen_gvec_dup16i_sparc @@ -702,13 +708,20 @@ #define tcg_gen_gvec_shri tcg_gen_gvec_shri_sparc #define tcg_gen_vec_sar8i_i64 tcg_gen_vec_sar8i_i64_sparc #define tcg_gen_vec_sar16i_i64 tcg_gen_vec_sar16i_i64_sparc +#define tcg_gen_vec_rotl8i_i64 tcg_gen_vec_rotl8i_i64_sparc +#define tcg_gen_vec_rotl16i_i64 tcg_gen_vec_rotl16i_i64_sparc #define tcg_gen_gvec_sari tcg_gen_gvec_sari_sparc +#define tcg_gen_gvec_rotli tcg_gen_gvec_rotli_sparc +#define tcg_gen_gvec_rotri tcg_gen_gvec_rotri_sparc #define tcg_gen_gvec_shls tcg_gen_gvec_shls_sparc #define tcg_gen_gvec_shrs tcg_gen_gvec_shrs_sparc #define tcg_gen_gvec_sars tcg_gen_gvec_sars_sparc +#define tcg_gen_gvec_rotls tcg_gen_gvec_rotls_sparc #define tcg_gen_gvec_shlv tcg_gen_gvec_shlv_sparc #define tcg_gen_gvec_shrv tcg_gen_gvec_shrv_sparc #define tcg_gen_gvec_sarv tcg_gen_gvec_sarv_sparc +#define tcg_gen_gvec_rotlv tcg_gen_gvec_rotlv_sparc +#define tcg_gen_gvec_rotrv tcg_gen_gvec_rotrv_sparc #define tcg_gen_gvec_cmp tcg_gen_gvec_cmp_sparc #define tcg_gen_gvec_bitsel tcg_gen_gvec_bitsel_sparc #define tcg_can_emit_vecop_list tcg_can_emit_vecop_list_sparc @@ -745,6 +758,8 @@ #define tcg_gen_shli_vec 
tcg_gen_shli_vec_sparc #define tcg_gen_shri_vec tcg_gen_shri_vec_sparc #define tcg_gen_sari_vec tcg_gen_sari_vec_sparc +#define tcg_gen_rotli_vec tcg_gen_rotli_vec_sparc +#define tcg_gen_rotri_vec tcg_gen_rotri_vec_sparc #define tcg_gen_cmp_vec tcg_gen_cmp_vec_sparc #define tcg_gen_add_vec tcg_gen_add_vec_sparc #define tcg_gen_sub_vec tcg_gen_sub_vec_sparc @@ -760,9 +775,12 @@ #define tcg_gen_shlv_vec tcg_gen_shlv_vec_sparc #define tcg_gen_shrv_vec tcg_gen_shrv_vec_sparc #define tcg_gen_sarv_vec tcg_gen_sarv_vec_sparc +#define tcg_gen_rotlv_vec tcg_gen_rotlv_vec_sparc +#define tcg_gen_rotrv_vec tcg_gen_rotrv_vec_sparc #define tcg_gen_shls_vec tcg_gen_shls_vec_sparc #define tcg_gen_shrs_vec tcg_gen_shrs_vec_sparc #define tcg_gen_sars_vec tcg_gen_sars_vec_sparc +#define tcg_gen_rotls_vec tcg_gen_rotls_vec_sparc #define tcg_gen_bitsel_vec tcg_gen_bitsel_vec_sparc #define tcg_gen_cmpsel_vec tcg_gen_cmpsel_vec_sparc #define tb_htable_lookup tb_htable_lookup_sparc @@ -774,6 +792,7 @@ #define cpu_loop_exit_restore cpu_loop_exit_restore_sparc #define cpu_loop_exit_atomic cpu_loop_exit_atomic_sparc #define tlb_init tlb_init_sparc +#define tlb_destroy tlb_destroy_sparc #define tlb_flush_by_mmuidx tlb_flush_by_mmuidx_sparc #define tlb_flush tlb_flush_sparc #define tlb_flush_by_mmuidx_all_cpus tlb_flush_by_mmuidx_all_cpus_sparc @@ -794,6 +813,7 @@ #define tlb_set_page tlb_set_page_sparc #define get_page_addr_code_hostp get_page_addr_code_hostp_sparc #define get_page_addr_code get_page_addr_code_sparc +#define probe_access_flags probe_access_flags_sparc #define probe_access probe_access_sparc #define tlb_vaddr_to_host tlb_vaddr_to_host_sparc #define helper_ret_ldub_mmu helper_ret_ldub_mmu_sparc @@ -810,22 +830,34 @@ #define helper_be_ldsl_mmu helper_be_ldsl_mmu_sparc #define cpu_ldub_mmuidx_ra cpu_ldub_mmuidx_ra_sparc #define cpu_ldsb_mmuidx_ra cpu_ldsb_mmuidx_ra_sparc -#define cpu_lduw_mmuidx_ra cpu_lduw_mmuidx_ra_sparc -#define cpu_ldsw_mmuidx_ra cpu_ldsw_mmuidx_ra_sparc -#define cpu_ldl_mmuidx_ra cpu_ldl_mmuidx_ra_sparc -#define cpu_ldq_mmuidx_ra cpu_ldq_mmuidx_ra_sparc +#define cpu_lduw_be_mmuidx_ra cpu_lduw_be_mmuidx_ra_sparc +#define cpu_lduw_le_mmuidx_ra cpu_lduw_le_mmuidx_ra_sparc +#define cpu_ldsw_be_mmuidx_ra cpu_ldsw_be_mmuidx_ra_sparc +#define cpu_ldsw_le_mmuidx_ra cpu_ldsw_le_mmuidx_ra_sparc +#define cpu_ldl_be_mmuidx_ra cpu_ldl_be_mmuidx_ra_sparc +#define cpu_ldl_le_mmuidx_ra cpu_ldl_le_mmuidx_ra_sparc +#define cpu_ldq_be_mmuidx_ra cpu_ldq_be_mmuidx_ra_sparc +#define cpu_ldq_le_mmuidx_ra cpu_ldq_le_mmuidx_ra_sparc #define cpu_ldub_data_ra cpu_ldub_data_ra_sparc #define cpu_ldsb_data_ra cpu_ldsb_data_ra_sparc -#define cpu_lduw_data_ra cpu_lduw_data_ra_sparc -#define cpu_ldsw_data_ra cpu_ldsw_data_ra_sparc -#define cpu_ldl_data_ra cpu_ldl_data_ra_sparc -#define cpu_ldq_data_ra cpu_ldq_data_ra_sparc +#define cpu_lduw_be_data_ra cpu_lduw_be_data_ra_sparc +#define cpu_lduw_le_data_ra cpu_lduw_le_data_ra_sparc +#define cpu_ldsw_be_data_ra cpu_ldsw_be_data_ra_sparc +#define cpu_ldsw_le_data_ra cpu_ldsw_le_data_ra_sparc +#define cpu_ldl_be_data_ra cpu_ldl_be_data_ra_sparc +#define cpu_ldl_le_data_ra cpu_ldl_le_data_ra_sparc +#define cpu_ldq_be_data_ra cpu_ldq_be_data_ra_sparc +#define cpu_ldq_le_data_ra cpu_ldq_le_data_ra_sparc #define cpu_ldub_data cpu_ldub_data_sparc #define cpu_ldsb_data cpu_ldsb_data_sparc -#define cpu_lduw_data cpu_lduw_data_sparc -#define cpu_ldsw_data cpu_ldsw_data_sparc -#define cpu_ldl_data cpu_ldl_data_sparc -#define cpu_ldq_data cpu_ldq_data_sparc +#define 
cpu_lduw_be_data cpu_lduw_be_data_sparc +#define cpu_lduw_le_data cpu_lduw_le_data_sparc +#define cpu_ldsw_be_data cpu_ldsw_be_data_sparc +#define cpu_ldsw_le_data cpu_ldsw_le_data_sparc +#define cpu_ldl_be_data cpu_ldl_be_data_sparc +#define cpu_ldl_le_data cpu_ldl_le_data_sparc +#define cpu_ldq_le_data cpu_ldq_le_data_sparc +#define cpu_ldq_be_data cpu_ldq_be_data_sparc #define helper_ret_stb_mmu helper_ret_stb_mmu_sparc #define helper_le_stw_mmu helper_le_stw_mmu_sparc #define helper_be_stw_mmu helper_be_stw_mmu_sparc @@ -834,17 +866,26 @@ #define helper_le_stq_mmu helper_le_stq_mmu_sparc #define helper_be_stq_mmu helper_be_stq_mmu_sparc #define cpu_stb_mmuidx_ra cpu_stb_mmuidx_ra_sparc -#define cpu_stw_mmuidx_ra cpu_stw_mmuidx_ra_sparc -#define cpu_stl_mmuidx_ra cpu_stl_mmuidx_ra_sparc -#define cpu_stq_mmuidx_ra cpu_stq_mmuidx_ra_sparc +#define cpu_stw_be_mmuidx_ra cpu_stw_be_mmuidx_ra_sparc +#define cpu_stw_le_mmuidx_ra cpu_stw_le_mmuidx_ra_sparc +#define cpu_stl_be_mmuidx_ra cpu_stl_be_mmuidx_ra_sparc +#define cpu_stl_le_mmuidx_ra cpu_stl_le_mmuidx_ra_sparc +#define cpu_stq_be_mmuidx_ra cpu_stq_be_mmuidx_ra_sparc +#define cpu_stq_le_mmuidx_ra cpu_stq_le_mmuidx_ra_sparc #define cpu_stb_data_ra cpu_stb_data_ra_sparc -#define cpu_stw_data_ra cpu_stw_data_ra_sparc -#define cpu_stl_data_ra cpu_stl_data_ra_sparc -#define cpu_stq_data_ra cpu_stq_data_ra_sparc +#define cpu_stw_be_data_ra cpu_stw_be_data_ra_sparc +#define cpu_stw_le_data_ra cpu_stw_le_data_ra_sparc +#define cpu_stl_be_data_ra cpu_stl_be_data_ra_sparc +#define cpu_stl_le_data_ra cpu_stl_le_data_ra_sparc +#define cpu_stq_be_data_ra cpu_stq_be_data_ra_sparc +#define cpu_stq_le_data_ra cpu_stq_le_data_ra_sparc #define cpu_stb_data cpu_stb_data_sparc -#define cpu_stw_data cpu_stw_data_sparc -#define cpu_stl_data cpu_stl_data_sparc -#define cpu_stq_data cpu_stq_data_sparc +#define cpu_stw_be_data cpu_stw_be_data_sparc +#define cpu_stw_le_data cpu_stw_le_data_sparc +#define cpu_stl_be_data cpu_stl_be_data_sparc +#define cpu_stl_le_data cpu_stl_le_data_sparc +#define cpu_stq_be_data cpu_stq_be_data_sparc +#define cpu_stq_le_data cpu_stq_le_data_sparc #define helper_atomic_cmpxchgb_mmu helper_atomic_cmpxchgb_mmu_sparc #define helper_atomic_xchgb_mmu helper_atomic_xchgb_mmu_sparc #define helper_atomic_fetch_addb_mmu helper_atomic_fetch_addb_mmu_sparc @@ -1101,6 +1142,7 @@ #define cpu_lduw_code cpu_lduw_code_sparc #define cpu_ldl_code cpu_ldl_code_sparc #define cpu_ldq_code cpu_ldq_code_sparc +#define cpu_interrupt_handler cpu_interrupt_handler_sparc #define helper_div_i32 helper_div_i32_sparc #define helper_rem_i32 helper_rem_i32_sparc #define helper_divu_i32 helper_divu_i32_sparc @@ -1185,6 +1227,10 @@ #define helper_gvec_sar16i helper_gvec_sar16i_sparc #define helper_gvec_sar32i helper_gvec_sar32i_sparc #define helper_gvec_sar64i helper_gvec_sar64i_sparc +#define helper_gvec_rotl8i helper_gvec_rotl8i_sparc +#define helper_gvec_rotl16i helper_gvec_rotl16i_sparc +#define helper_gvec_rotl32i helper_gvec_rotl32i_sparc +#define helper_gvec_rotl64i helper_gvec_rotl64i_sparc #define helper_gvec_shl8v helper_gvec_shl8v_sparc #define helper_gvec_shl16v helper_gvec_shl16v_sparc #define helper_gvec_shl32v helper_gvec_shl32v_sparc @@ -1197,6 +1243,14 @@ #define helper_gvec_sar16v helper_gvec_sar16v_sparc #define helper_gvec_sar32v helper_gvec_sar32v_sparc #define helper_gvec_sar64v helper_gvec_sar64v_sparc +#define helper_gvec_rotl8v helper_gvec_rotl8v_sparc +#define helper_gvec_rotl16v helper_gvec_rotl16v_sparc +#define helper_gvec_rotl32v 
helper_gvec_rotl32v_sparc +#define helper_gvec_rotl64v helper_gvec_rotl64v_sparc +#define helper_gvec_rotr8v helper_gvec_rotr8v_sparc +#define helper_gvec_rotr16v helper_gvec_rotr16v_sparc +#define helper_gvec_rotr32v helper_gvec_rotr32v_sparc +#define helper_gvec_rotr64v helper_gvec_rotr64v_sparc #define helper_gvec_eq8 helper_gvec_eq8_sparc #define helper_gvec_ne8 helper_gvec_ne8_sparc #define helper_gvec_lt8 helper_gvec_lt8_sparc diff --git a/qemu/sparc64.h b/qemu/sparc64.h index c9f6f2fcbd..f12e6380cf 100644 --- a/qemu/sparc64.h +++ b/qemu/sparc64.h @@ -42,7 +42,10 @@ #define tcg_gen_shl_i64 tcg_gen_shl_i64_sparc64 #define tcg_gen_shr_i64 tcg_gen_shr_i64_sparc64 #define tcg_gen_st_i64 tcg_gen_st_i64_sparc64 +#define tcg_gen_add_i64 tcg_gen_add_i64_sparc64 +#define tcg_gen_sub_i64 tcg_gen_sub_i64_sparc64 #define tcg_gen_xor_i64 tcg_gen_xor_i64_sparc64 +#define tcg_gen_neg_i64 tcg_gen_neg_i64_sparc64 #define cpu_icount_to_ns cpu_icount_to_ns_sparc64 #define cpu_is_stopped cpu_is_stopped_sparc64 #define cpu_get_ticks cpu_get_ticks_sparc64 @@ -374,6 +377,8 @@ #define floatx80_sub floatx80_sub_sparc64 #define floatx80_mul floatx80_mul_sparc64 #define floatx80_div floatx80_div_sparc64 +#define floatx80_modrem floatx80_modrem_sparc64 +#define floatx80_mod floatx80_mod_sparc64 #define floatx80_rem floatx80_rem_sparc64 #define floatx80_sqrt floatx80_sqrt_sparc64 #define floatx80_eq floatx80_eq_sparc64 @@ -648,6 +653,7 @@ #define tcg_gen_gvec_dup_i32 tcg_gen_gvec_dup_i32_sparc64 #define tcg_gen_gvec_dup_i64 tcg_gen_gvec_dup_i64_sparc64 #define tcg_gen_gvec_dup_mem tcg_gen_gvec_dup_mem_sparc64 +#define tcg_gen_gvec_dup_imm tcg_gen_gvec_dup_imm_sparc64 #define tcg_gen_gvec_dup64i tcg_gen_gvec_dup64i_sparc64 #define tcg_gen_gvec_dup32i tcg_gen_gvec_dup32i_sparc64 #define tcg_gen_gvec_dup16i tcg_gen_gvec_dup16i_sparc64 @@ -702,13 +708,20 @@ #define tcg_gen_gvec_shri tcg_gen_gvec_shri_sparc64 #define tcg_gen_vec_sar8i_i64 tcg_gen_vec_sar8i_i64_sparc64 #define tcg_gen_vec_sar16i_i64 tcg_gen_vec_sar16i_i64_sparc64 +#define tcg_gen_vec_rotl8i_i64 tcg_gen_vec_rotl8i_i64_sparc64 +#define tcg_gen_vec_rotl16i_i64 tcg_gen_vec_rotl16i_i64_sparc64 #define tcg_gen_gvec_sari tcg_gen_gvec_sari_sparc64 +#define tcg_gen_gvec_rotli tcg_gen_gvec_rotli_sparc64 +#define tcg_gen_gvec_rotri tcg_gen_gvec_rotri_sparc64 #define tcg_gen_gvec_shls tcg_gen_gvec_shls_sparc64 #define tcg_gen_gvec_shrs tcg_gen_gvec_shrs_sparc64 #define tcg_gen_gvec_sars tcg_gen_gvec_sars_sparc64 +#define tcg_gen_gvec_rotls tcg_gen_gvec_rotls_sparc64 #define tcg_gen_gvec_shlv tcg_gen_gvec_shlv_sparc64 #define tcg_gen_gvec_shrv tcg_gen_gvec_shrv_sparc64 #define tcg_gen_gvec_sarv tcg_gen_gvec_sarv_sparc64 +#define tcg_gen_gvec_rotlv tcg_gen_gvec_rotlv_sparc64 +#define tcg_gen_gvec_rotrv tcg_gen_gvec_rotrv_sparc64 #define tcg_gen_gvec_cmp tcg_gen_gvec_cmp_sparc64 #define tcg_gen_gvec_bitsel tcg_gen_gvec_bitsel_sparc64 #define tcg_can_emit_vecop_list tcg_can_emit_vecop_list_sparc64 @@ -745,6 +758,8 @@ #define tcg_gen_shli_vec tcg_gen_shli_vec_sparc64 #define tcg_gen_shri_vec tcg_gen_shri_vec_sparc64 #define tcg_gen_sari_vec tcg_gen_sari_vec_sparc64 +#define tcg_gen_rotli_vec tcg_gen_rotli_vec_sparc64 +#define tcg_gen_rotri_vec tcg_gen_rotri_vec_sparc64 #define tcg_gen_cmp_vec tcg_gen_cmp_vec_sparc64 #define tcg_gen_add_vec tcg_gen_add_vec_sparc64 #define tcg_gen_sub_vec tcg_gen_sub_vec_sparc64 @@ -760,9 +775,12 @@ #define tcg_gen_shlv_vec tcg_gen_shlv_vec_sparc64 #define tcg_gen_shrv_vec tcg_gen_shrv_vec_sparc64 #define tcg_gen_sarv_vec 
tcg_gen_sarv_vec_sparc64 +#define tcg_gen_rotlv_vec tcg_gen_rotlv_vec_sparc64 +#define tcg_gen_rotrv_vec tcg_gen_rotrv_vec_sparc64 #define tcg_gen_shls_vec tcg_gen_shls_vec_sparc64 #define tcg_gen_shrs_vec tcg_gen_shrs_vec_sparc64 #define tcg_gen_sars_vec tcg_gen_sars_vec_sparc64 +#define tcg_gen_rotls_vec tcg_gen_rotls_vec_sparc64 #define tcg_gen_bitsel_vec tcg_gen_bitsel_vec_sparc64 #define tcg_gen_cmpsel_vec tcg_gen_cmpsel_vec_sparc64 #define tb_htable_lookup tb_htable_lookup_sparc64 @@ -774,6 +792,7 @@ #define cpu_loop_exit_restore cpu_loop_exit_restore_sparc64 #define cpu_loop_exit_atomic cpu_loop_exit_atomic_sparc64 #define tlb_init tlb_init_sparc64 +#define tlb_destroy tlb_destroy_sparc64 #define tlb_flush_by_mmuidx tlb_flush_by_mmuidx_sparc64 #define tlb_flush tlb_flush_sparc64 #define tlb_flush_by_mmuidx_all_cpus tlb_flush_by_mmuidx_all_cpus_sparc64 @@ -794,6 +813,7 @@ #define tlb_set_page tlb_set_page_sparc64 #define get_page_addr_code_hostp get_page_addr_code_hostp_sparc64 #define get_page_addr_code get_page_addr_code_sparc64 +#define probe_access_flags probe_access_flags_sparc64 #define probe_access probe_access_sparc64 #define tlb_vaddr_to_host tlb_vaddr_to_host_sparc64 #define helper_ret_ldub_mmu helper_ret_ldub_mmu_sparc64 @@ -810,22 +830,34 @@ #define helper_be_ldsl_mmu helper_be_ldsl_mmu_sparc64 #define cpu_ldub_mmuidx_ra cpu_ldub_mmuidx_ra_sparc64 #define cpu_ldsb_mmuidx_ra cpu_ldsb_mmuidx_ra_sparc64 -#define cpu_lduw_mmuidx_ra cpu_lduw_mmuidx_ra_sparc64 -#define cpu_ldsw_mmuidx_ra cpu_ldsw_mmuidx_ra_sparc64 -#define cpu_ldl_mmuidx_ra cpu_ldl_mmuidx_ra_sparc64 -#define cpu_ldq_mmuidx_ra cpu_ldq_mmuidx_ra_sparc64 +#define cpu_lduw_be_mmuidx_ra cpu_lduw_be_mmuidx_ra_sparc64 +#define cpu_lduw_le_mmuidx_ra cpu_lduw_le_mmuidx_ra_sparc64 +#define cpu_ldsw_be_mmuidx_ra cpu_ldsw_be_mmuidx_ra_sparc64 +#define cpu_ldsw_le_mmuidx_ra cpu_ldsw_le_mmuidx_ra_sparc64 +#define cpu_ldl_be_mmuidx_ra cpu_ldl_be_mmuidx_ra_sparc64 +#define cpu_ldl_le_mmuidx_ra cpu_ldl_le_mmuidx_ra_sparc64 +#define cpu_ldq_be_mmuidx_ra cpu_ldq_be_mmuidx_ra_sparc64 +#define cpu_ldq_le_mmuidx_ra cpu_ldq_le_mmuidx_ra_sparc64 #define cpu_ldub_data_ra cpu_ldub_data_ra_sparc64 #define cpu_ldsb_data_ra cpu_ldsb_data_ra_sparc64 -#define cpu_lduw_data_ra cpu_lduw_data_ra_sparc64 -#define cpu_ldsw_data_ra cpu_ldsw_data_ra_sparc64 -#define cpu_ldl_data_ra cpu_ldl_data_ra_sparc64 -#define cpu_ldq_data_ra cpu_ldq_data_ra_sparc64 +#define cpu_lduw_be_data_ra cpu_lduw_be_data_ra_sparc64 +#define cpu_lduw_le_data_ra cpu_lduw_le_data_ra_sparc64 +#define cpu_ldsw_be_data_ra cpu_ldsw_be_data_ra_sparc64 +#define cpu_ldsw_le_data_ra cpu_ldsw_le_data_ra_sparc64 +#define cpu_ldl_be_data_ra cpu_ldl_be_data_ra_sparc64 +#define cpu_ldl_le_data_ra cpu_ldl_le_data_ra_sparc64 +#define cpu_ldq_be_data_ra cpu_ldq_be_data_ra_sparc64 +#define cpu_ldq_le_data_ra cpu_ldq_le_data_ra_sparc64 #define cpu_ldub_data cpu_ldub_data_sparc64 #define cpu_ldsb_data cpu_ldsb_data_sparc64 -#define cpu_lduw_data cpu_lduw_data_sparc64 -#define cpu_ldsw_data cpu_ldsw_data_sparc64 -#define cpu_ldl_data cpu_ldl_data_sparc64 -#define cpu_ldq_data cpu_ldq_data_sparc64 +#define cpu_lduw_be_data cpu_lduw_be_data_sparc64 +#define cpu_lduw_le_data cpu_lduw_le_data_sparc64 +#define cpu_ldsw_be_data cpu_ldsw_be_data_sparc64 +#define cpu_ldsw_le_data cpu_ldsw_le_data_sparc64 +#define cpu_ldl_be_data cpu_ldl_be_data_sparc64 +#define cpu_ldl_le_data cpu_ldl_le_data_sparc64 +#define cpu_ldq_le_data cpu_ldq_le_data_sparc64 +#define cpu_ldq_be_data cpu_ldq_be_data_sparc64 
#define helper_ret_stb_mmu helper_ret_stb_mmu_sparc64 #define helper_le_stw_mmu helper_le_stw_mmu_sparc64 #define helper_be_stw_mmu helper_be_stw_mmu_sparc64 @@ -834,17 +866,26 @@ #define helper_le_stq_mmu helper_le_stq_mmu_sparc64 #define helper_be_stq_mmu helper_be_stq_mmu_sparc64 #define cpu_stb_mmuidx_ra cpu_stb_mmuidx_ra_sparc64 -#define cpu_stw_mmuidx_ra cpu_stw_mmuidx_ra_sparc64 -#define cpu_stl_mmuidx_ra cpu_stl_mmuidx_ra_sparc64 -#define cpu_stq_mmuidx_ra cpu_stq_mmuidx_ra_sparc64 +#define cpu_stw_be_mmuidx_ra cpu_stw_be_mmuidx_ra_sparc64 +#define cpu_stw_le_mmuidx_ra cpu_stw_le_mmuidx_ra_sparc64 +#define cpu_stl_be_mmuidx_ra cpu_stl_be_mmuidx_ra_sparc64 +#define cpu_stl_le_mmuidx_ra cpu_stl_le_mmuidx_ra_sparc64 +#define cpu_stq_be_mmuidx_ra cpu_stq_be_mmuidx_ra_sparc64 +#define cpu_stq_le_mmuidx_ra cpu_stq_le_mmuidx_ra_sparc64 #define cpu_stb_data_ra cpu_stb_data_ra_sparc64 -#define cpu_stw_data_ra cpu_stw_data_ra_sparc64 -#define cpu_stl_data_ra cpu_stl_data_ra_sparc64 -#define cpu_stq_data_ra cpu_stq_data_ra_sparc64 +#define cpu_stw_be_data_ra cpu_stw_be_data_ra_sparc64 +#define cpu_stw_le_data_ra cpu_stw_le_data_ra_sparc64 +#define cpu_stl_be_data_ra cpu_stl_be_data_ra_sparc64 +#define cpu_stl_le_data_ra cpu_stl_le_data_ra_sparc64 +#define cpu_stq_be_data_ra cpu_stq_be_data_ra_sparc64 +#define cpu_stq_le_data_ra cpu_stq_le_data_ra_sparc64 #define cpu_stb_data cpu_stb_data_sparc64 -#define cpu_stw_data cpu_stw_data_sparc64 -#define cpu_stl_data cpu_stl_data_sparc64 -#define cpu_stq_data cpu_stq_data_sparc64 +#define cpu_stw_be_data cpu_stw_be_data_sparc64 +#define cpu_stw_le_data cpu_stw_le_data_sparc64 +#define cpu_stl_be_data cpu_stl_be_data_sparc64 +#define cpu_stl_le_data cpu_stl_le_data_sparc64 +#define cpu_stq_be_data cpu_stq_be_data_sparc64 +#define cpu_stq_le_data cpu_stq_le_data_sparc64 #define helper_atomic_cmpxchgb_mmu helper_atomic_cmpxchgb_mmu_sparc64 #define helper_atomic_xchgb_mmu helper_atomic_xchgb_mmu_sparc64 #define helper_atomic_fetch_addb_mmu helper_atomic_fetch_addb_mmu_sparc64 @@ -1101,6 +1142,7 @@ #define cpu_lduw_code cpu_lduw_code_sparc64 #define cpu_ldl_code cpu_ldl_code_sparc64 #define cpu_ldq_code cpu_ldq_code_sparc64 +#define cpu_interrupt_handler cpu_interrupt_handler_sparc64 #define helper_div_i32 helper_div_i32_sparc64 #define helper_rem_i32 helper_rem_i32_sparc64 #define helper_divu_i32 helper_divu_i32_sparc64 @@ -1185,6 +1227,10 @@ #define helper_gvec_sar16i helper_gvec_sar16i_sparc64 #define helper_gvec_sar32i helper_gvec_sar32i_sparc64 #define helper_gvec_sar64i helper_gvec_sar64i_sparc64 +#define helper_gvec_rotl8i helper_gvec_rotl8i_sparc64 +#define helper_gvec_rotl16i helper_gvec_rotl16i_sparc64 +#define helper_gvec_rotl32i helper_gvec_rotl32i_sparc64 +#define helper_gvec_rotl64i helper_gvec_rotl64i_sparc64 #define helper_gvec_shl8v helper_gvec_shl8v_sparc64 #define helper_gvec_shl16v helper_gvec_shl16v_sparc64 #define helper_gvec_shl32v helper_gvec_shl32v_sparc64 @@ -1197,6 +1243,14 @@ #define helper_gvec_sar16v helper_gvec_sar16v_sparc64 #define helper_gvec_sar32v helper_gvec_sar32v_sparc64 #define helper_gvec_sar64v helper_gvec_sar64v_sparc64 +#define helper_gvec_rotl8v helper_gvec_rotl8v_sparc64 +#define helper_gvec_rotl16v helper_gvec_rotl16v_sparc64 +#define helper_gvec_rotl32v helper_gvec_rotl32v_sparc64 +#define helper_gvec_rotl64v helper_gvec_rotl64v_sparc64 +#define helper_gvec_rotr8v helper_gvec_rotr8v_sparc64 +#define helper_gvec_rotr16v helper_gvec_rotr16v_sparc64 +#define helper_gvec_rotr32v helper_gvec_rotr32v_sparc64 
+#define helper_gvec_rotr64v helper_gvec_rotr64v_sparc64 #define helper_gvec_eq8 helper_gvec_eq8_sparc64 #define helper_gvec_ne8 helper_gvec_ne8_sparc64 #define helper_gvec_lt8 helper_gvec_lt8_sparc64 diff --git a/qemu/target/arm/backup.c b/qemu/target/arm/backup.c new file mode 100644 index 0000000000..5c7a651f71 --- /dev/null +++ b/qemu/target/arm/backup.c @@ -0,0 +1,5431 @@ +/* + * ARM SVE Operations + * + * Copyright (c) 2018 Linaro, Ltd. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + */ + +#include "qemu/osdep.h" +#include "cpu.h" +#include "internals.h" +#include "exec/exec-all.h" +#include "exec/cpu_ldst.h" +#include "exec/helper-proto.h" +#include "tcg/tcg-gvec-desc.h" +#include "fpu/softfloat.h" +#include "tcg/tcg.h" + + +/* Note that vector data is stored in host-endian 64-bit chunks, + so addressing units smaller than that needs a host-endian fixup. */ +#ifdef HOST_WORDS_BIGENDIAN +#define H1(x) ((x) ^ 7) +#define H1_2(x) ((x) ^ 6) +#define H1_4(x) ((x) ^ 4) +#define H2(x) ((x) ^ 3) +#define H4(x) ((x) ^ 1) +#else +#define H1(x) (x) +#define H1_2(x) (x) +#define H1_4(x) (x) +#define H2(x) (x) +#define H4(x) (x) +#endif + +/* Return a value for NZCV as per the ARM PredTest pseudofunction. + * + * The return value has bit 31 set if N is set, bit 1 set if Z is clear, + * and bit 0 set if C is set. Compare the definitions of these variables + * within CPUARMState. + */ + +/* For no G bits set, NZCV = C. */ +#define PREDTEST_INIT 1 + +/* This is an iterative function, called for each Pd and Pg word + * moving forward. + */ +static uint32_t iter_predtest_fwd(uint64_t d, uint64_t g, uint32_t flags) +{ + if (likely(g)) { + /* Compute N from first D & G. + Use bit 2 to signal first G bit seen. */ + if (!(flags & 4)) { +#ifdef _MSC_VER + flags |= ((d & (g & (0 - g))) != 0) << 31; +#else + flags |= ((d & (g & -g)) != 0) << 31; +#endif + flags |= 4; + } + + /* Accumulate Z from each D & G. */ + flags |= ((d & g) != 0) << 1; + + /* Compute C from last !(D & G). Replace previous. */ + flags = deposit32(flags, 0, 1, (d & pow2floor(g)) == 0); + } + return flags; +} + +/* This is an iterative function, called for each Pd and Pg word + * moving backward. + */ +static uint32_t iter_predtest_bwd(uint64_t d, uint64_t g, uint32_t flags) +{ + if (likely(g)) { + /* Compute C from first (i.e last) !(D & G). + Use bit 2 to signal first G bit seen. */ + if (!(flags & 4)) { + flags += 4 - 1; /* add bit 2, subtract C from PREDTEST_INIT */ + flags |= (d & pow2floor(g)) == 0; + } + + /* Accumulate Z from each D & G. */ + flags |= ((d & g) != 0) << 1; + + /* Compute N from last (i.e first) D & G. Replace previous. */ +#ifdef _MSC_VER + flags = deposit32(flags, 31, 1, (d & (g & (0 - g))) != 0); +#else + flags = deposit32(flags, 31, 1, (d & (g & -g)) != 0); +#endif + } + return flags; +} + +/* The same for a single word predicate. 
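+ * As a quick worked check (illustrative): HELPER(sve_predtest1)(1, 1) reduces to
+ * iter_predtest_fwd(1, 1, PREDTEST_INIT); bit 31 is set because the first active
+ * element of D is true, bit 1 is set because some active element is true (Z clear),
+ * and bit 0 is cleared because the last active element is also true, giving
+ * 0x80000006 (N=1, Z=0, C=0).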
*/ +uint32_t HELPER(sve_predtest1)(uint64_t d, uint64_t g) +{ + return iter_predtest_fwd(d, g, PREDTEST_INIT); +} + +/* The same for a multi-word predicate. */ +uint32_t HELPER(sve_predtest)(void *vd, void *vg, uint32_t words) +{ + uint32_t flags = PREDTEST_INIT; + uint64_t *d = vd, *g = vg; + uintptr_t i = 0; + + do { + flags = iter_predtest_fwd(d[i], g[i], flags); + } while (++i < words); + + return flags; +} + +/* Expand active predicate bits to bytes, for byte elements. + * for (i = 0; i < 256; ++i) { + * unsigned long m = 0; + * for (j = 0; j < 8; j++) { + * if ((i >> j) & 1) { + * m |= 0xfful << (j << 3); + * } + * } + * printf("0x%016lx,\n", m); + * } + */ +static inline uint64_t expand_pred_b(uint8_t byte) +{ + static const uint64_t word[256] = { + 0x0000000000000000, 0x00000000000000ff, 0x000000000000ff00, + 0x000000000000ffff, 0x0000000000ff0000, 0x0000000000ff00ff, + 0x0000000000ffff00, 0x0000000000ffffff, 0x00000000ff000000, + 0x00000000ff0000ff, 0x00000000ff00ff00, 0x00000000ff00ffff, + 0x00000000ffff0000, 0x00000000ffff00ff, 0x00000000ffffff00, + 0x00000000ffffffff, 0x000000ff00000000, 0x000000ff000000ff, + 0x000000ff0000ff00, 0x000000ff0000ffff, 0x000000ff00ff0000, + 0x000000ff00ff00ff, 0x000000ff00ffff00, 0x000000ff00ffffff, + 0x000000ffff000000, 0x000000ffff0000ff, 0x000000ffff00ff00, + 0x000000ffff00ffff, 0x000000ffffff0000, 0x000000ffffff00ff, + 0x000000ffffffff00, 0x000000ffffffffff, 0x0000ff0000000000, + 0x0000ff00000000ff, 0x0000ff000000ff00, 0x0000ff000000ffff, + 0x0000ff0000ff0000, 0x0000ff0000ff00ff, 0x0000ff0000ffff00, + 0x0000ff0000ffffff, 0x0000ff00ff000000, 0x0000ff00ff0000ff, + 0x0000ff00ff00ff00, 0x0000ff00ff00ffff, 0x0000ff00ffff0000, + 0x0000ff00ffff00ff, 0x0000ff00ffffff00, 0x0000ff00ffffffff, + 0x0000ffff00000000, 0x0000ffff000000ff, 0x0000ffff0000ff00, + 0x0000ffff0000ffff, 0x0000ffff00ff0000, 0x0000ffff00ff00ff, + 0x0000ffff00ffff00, 0x0000ffff00ffffff, 0x0000ffffff000000, + 0x0000ffffff0000ff, 0x0000ffffff00ff00, 0x0000ffffff00ffff, + 0x0000ffffffff0000, 0x0000ffffffff00ff, 0x0000ffffffffff00, + 0x0000ffffffffffff, 0x00ff000000000000, 0x00ff0000000000ff, + 0x00ff00000000ff00, 0x00ff00000000ffff, 0x00ff000000ff0000, + 0x00ff000000ff00ff, 0x00ff000000ffff00, 0x00ff000000ffffff, + 0x00ff0000ff000000, 0x00ff0000ff0000ff, 0x00ff0000ff00ff00, + 0x00ff0000ff00ffff, 0x00ff0000ffff0000, 0x00ff0000ffff00ff, + 0x00ff0000ffffff00, 0x00ff0000ffffffff, 0x00ff00ff00000000, + 0x00ff00ff000000ff, 0x00ff00ff0000ff00, 0x00ff00ff0000ffff, + 0x00ff00ff00ff0000, 0x00ff00ff00ff00ff, 0x00ff00ff00ffff00, + 0x00ff00ff00ffffff, 0x00ff00ffff000000, 0x00ff00ffff0000ff, + 0x00ff00ffff00ff00, 0x00ff00ffff00ffff, 0x00ff00ffffff0000, + 0x00ff00ffffff00ff, 0x00ff00ffffffff00, 0x00ff00ffffffffff, + 0x00ffff0000000000, 0x00ffff00000000ff, 0x00ffff000000ff00, + 0x00ffff000000ffff, 0x00ffff0000ff0000, 0x00ffff0000ff00ff, + 0x00ffff0000ffff00, 0x00ffff0000ffffff, 0x00ffff00ff000000, + 0x00ffff00ff0000ff, 0x00ffff00ff00ff00, 0x00ffff00ff00ffff, + 0x00ffff00ffff0000, 0x00ffff00ffff00ff, 0x00ffff00ffffff00, + 0x00ffff00ffffffff, 0x00ffffff00000000, 0x00ffffff000000ff, + 0x00ffffff0000ff00, 0x00ffffff0000ffff, 0x00ffffff00ff0000, + 0x00ffffff00ff00ff, 0x00ffffff00ffff00, 0x00ffffff00ffffff, + 0x00ffffffff000000, 0x00ffffffff0000ff, 0x00ffffffff00ff00, + 0x00ffffffff00ffff, 0x00ffffffffff0000, 0x00ffffffffff00ff, + 0x00ffffffffffff00, 0x00ffffffffffffff, 0xff00000000000000, + 0xff000000000000ff, 0xff0000000000ff00, 0xff0000000000ffff, + 0xff00000000ff0000, 0xff00000000ff00ff, 0xff00000000ffff00, 
+ 0xff00000000ffffff, 0xff000000ff000000, 0xff000000ff0000ff, + 0xff000000ff00ff00, 0xff000000ff00ffff, 0xff000000ffff0000, + 0xff000000ffff00ff, 0xff000000ffffff00, 0xff000000ffffffff, + 0xff0000ff00000000, 0xff0000ff000000ff, 0xff0000ff0000ff00, + 0xff0000ff0000ffff, 0xff0000ff00ff0000, 0xff0000ff00ff00ff, + 0xff0000ff00ffff00, 0xff0000ff00ffffff, 0xff0000ffff000000, + 0xff0000ffff0000ff, 0xff0000ffff00ff00, 0xff0000ffff00ffff, + 0xff0000ffffff0000, 0xff0000ffffff00ff, 0xff0000ffffffff00, + 0xff0000ffffffffff, 0xff00ff0000000000, 0xff00ff00000000ff, + 0xff00ff000000ff00, 0xff00ff000000ffff, 0xff00ff0000ff0000, + 0xff00ff0000ff00ff, 0xff00ff0000ffff00, 0xff00ff0000ffffff, + 0xff00ff00ff000000, 0xff00ff00ff0000ff, 0xff00ff00ff00ff00, + 0xff00ff00ff00ffff, 0xff00ff00ffff0000, 0xff00ff00ffff00ff, + 0xff00ff00ffffff00, 0xff00ff00ffffffff, 0xff00ffff00000000, + 0xff00ffff000000ff, 0xff00ffff0000ff00, 0xff00ffff0000ffff, + 0xff00ffff00ff0000, 0xff00ffff00ff00ff, 0xff00ffff00ffff00, + 0xff00ffff00ffffff, 0xff00ffffff000000, 0xff00ffffff0000ff, + 0xff00ffffff00ff00, 0xff00ffffff00ffff, 0xff00ffffffff0000, + 0xff00ffffffff00ff, 0xff00ffffffffff00, 0xff00ffffffffffff, + 0xffff000000000000, 0xffff0000000000ff, 0xffff00000000ff00, + 0xffff00000000ffff, 0xffff000000ff0000, 0xffff000000ff00ff, + 0xffff000000ffff00, 0xffff000000ffffff, 0xffff0000ff000000, + 0xffff0000ff0000ff, 0xffff0000ff00ff00, 0xffff0000ff00ffff, + 0xffff0000ffff0000, 0xffff0000ffff00ff, 0xffff0000ffffff00, + 0xffff0000ffffffff, 0xffff00ff00000000, 0xffff00ff000000ff, + 0xffff00ff0000ff00, 0xffff00ff0000ffff, 0xffff00ff00ff0000, + 0xffff00ff00ff00ff, 0xffff00ff00ffff00, 0xffff00ff00ffffff, + 0xffff00ffff000000, 0xffff00ffff0000ff, 0xffff00ffff00ff00, + 0xffff00ffff00ffff, 0xffff00ffffff0000, 0xffff00ffffff00ff, + 0xffff00ffffffff00, 0xffff00ffffffffff, 0xffffff0000000000, + 0xffffff00000000ff, 0xffffff000000ff00, 0xffffff000000ffff, + 0xffffff0000ff0000, 0xffffff0000ff00ff, 0xffffff0000ffff00, + 0xffffff0000ffffff, 0xffffff00ff000000, 0xffffff00ff0000ff, + 0xffffff00ff00ff00, 0xffffff00ff00ffff, 0xffffff00ffff0000, + 0xffffff00ffff00ff, 0xffffff00ffffff00, 0xffffff00ffffffff, + 0xffffffff00000000, 0xffffffff000000ff, 0xffffffff0000ff00, + 0xffffffff0000ffff, 0xffffffff00ff0000, 0xffffffff00ff00ff, + 0xffffffff00ffff00, 0xffffffff00ffffff, 0xffffffffff000000, + 0xffffffffff0000ff, 0xffffffffff00ff00, 0xffffffffff00ffff, + 0xffffffffffff0000, 0xffffffffffff00ff, 0xffffffffffffff00, + 0xffffffffffffffff, + }; + return word[byte]; +} + +/* Similarly for half-word elements. + * for (i = 0; i < 256; ++i) { + * unsigned long m = 0; + * if (i & 0xaa) { + * continue; + * } + * for (j = 0; j < 8; j += 2) { + * if ((i >> j) & 1) { + * m |= 0xfffful << (j << 3); + * } + * } + * printf("[0x%x] = 0x%016lx,\n", i, m); + * } + */ +static inline uint64_t expand_pred_h(uint8_t byte) +{ + static const uint64_t word[] = { + [0x01] = 0x000000000000ffff, [0x04] = 0x00000000ffff0000, + [0x05] = 0x00000000ffffffff, [0x10] = 0x0000ffff00000000, + [0x11] = 0x0000ffff0000ffff, [0x14] = 0x0000ffffffff0000, + [0x15] = 0x0000ffffffffffff, [0x40] = 0xffff000000000000, + [0x41] = 0xffff00000000ffff, [0x44] = 0xffff0000ffff0000, + [0x45] = 0xffff0000ffffffff, [0x50] = 0xffffffff00000000, + [0x51] = 0xffffffff0000ffff, [0x54] = 0xffffffffffff0000, + [0x55] = 0xffffffffffffffff, + }; + return word[byte & 0x55]; +} + +/* Similarly for single word elements. 
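+ * For example, expand_pred_s(0x01) selects only the low 32-bit lane
+ * (0x00000000ffffffff) while expand_pred_s(0x11) selects both lanes
+ * (0xffffffffffffffff); only bits 0 and 4 of the predicate byte matter here.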
*/ +static inline uint64_t expand_pred_s(uint8_t byte) +{ + static const uint64_t word[] = { + [0x01] = 0x00000000ffffffffull, + [0x10] = 0xffffffff00000000ull, + [0x11] = 0xffffffffffffffffull, + }; + return word[byte & 0x11]; +} + +/* Swap 16-bit words within a 32-bit word. */ +static inline uint32_t hswap32(uint32_t h) +{ + return rol32(h, 16); +} + +/* Swap 16-bit words within a 64-bit word. */ +static inline uint64_t hswap64(uint64_t h) +{ + uint64_t m = 0x0000ffff0000ffffull; + h = rol64(h, 32); + return ((h & m) << 16) | ((h >> 16) & m); +} + +/* Swap 32-bit words within a 64-bit word. */ +static inline uint64_t wswap64(uint64_t h) +{ + return rol64(h, 32); +} + +#define LOGICAL_PPPP(NAME, FUNC) \ +void HELPER(NAME)(void *vd, void *vn, void *vm, void *vg, uint32_t desc) \ +{ \ + uintptr_t opr_sz = simd_oprsz(desc); \ + uint64_t *d = vd, *n = vn, *m = vm, *g = vg; \ + uintptr_t i; \ + for (i = 0; i < opr_sz / 8; ++i) { \ + d[i] = FUNC(n[i], m[i], g[i]); \ + } \ +} + +#define DO_AND(N, M, G) (((N) & (M)) & (G)) +#define DO_BIC(N, M, G) (((N) & ~(M)) & (G)) +#define DO_EOR(N, M, G) (((N) ^ (M)) & (G)) +#define DO_ORR(N, M, G) (((N) | (M)) & (G)) +#define DO_ORN(N, M, G) (((N) | ~(M)) & (G)) +#define DO_NOR(N, M, G) (~((N) | (M)) & (G)) +#define DO_NAND(N, M, G) (~((N) & (M)) & (G)) +#define DO_SEL(N, M, G) (((N) & (G)) | ((M) & ~(G))) + +LOGICAL_PPPP(sve_and_pppp, DO_AND) +LOGICAL_PPPP(sve_bic_pppp, DO_BIC) +LOGICAL_PPPP(sve_eor_pppp, DO_EOR) +LOGICAL_PPPP(sve_sel_pppp, DO_SEL) +LOGICAL_PPPP(sve_orr_pppp, DO_ORR) +LOGICAL_PPPP(sve_orn_pppp, DO_ORN) +LOGICAL_PPPP(sve_nor_pppp, DO_NOR) +LOGICAL_PPPP(sve_nand_pppp, DO_NAND) + +#undef DO_AND +#undef DO_BIC +#undef DO_EOR +#undef DO_ORR +#undef DO_ORN +#undef DO_NOR +#undef DO_NAND +#undef DO_SEL +#undef LOGICAL_PPPP + +/* Fully general three-operand expander, controlled by a predicate. + * This is complicated by the host-endian storage of the register file. + */ +/* ??? I don't expect the compiler could ever vectorize this itself. + * With some tables we can convert bit masks to byte masks, and with + * extra care wrt byte/word ordering we could use gcc generic vectors + * and do 16 bytes at a time. + */ +#define DO_ZPZZ(NAME, TYPE, H, OP) \ +void HELPER(NAME)(void *vd, void *vn, void *vm, void *vg, uint32_t desc) \ +{ \ + intptr_t i, opr_sz = simd_oprsz(desc); \ + for (i = 0; i < opr_sz; ) { \ + uint16_t pg = *(uint16_t *)((char *)vg + H1_2(i >> 3)); \ + do { \ + if (pg & 1) { \ + TYPE nn = *(TYPE *)((char *)vn + H(i)); \ + TYPE mm = *(TYPE *)((char *)vm + H(i)); \ + *(TYPE *)((char *)vd + H(i)) = OP(nn, mm); \ + } \ + i += sizeof(TYPE), pg >>= sizeof(TYPE); \ + } while (i & 15); \ + } \ +} + +/* Similarly, specialized for 64-bit operands. */ +#define DO_ZPZZ_D(NAME, TYPE, OP) \ +void HELPER(NAME)(void *vd, void *vn, void *vm, void *vg, uint32_t desc) \ +{ \ + intptr_t i, opr_sz = simd_oprsz(desc) / 8; \ + TYPE *d = vd, *n = vn, *m = vm; \ + uint8_t *pg = vg; \ + for (i = 0; i < opr_sz; i += 1) { \ + if (pg[H1(i)] & 1) { \ + TYPE nn = n[i], mm = m[i]; \ + d[i] = OP(nn, mm); \ + } \ + } \ +} + +#define DO_AND(N, M) (N & M) +#define DO_EOR(N, M) (N ^ M) +#define DO_ORR(N, M) (N | M) +#define DO_BIC(N, M) (N & ~M) +#define DO_ADD(N, M) (N + M) +#define DO_SUB(N, M) (N - M) +#define DO_MAX(N, M) ((N) >= (M) ? (N) : (M)) +#define DO_MIN(N, M) ((N) >= (M) ? (M) : (N)) +#define DO_ABD(N, M) ((N) >= (M) ? 
(N) - (M) : (M) - (N)) +#define DO_MUL(N, M) (N * M) + + +/* + * We must avoid the C undefined behaviour cases: division by + * zero and signed division of INT_MIN by -1. Both of these + * have architecturally defined required results for Arm. + * We special case all signed divisions by -1 to avoid having + * to deduce the minimum integer for the type involved. + */ +#define DO_SDIV(N, M) (unlikely(M == 0) ? 0 : unlikely(M == -1) ? -N : N / M) +#define DO_UDIV(N, M) (unlikely(M == 0) ? 0 : N / M) + +DO_ZPZZ(sve_and_zpzz_b, uint8_t, H1, DO_AND) +DO_ZPZZ(sve_and_zpzz_h, uint16_t, H1_2, DO_AND) +DO_ZPZZ(sve_and_zpzz_s, uint32_t, H1_4, DO_AND) +DO_ZPZZ_D(sve_and_zpzz_d, uint64_t, DO_AND) + +DO_ZPZZ(sve_orr_zpzz_b, uint8_t, H1, DO_ORR) +DO_ZPZZ(sve_orr_zpzz_h, uint16_t, H1_2, DO_ORR) +DO_ZPZZ(sve_orr_zpzz_s, uint32_t, H1_4, DO_ORR) +DO_ZPZZ_D(sve_orr_zpzz_d, uint64_t, DO_ORR) + +DO_ZPZZ(sve_eor_zpzz_b, uint8_t, H1, DO_EOR) +DO_ZPZZ(sve_eor_zpzz_h, uint16_t, H1_2, DO_EOR) +DO_ZPZZ(sve_eor_zpzz_s, uint32_t, H1_4, DO_EOR) +DO_ZPZZ_D(sve_eor_zpzz_d, uint64_t, DO_EOR) + +DO_ZPZZ(sve_bic_zpzz_b, uint8_t, H1, DO_BIC) +DO_ZPZZ(sve_bic_zpzz_h, uint16_t, H1_2, DO_BIC) +DO_ZPZZ(sve_bic_zpzz_s, uint32_t, H1_4, DO_BIC) +DO_ZPZZ_D(sve_bic_zpzz_d, uint64_t, DO_BIC) + +DO_ZPZZ(sve_add_zpzz_b, uint8_t, H1, DO_ADD) +DO_ZPZZ(sve_add_zpzz_h, uint16_t, H1_2, DO_ADD) +DO_ZPZZ(sve_add_zpzz_s, uint32_t, H1_4, DO_ADD) +DO_ZPZZ_D(sve_add_zpzz_d, uint64_t, DO_ADD) + +DO_ZPZZ(sve_sub_zpzz_b, uint8_t, H1, DO_SUB) +DO_ZPZZ(sve_sub_zpzz_h, uint16_t, H1_2, DO_SUB) +DO_ZPZZ(sve_sub_zpzz_s, uint32_t, H1_4, DO_SUB) +DO_ZPZZ_D(sve_sub_zpzz_d, uint64_t, DO_SUB) + +DO_ZPZZ(sve_smax_zpzz_b, int8_t, H1, DO_MAX) +DO_ZPZZ(sve_smax_zpzz_h, int16_t, H1_2, DO_MAX) +DO_ZPZZ(sve_smax_zpzz_s, int32_t, H1_4, DO_MAX) +DO_ZPZZ_D(sve_smax_zpzz_d, int64_t, DO_MAX) + +DO_ZPZZ(sve_umax_zpzz_b, uint8_t, H1, DO_MAX) +DO_ZPZZ(sve_umax_zpzz_h, uint16_t, H1_2, DO_MAX) +DO_ZPZZ(sve_umax_zpzz_s, uint32_t, H1_4, DO_MAX) +DO_ZPZZ_D(sve_umax_zpzz_d, uint64_t, DO_MAX) + +DO_ZPZZ(sve_smin_zpzz_b, int8_t, H1, DO_MIN) +DO_ZPZZ(sve_smin_zpzz_h, int16_t, H1_2, DO_MIN) +DO_ZPZZ(sve_smin_zpzz_s, int32_t, H1_4, DO_MIN) +DO_ZPZZ_D(sve_smin_zpzz_d, int64_t, DO_MIN) + +DO_ZPZZ(sve_umin_zpzz_b, uint8_t, H1, DO_MIN) +DO_ZPZZ(sve_umin_zpzz_h, uint16_t, H1_2, DO_MIN) +DO_ZPZZ(sve_umin_zpzz_s, uint32_t, H1_4, DO_MIN) +DO_ZPZZ_D(sve_umin_zpzz_d, uint64_t, DO_MIN) + +DO_ZPZZ(sve_sabd_zpzz_b, int8_t, H1, DO_ABD) +DO_ZPZZ(sve_sabd_zpzz_h, int16_t, H1_2, DO_ABD) +DO_ZPZZ(sve_sabd_zpzz_s, int32_t, H1_4, DO_ABD) +DO_ZPZZ_D(sve_sabd_zpzz_d, int64_t, DO_ABD) + +DO_ZPZZ(sve_uabd_zpzz_b, uint8_t, H1, DO_ABD) +DO_ZPZZ(sve_uabd_zpzz_h, uint16_t, H1_2, DO_ABD) +DO_ZPZZ(sve_uabd_zpzz_s, uint32_t, H1_4, DO_ABD) +DO_ZPZZ_D(sve_uabd_zpzz_d, uint64_t, DO_ABD) + +/* Because the computation type is at least twice as large as required, + these work for both signed and unsigned source types. 
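+ A quick worked case: with input bytes 0xff and 0x02, the int8_t instantiation
+ passes -1 and 2, the product is -2 and the returned high byte is 0xff (SMULH),
+ while the uint8_t instantiation passes 255 and 2, the product is 510 and the
+ high byte is 0x01 (UMULH).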
*/ +static inline uint8_t do_mulh_b(int32_t n, int32_t m) +{ + return (n * m) >> 8; +} + +static inline uint16_t do_mulh_h(int32_t n, int32_t m) +{ + return (n * m) >> 16; +} + +static inline uint32_t do_mulh_s(int64_t n, int64_t m) +{ + return (n * m) >> 32; +} + +static inline uint64_t do_smulh_d(uint64_t n, uint64_t m) +{ + uint64_t lo, hi; + muls64(&lo, &hi, n, m); + return hi; +} + +static inline uint64_t do_umulh_d(uint64_t n, uint64_t m) +{ + uint64_t lo, hi; + mulu64(&lo, &hi, n, m); + return hi; +} + +DO_ZPZZ(sve_mul_zpzz_b, uint8_t, H1, DO_MUL) +DO_ZPZZ(sve_mul_zpzz_h, uint16_t, H1_2, DO_MUL) +DO_ZPZZ(sve_mul_zpzz_s, uint32_t, H1_4, DO_MUL) +DO_ZPZZ_D(sve_mul_zpzz_d, uint64_t, DO_MUL) + +DO_ZPZZ(sve_smulh_zpzz_b, int8_t, H1, do_mulh_b) +DO_ZPZZ(sve_smulh_zpzz_h, int16_t, H1_2, do_mulh_h) +DO_ZPZZ(sve_smulh_zpzz_s, int32_t, H1_4, do_mulh_s) +DO_ZPZZ_D(sve_smulh_zpzz_d, uint64_t, do_smulh_d) + +DO_ZPZZ(sve_umulh_zpzz_b, uint8_t, H1, do_mulh_b) +DO_ZPZZ(sve_umulh_zpzz_h, uint16_t, H1_2, do_mulh_h) +DO_ZPZZ(sve_umulh_zpzz_s, uint32_t, H1_4, do_mulh_s) +DO_ZPZZ_D(sve_umulh_zpzz_d, uint64_t, do_umulh_d) + +DO_ZPZZ(sve_sdiv_zpzz_s, int32_t, H1_4, DO_SDIV) +DO_ZPZZ_D(sve_sdiv_zpzz_d, int64_t, DO_SDIV) + +DO_ZPZZ(sve_udiv_zpzz_s, uint32_t, H1_4, DO_UDIV) +DO_ZPZZ_D(sve_udiv_zpzz_d, uint64_t, DO_UDIV) + +/* Note that all bits of the shift are significant + and not modulo the element size. */ +#define DO_ASR(N, M) (N >> MIN(M, sizeof(N) * 8 - 1)) +#define DO_LSR(N, M) (M < sizeof(N) * 8 ? N >> M : 0) +#define DO_LSL(N, M) (M < sizeof(N) * 8 ? N << M : 0) + +DO_ZPZZ(sve_asr_zpzz_b, int8_t, H1, DO_ASR) +DO_ZPZZ(sve_lsr_zpzz_b, uint8_t, H1_2, DO_LSR) +DO_ZPZZ(sve_lsl_zpzz_b, uint8_t, H1_4, DO_LSL) + +DO_ZPZZ(sve_asr_zpzz_h, int16_t, H1, DO_ASR) +DO_ZPZZ(sve_lsr_zpzz_h, uint16_t, H1_2, DO_LSR) +DO_ZPZZ(sve_lsl_zpzz_h, uint16_t, H1_4, DO_LSL) + +DO_ZPZZ(sve_asr_zpzz_s, int32_t, H1, DO_ASR) +DO_ZPZZ(sve_lsr_zpzz_s, uint32_t, H1_2, DO_LSR) +DO_ZPZZ(sve_lsl_zpzz_s, uint32_t, H1_4, DO_LSL) + +DO_ZPZZ_D(sve_asr_zpzz_d, int64_t, DO_ASR) +DO_ZPZZ_D(sve_lsr_zpzz_d, uint64_t, DO_LSR) +DO_ZPZZ_D(sve_lsl_zpzz_d, uint64_t, DO_LSL) + +#undef DO_ZPZZ +#undef DO_ZPZZ_D + +/* Three-operand expander, controlled by a predicate, in which the + * third operand is "wide". That is, for D = N op M, the same 64-bit + * value of M is used with all of the narrower values of N. + */ +#define DO_ZPZW(NAME, TYPE, TYPEW, H, OP) \ +void HELPER(NAME)(void *vd, void *vn, void *vm, void *vg, uint32_t desc) \ +{ \ + intptr_t i, opr_sz = simd_oprsz(desc); \ + for (i = 0; i < opr_sz; ) { \ + uint8_t pg = *(uint8_t *)((char *)vg + H1(i >> 3)); \ + TYPEW mm = *(TYPEW *)((char *)vm + i); \ + do { \ + if (pg & 1) { \ + TYPE nn = *(TYPE *)((char *)vn + H(i)); \ + *(TYPE *)((char *)vd + H(i)) = OP(nn, mm); \ + } \ + i += sizeof(TYPE), pg >>= sizeof(TYPE); \ + } while (i & 7); \ + } \ +} + +DO_ZPZW(sve_asr_zpzw_b, int8_t, uint64_t, H1, DO_ASR) +DO_ZPZW(sve_lsr_zpzw_b, uint8_t, uint64_t, H1, DO_LSR) +DO_ZPZW(sve_lsl_zpzw_b, uint8_t, uint64_t, H1, DO_LSL) + +DO_ZPZW(sve_asr_zpzw_h, int16_t, uint64_t, H1_2, DO_ASR) +DO_ZPZW(sve_lsr_zpzw_h, uint16_t, uint64_t, H1_2, DO_LSR) +DO_ZPZW(sve_lsl_zpzw_h, uint16_t, uint64_t, H1_2, DO_LSL) + +DO_ZPZW(sve_asr_zpzw_s, int32_t, uint64_t, H1_4, DO_ASR) +DO_ZPZW(sve_lsr_zpzw_s, uint32_t, uint64_t, H1_4, DO_LSR) +DO_ZPZW(sve_lsl_zpzw_s, uint32_t, uint64_t, H1_4, DO_LSL) + +#undef DO_ZPZW + +/* Fully general two-operand expander, controlled by a predicate. 
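+ * For instance, DO_ZPZ(sve_cnot_b, uint8_t, H1, DO_CNOT) below expands to a loop
+ * that consumes the predicate 16 bits at a time and writes (nn == 0) into each
+ * active byte lane, leaving inactive lanes unchanged.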
+ */ +#define DO_ZPZ(NAME, TYPE, H, OP) \ +void HELPER(NAME)(void *vd, void *vn, void *vg, uint32_t desc) \ +{ \ + intptr_t i, opr_sz = simd_oprsz(desc); \ + for (i = 0; i < opr_sz; ) { \ + uint16_t pg = *(uint16_t *)((char *)vg + H1_2(i >> 3)); \ + do { \ + if (pg & 1) { \ + TYPE nn = *(TYPE *)((char *)vn + H(i)); \ + *(TYPE *)((char *)vd + H(i)) = OP(nn); \ + } \ + i += sizeof(TYPE), pg >>= sizeof(TYPE); \ + } while (i & 15); \ + } \ +} + +/* Similarly, specialized for 64-bit operands. */ +#define DO_ZPZ_D(NAME, TYPE, OP) \ +void HELPER(NAME)(void *vd, void *vn, void *vg, uint32_t desc) \ +{ \ + intptr_t i, opr_sz = simd_oprsz(desc) / 8; \ + TYPE *d = vd, *n = vn; \ + uint8_t *pg = vg; \ + for (i = 0; i < opr_sz; i += 1) { \ + if (pg[H1(i)] & 1) { \ + TYPE nn = n[i]; \ + d[i] = OP(nn); \ + } \ + } \ +} + +#define DO_CLS_B(N) (clrsb32(N) - 24) +#define DO_CLS_H(N) (clrsb32(N) - 16) + +DO_ZPZ(sve_cls_b, int8_t, H1, DO_CLS_B) +DO_ZPZ(sve_cls_h, int16_t, H1_2, DO_CLS_H) +DO_ZPZ(sve_cls_s, int32_t, H1_4, clrsb32) +DO_ZPZ_D(sve_cls_d, int64_t, clrsb64) + +#define DO_CLZ_B(N) (clz32(N) - 24) +#define DO_CLZ_H(N) (clz32(N) - 16) + +DO_ZPZ(sve_clz_b, uint8_t, H1, DO_CLZ_B) +DO_ZPZ(sve_clz_h, uint16_t, H1_2, DO_CLZ_H) +DO_ZPZ(sve_clz_s, uint32_t, H1_4, clz32) +DO_ZPZ_D(sve_clz_d, uint64_t, clz64) + +DO_ZPZ(sve_cnt_zpz_b, uint8_t, H1, ctpop8) +DO_ZPZ(sve_cnt_zpz_h, uint16_t, H1_2, ctpop16) +DO_ZPZ(sve_cnt_zpz_s, uint32_t, H1_4, ctpop32) +DO_ZPZ_D(sve_cnt_zpz_d, uint64_t, ctpop64) + +#define DO_CNOT(N) (N == 0) + +DO_ZPZ(sve_cnot_b, uint8_t, H1, DO_CNOT) +DO_ZPZ(sve_cnot_h, uint16_t, H1_2, DO_CNOT) +DO_ZPZ(sve_cnot_s, uint32_t, H1_4, DO_CNOT) +DO_ZPZ_D(sve_cnot_d, uint64_t, DO_CNOT) + +#ifdef _MSC_VER +#define DO_FABS16(N) (N & ((uint16_t)-1 >> 1)) +#define DO_FABS32(N) (N & ((uint32_t)-1 >> 1)) +#define DO_FABS64(N) (N & ((uint64_t)-1 >> 1)) + +DO_ZPZ(sve_fabs_h, uint16_t, H1_2, DO_FABS16) +DO_ZPZ(sve_fabs_s, uint32_t, H1_4, DO_FABS32) +DO_ZPZ_D(sve_fabs_d, uint64_t, DO_FABS64) +#else +#define DO_FABS(N) (N & ((__typeof(N))-1 >> 1)) + +DO_ZPZ(sve_fabs_h, uint16_t, H1_2, DO_FABS) +DO_ZPZ(sve_fabs_s, uint32_t, H1_4, DO_FABS) +DO_ZPZ_D(sve_fabs_d, uint64_t, DO_FABS) +#endif + +#ifdef _MSC_VER +#define DO_FNEG16(N) (N ^ ~((uint16_t)-1 >> 1)) +#define DO_FNEG32(N) (N ^ ~((uint32_t)-1 >> 1)) +#define DO_FNEG64(N) (N ^ ~((uint64_t)-1 >> 1)) + +DO_ZPZ(sve_fneg_h, uint16_t, H1_2, DO_FNEG16) +DO_ZPZ(sve_fneg_s, uint32_t, H1_4, DO_FNEG32) +DO_ZPZ_D(sve_fneg_d, uint64_t, DO_FNEG64) +#else +#define DO_FNEG(N) (N ^ ~((__typeof(N))-1 >> 1)) + +DO_ZPZ(sve_fneg_h, uint16_t, H1_2, DO_FNEG) +DO_ZPZ(sve_fneg_s, uint32_t, H1_4, DO_FNEG) +DO_ZPZ_D(sve_fneg_d, uint64_t, DO_FNEG) +#endif + +#define DO_NOT(N) (~N) + +DO_ZPZ(sve_not_zpz_b, uint8_t, H1, DO_NOT) +DO_ZPZ(sve_not_zpz_h, uint16_t, H1_2, DO_NOT) +DO_ZPZ(sve_not_zpz_s, uint32_t, H1_4, DO_NOT) +DO_ZPZ_D(sve_not_zpz_d, uint64_t, DO_NOT) + +#define DO_SXTB(N) ((int8_t)N) +#define DO_SXTH(N) ((int16_t)N) +#define DO_SXTS(N) ((int32_t)N) +#define DO_UXTB(N) ((uint8_t)N) +#define DO_UXTH(N) ((uint16_t)N) +#define DO_UXTS(N) ((uint32_t)N) + +DO_ZPZ(sve_sxtb_h, uint16_t, H1_2, DO_SXTB) +DO_ZPZ(sve_sxtb_s, uint32_t, H1_4, DO_SXTB) +DO_ZPZ(sve_sxth_s, uint32_t, H1_4, DO_SXTH) +DO_ZPZ_D(sve_sxtb_d, uint64_t, DO_SXTB) +DO_ZPZ_D(sve_sxth_d, uint64_t, DO_SXTH) +DO_ZPZ_D(sve_sxtw_d, uint64_t, DO_SXTS) + +DO_ZPZ(sve_uxtb_h, uint16_t, H1_2, DO_UXTB) +DO_ZPZ(sve_uxtb_s, uint32_t, H1_4, DO_UXTB) +DO_ZPZ(sve_uxth_s, uint32_t, H1_4, DO_UXTH) +DO_ZPZ_D(sve_uxtb_d, uint64_t, DO_UXTB) 
+DO_ZPZ_D(sve_uxth_d, uint64_t, DO_UXTH) +DO_ZPZ_D(sve_uxtw_d, uint64_t, DO_UXTS) + +#ifdef _MSC_VER +#define DO_ABS(N) (N < 0 ? (0 - N) : N) +#else +#define DO_ABS(N) (N < 0 ? -N : N) +#endif + +DO_ZPZ(sve_abs_b, int8_t, H1, DO_ABS) +DO_ZPZ(sve_abs_h, int16_t, H1_2, DO_ABS) +DO_ZPZ(sve_abs_s, int32_t, H1_4, DO_ABS) +DO_ZPZ_D(sve_abs_d, int64_t, DO_ABS) + +#ifdef _MSC_VER +#define DO_NEG(N) (0 - N) +#else +#define DO_NEG(N) (-N) +#endif + +DO_ZPZ(sve_neg_b, uint8_t, H1, DO_NEG) +DO_ZPZ(sve_neg_h, uint16_t, H1_2, DO_NEG) +DO_ZPZ(sve_neg_s, uint32_t, H1_4, DO_NEG) +DO_ZPZ_D(sve_neg_d, uint64_t, DO_NEG) + +DO_ZPZ(sve_revb_h, uint16_t, H1_2, bswap16) +DO_ZPZ(sve_revb_s, uint32_t, H1_4, bswap32) +DO_ZPZ_D(sve_revb_d, uint64_t, bswap64) + +DO_ZPZ(sve_revh_s, uint32_t, H1_4, hswap32) +DO_ZPZ_D(sve_revh_d, uint64_t, hswap64) + +DO_ZPZ_D(sve_revw_d, uint64_t, wswap64) + +DO_ZPZ(sve_rbit_b, uint8_t, H1, revbit8) +DO_ZPZ(sve_rbit_h, uint16_t, H1_2, revbit16) +DO_ZPZ(sve_rbit_s, uint32_t, H1_4, revbit32) +DO_ZPZ_D(sve_rbit_d, uint64_t, revbit64) + +/* Three-operand expander, unpredicated, in which the third operand is "wide". + */ +#define DO_ZZW(NAME, TYPE, TYPEW, H, OP) \ +void HELPER(NAME)(void *vd, void *vn, void *vm, uint32_t desc) \ +{ \ + intptr_t i, opr_sz = simd_oprsz(desc); \ + for (i = 0; i < opr_sz; ) { \ + TYPEW mm = *(TYPEW *)((char *)vm + i); \ + do { \ + TYPE nn = *(TYPE *)((char *)vn + H(i)); \ + *(TYPE *)((char *)vd + H(i)) = OP(nn, mm); \ + i += sizeof(TYPE); \ + } while (i & 7); \ + } \ +} + +DO_ZZW(sve_asr_zzw_b, int8_t, uint64_t, H1, DO_ASR) +DO_ZZW(sve_lsr_zzw_b, uint8_t, uint64_t, H1, DO_LSR) +DO_ZZW(sve_lsl_zzw_b, uint8_t, uint64_t, H1, DO_LSL) + +DO_ZZW(sve_asr_zzw_h, int16_t, uint64_t, H1_2, DO_ASR) +DO_ZZW(sve_lsr_zzw_h, uint16_t, uint64_t, H1_2, DO_LSR) +DO_ZZW(sve_lsl_zzw_h, uint16_t, uint64_t, H1_2, DO_LSL) + +DO_ZZW(sve_asr_zzw_s, int32_t, uint64_t, H1_4, DO_ASR) +DO_ZZW(sve_lsr_zzw_s, uint32_t, uint64_t, H1_4, DO_LSR) +DO_ZZW(sve_lsl_zzw_s, uint32_t, uint64_t, H1_4, DO_LSL) + +#undef DO_ZZW + +#undef DO_CLS_B +#undef DO_CLS_H +#undef DO_CLZ_B +#undef DO_CLZ_H +#undef DO_CNOT +#undef DO_FABS +#undef DO_FNEG +#undef DO_ABS +#undef DO_NEG +#undef DO_ZPZ +#undef DO_ZPZ_D + +/* Two-operand reduction expander, controlled by a predicate. + * The difference between TYPERED and TYPERET has to do with + * sign-extension. E.g. for SMAX, TYPERED must be signed, + * but TYPERET must be unsigned so that e.g. a 32-bit value + * is not sign-extended to the ABI uint64_t return type. + */ +/* ??? If we were to vectorize this by hand the reduction ordering + * would change. For integer operands, this is perfectly fine. 
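+ * As a concrete case of the TYPERET note above: sve_smaxv_s accumulates in
+ * int32_t but returns through uint32_t, so a result of -1 reaches the uint64_t
+ * ABI return value as 0x00000000ffffffff rather than being sign-extended to
+ * 0xffffffffffffffff.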
+ */ +#define DO_VPZ(NAME, TYPEELT, TYPERED, TYPERET, H, INIT, OP) \ +uint64_t HELPER(NAME)(void *vn, void *vg, uint32_t desc) \ +{ \ + intptr_t i, opr_sz = simd_oprsz(desc); \ + TYPERED ret = INIT; \ + for (i = 0; i < opr_sz; ) { \ + uint16_t pg = *(uint16_t *)((char *)vg + H1_2(i >> 3)); \ + do { \ + if (pg & 1) { \ + TYPEELT nn = *(TYPEELT *)((char *)vn + H(i)); \ + ret = OP(ret, nn); \ + } \ + i += sizeof(TYPEELT), pg >>= sizeof(TYPEELT); \ + } while (i & 15); \ + } \ + return (TYPERET)ret; \ +} + +#define DO_VPZ_D(NAME, TYPEE, TYPER, INIT, OP) \ +uint64_t HELPER(NAME)(void *vn, void *vg, uint32_t desc) \ +{ \ + intptr_t i, opr_sz = simd_oprsz(desc) / 8; \ + TYPEE *n = vn; \ + uint8_t *pg = vg; \ + TYPER ret = INIT; \ + for (i = 0; i < opr_sz; i += 1) { \ + if (pg[H1(i)] & 1) { \ + TYPEE nn = n[i]; \ + ret = OP(ret, nn); \ + } \ + } \ + return ret; \ +} + +DO_VPZ(sve_orv_b, uint8_t, uint8_t, uint8_t, H1, 0, DO_ORR) +DO_VPZ(sve_orv_h, uint16_t, uint16_t, uint16_t, H1_2, 0, DO_ORR) +DO_VPZ(sve_orv_s, uint32_t, uint32_t, uint32_t, H1_4, 0, DO_ORR) +DO_VPZ_D(sve_orv_d, uint64_t, uint64_t, 0, DO_ORR) + +DO_VPZ(sve_eorv_b, uint8_t, uint8_t, uint8_t, H1, 0, DO_EOR) +DO_VPZ(sve_eorv_h, uint16_t, uint16_t, uint16_t, H1_2, 0, DO_EOR) +DO_VPZ(sve_eorv_s, uint32_t, uint32_t, uint32_t, H1_4, 0, DO_EOR) +DO_VPZ_D(sve_eorv_d, uint64_t, uint64_t, 0, DO_EOR) + +DO_VPZ(sve_andv_b, uint8_t, uint8_t, uint8_t, H1, -1, DO_AND) +DO_VPZ(sve_andv_h, uint16_t, uint16_t, uint16_t, H1_2, -1, DO_AND) +DO_VPZ(sve_andv_s, uint32_t, uint32_t, uint32_t, H1_4, -1, DO_AND) +DO_VPZ_D(sve_andv_d, uint64_t, uint64_t, -1, DO_AND) + +DO_VPZ(sve_saddv_b, int8_t, uint64_t, uint64_t, H1, 0, DO_ADD) +DO_VPZ(sve_saddv_h, int16_t, uint64_t, uint64_t, H1_2, 0, DO_ADD) +DO_VPZ(sve_saddv_s, int32_t, uint64_t, uint64_t, H1_4, 0, DO_ADD) + +DO_VPZ(sve_uaddv_b, uint8_t, uint64_t, uint64_t, H1, 0, DO_ADD) +DO_VPZ(sve_uaddv_h, uint16_t, uint64_t, uint64_t, H1_2, 0, DO_ADD) +DO_VPZ(sve_uaddv_s, uint32_t, uint64_t, uint64_t, H1_4, 0, DO_ADD) +DO_VPZ_D(sve_uaddv_d, uint64_t, uint64_t, 0, DO_ADD) + +DO_VPZ(sve_smaxv_b, int8_t, int8_t, uint8_t, H1, INT8_MIN, DO_MAX) +DO_VPZ(sve_smaxv_h, int16_t, int16_t, uint16_t, H1_2, INT16_MIN, DO_MAX) +DO_VPZ(sve_smaxv_s, int32_t, int32_t, uint32_t, H1_4, INT32_MIN, DO_MAX) +DO_VPZ_D(sve_smaxv_d, int64_t, int64_t, INT64_MIN, DO_MAX) + +DO_VPZ(sve_umaxv_b, uint8_t, uint8_t, uint8_t, H1, 0, DO_MAX) +DO_VPZ(sve_umaxv_h, uint16_t, uint16_t, uint16_t, H1_2, 0, DO_MAX) +DO_VPZ(sve_umaxv_s, uint32_t, uint32_t, uint32_t, H1_4, 0, DO_MAX) +DO_VPZ_D(sve_umaxv_d, uint64_t, uint64_t, 0, DO_MAX) + +DO_VPZ(sve_sminv_b, int8_t, int8_t, uint8_t, H1, INT8_MAX, DO_MIN) +DO_VPZ(sve_sminv_h, int16_t, int16_t, uint16_t, H1_2, INT16_MAX, DO_MIN) +DO_VPZ(sve_sminv_s, int32_t, int32_t, uint32_t, H1_4, INT32_MAX, DO_MIN) +DO_VPZ_D(sve_sminv_d, int64_t, int64_t, INT64_MAX, DO_MIN) + +DO_VPZ(sve_uminv_b, uint8_t, uint8_t, uint8_t, H1, -1, DO_MIN) +DO_VPZ(sve_uminv_h, uint16_t, uint16_t, uint16_t, H1_2, -1, DO_MIN) +DO_VPZ(sve_uminv_s, uint32_t, uint32_t, uint32_t, H1_4, -1, DO_MIN) +DO_VPZ_D(sve_uminv_d, uint64_t, uint64_t, -1, DO_MIN) + +#undef DO_VPZ +#undef DO_VPZ_D + +/* Two vector operand, one scalar operand, unpredicated. 
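+ * For example, the DO_SUBR instantiations below compute a reversed subtract,
+ * d[i] = imm - n[i], while the max/min forms apply the scalar unchanged to every
+ * element with no predicate involved.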
*/ +#define DO_ZZI(NAME, TYPE, OP) \ +void HELPER(NAME)(void *vd, void *vn, uint64_t s64, uint32_t desc) \ +{ \ + intptr_t i, opr_sz = simd_oprsz(desc) / sizeof(TYPE); \ + TYPE s = s64, *d = vd, *n = vn; \ + for (i = 0; i < opr_sz; ++i) { \ + d[i] = OP(n[i], s); \ + } \ +} + +#define DO_SUBR(X, Y) (Y - X) + +DO_ZZI(sve_subri_b, uint8_t, DO_SUBR) +DO_ZZI(sve_subri_h, uint16_t, DO_SUBR) +DO_ZZI(sve_subri_s, uint32_t, DO_SUBR) +DO_ZZI(sve_subri_d, uint64_t, DO_SUBR) + +DO_ZZI(sve_smaxi_b, int8_t, DO_MAX) +DO_ZZI(sve_smaxi_h, int16_t, DO_MAX) +DO_ZZI(sve_smaxi_s, int32_t, DO_MAX) +DO_ZZI(sve_smaxi_d, int64_t, DO_MAX) + +DO_ZZI(sve_smini_b, int8_t, DO_MIN) +DO_ZZI(sve_smini_h, int16_t, DO_MIN) +DO_ZZI(sve_smini_s, int32_t, DO_MIN) +DO_ZZI(sve_smini_d, int64_t, DO_MIN) + +DO_ZZI(sve_umaxi_b, uint8_t, DO_MAX) +DO_ZZI(sve_umaxi_h, uint16_t, DO_MAX) +DO_ZZI(sve_umaxi_s, uint32_t, DO_MAX) +DO_ZZI(sve_umaxi_d, uint64_t, DO_MAX) + +DO_ZZI(sve_umini_b, uint8_t, DO_MIN) +DO_ZZI(sve_umini_h, uint16_t, DO_MIN) +DO_ZZI(sve_umini_s, uint32_t, DO_MIN) +DO_ZZI(sve_umini_d, uint64_t, DO_MIN) + +#undef DO_ZZI + +#undef DO_AND +#undef DO_ORR +#undef DO_EOR +#undef DO_BIC +#undef DO_ADD +#undef DO_SUB +#undef DO_MAX +#undef DO_MIN +#undef DO_ABD +#undef DO_MUL +#undef DO_DIV +#undef DO_ASR +#undef DO_LSR +#undef DO_LSL +#undef DO_SUBR + +/* Similar to the ARM LastActiveElement pseudocode function, except the + result is multiplied by the element size. This includes the not found + indication; e.g. not found for esz=3 is -8. */ +static intptr_t last_active_element(uint64_t *g, intptr_t words, intptr_t esz) +{ + uint64_t mask = pred_esz_masks[esz]; + intptr_t i = words; + + do { + uint64_t this_g = g[--i] & mask; + if (this_g) { + return i * 64 + (63 - clz64(this_g)); + } + } while (i > 0); + return (intptr_t)-1 << esz; +} + +uint32_t HELPER(sve_pfirst)(void *vd, void *vg, uint32_t words) +{ + uint32_t flags = PREDTEST_INIT; + uint64_t *d = vd, *g = vg; + intptr_t i = 0; + + do { + uint64_t this_d = d[i]; + uint64_t this_g = g[i]; + + if (this_g) { + if (!(flags & 4)) { + /* Set in D the first bit of G. */ +#ifdef _MSC_VER + this_d |= this_g & (0 - this_g); +#else + this_d |= this_g & -this_g; +#endif + d[i] = this_d; + } + flags = iter_predtest_fwd(this_d, this_g, flags); + } + } while (++i < words); + + return flags; +} + +uint32_t HELPER(sve_pnext)(void *vd, void *vg, uint32_t pred_desc) +{ + intptr_t words = extract32(pred_desc, 0, SIMD_OPRSZ_BITS); + intptr_t esz = extract32(pred_desc, SIMD_DATA_SHIFT, 2); + uint32_t flags = PREDTEST_INIT; + uint64_t *d = vd, *g = vg, esz_mask; + intptr_t i, next; + + next = last_active_element(vd, words, esz) + (1ULL << esz); + esz_mask = pred_esz_masks[esz]; + + /* Similar to the pseudocode for pnext, but scaled by ESZ + so that we find the correct bit. */ + if (next < words * 64) { + uint64_t mask = -1; + + if (next & 63) { + mask = ~((1ull << (next & 63)) - 1); + next &= -64; + } + do { + uint64_t this_g = g[next / 64] & esz_mask & mask; + if (this_g != 0) { + next = (next & -64) + ctz64(this_g); + break; + } + next += 64; + mask = -1; + } while (next < words * 64); + } + + i = 0; + do { + uint64_t this_d = 0; + if (i == next / 64) { + this_d = 1ull << (next & 63); + } + d[i] = this_d; + flags = iter_predtest_fwd(this_d, g[i] & esz_mask, flags); + } while (++i < words); + + return flags; +} + +/* Store zero into every active element of Zd. We will use this for two + * and three-operand predicated instructions for which logic dictates a + * zero result. 
In particular, logical shift by element size, which is + * otherwise undefined on the host. + * + * For element sizes smaller than uint64_t, we use tables to expand + * the N bits of the controlling predicate to a byte mask, and clear + * those bytes. + */ +void HELPER(sve_clr_b)(void *vd, void *vg, uint32_t desc) +{ + intptr_t i, opr_sz = simd_oprsz(desc) / 8; + uint64_t *d = vd; + uint8_t *pg = vg; + for (i = 0; i < opr_sz; i += 1) { + d[i] &= ~expand_pred_b(pg[H1(i)]); + } +} + +void HELPER(sve_clr_h)(void *vd, void *vg, uint32_t desc) +{ + intptr_t i, opr_sz = simd_oprsz(desc) / 8; + uint64_t *d = vd; + uint8_t *pg = vg; + for (i = 0; i < opr_sz; i += 1) { + d[i] &= ~expand_pred_h(pg[H1(i)]); + } +} + +void HELPER(sve_clr_s)(void *vd, void *vg, uint32_t desc) +{ + intptr_t i, opr_sz = simd_oprsz(desc) / 8; + uint64_t *d = vd; + uint8_t *pg = vg; + for (i = 0; i < opr_sz; i += 1) { + d[i] &= ~expand_pred_s(pg[H1(i)]); + } +} + +void HELPER(sve_clr_d)(void *vd, void *vg, uint32_t desc) +{ + intptr_t i, opr_sz = simd_oprsz(desc) / 8; + uint64_t *d = vd; + uint8_t *pg = vg; + for (i = 0; i < opr_sz; i += 1) { + if (pg[H1(i)] & 1) { + d[i] = 0; + } + } +} + +/* Copy Zn into Zd, and store zero into inactive elements. */ +void HELPER(sve_movz_b)(void *vd, void *vn, void *vg, uint32_t desc) +{ + intptr_t i, opr_sz = simd_oprsz(desc) / 8; + uint64_t *d = vd, *n = vn; + uint8_t *pg = vg; + for (i = 0; i < opr_sz; i += 1) { + d[i] = n[i] & expand_pred_b(pg[H1(i)]); + } +} + +void HELPER(sve_movz_h)(void *vd, void *vn, void *vg, uint32_t desc) +{ + intptr_t i, opr_sz = simd_oprsz(desc) / 8; + uint64_t *d = vd, *n = vn; + uint8_t *pg = vg; + for (i = 0; i < opr_sz; i += 1) { + d[i] = n[i] & expand_pred_h(pg[H1(i)]); + } +} + +void HELPER(sve_movz_s)(void *vd, void *vn, void *vg, uint32_t desc) +{ + intptr_t i, opr_sz = simd_oprsz(desc) / 8; + uint64_t *d = vd, *n = vn; + uint8_t *pg = vg; + for (i = 0; i < opr_sz; i += 1) { + d[i] = n[i] & expand_pred_s(pg[H1(i)]); + } +} + +void HELPER(sve_movz_d)(void *vd, void *vn, void *vg, uint32_t desc) +{ + intptr_t i, opr_sz = simd_oprsz(desc) / 8; + uint64_t *d = vd, *n = vn; + uint8_t *pg = vg; + for (i = 0; i < opr_sz; i += 1) { +#ifdef _MSC_VER + d[i] = n[i] & ((uint64_t)0 - (uint64_t)(pg[H1(i)] & 1)); +#else + d[i] = n[i] & -(uint64_t)(pg[H1(i)] & 1); +#endif + } +} + +/* Three-operand expander, immediate operand, controlled by a predicate. + */ +#define DO_ZPZI(NAME, TYPE, H, OP) \ +void HELPER(NAME)(void *vd, void *vn, void *vg, uint32_t desc) \ +{ \ + intptr_t i, opr_sz = simd_oprsz(desc); \ + TYPE imm = simd_data(desc); \ + for (i = 0; i < opr_sz; ) { \ + uint16_t pg = *(uint16_t *)((char *)vg + H1_2(i >> 3)); \ + do { \ + if (pg & 1) { \ + TYPE nn = *(TYPE *)((char *)vn + H(i)); \ + *(TYPE *)((char *)vd + H(i)) = OP(nn, imm); \ + } \ + i += sizeof(TYPE), pg >>= sizeof(TYPE); \ + } while (i & 15); \ + } \ +} + +/* Similarly, specialized for 64-bit operands. */ +#define DO_ZPZI_D(NAME, TYPE, OP) \ +void HELPER(NAME)(void *vd, void *vn, void *vg, uint32_t desc) \ +{ \ + intptr_t i, opr_sz = simd_oprsz(desc) / 8; \ + TYPE *d = vd, *n = vn; \ + TYPE imm = simd_data(desc); \ + uint8_t *pg = vg; \ + for (i = 0; i < opr_sz; i += 1) { \ + if (pg[H1(i)] & 1) { \ + TYPE nn = n[i]; \ + d[i] = OP(nn, imm); \ + } \ + } \ +} + +#define DO_SHR(N, M) (N >> M) +#define DO_SHL(N, M) (N << M) + +/* Arithmetic shift right for division. This rounds negative numbers + toward zero as per signed division. Therefore before shifting, + when N is negative, add 2**M-1. 
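+ A worked case: for N = -7 and M = 2 the bias is (1 << 2) - 1 = 3, so the shift
+ operates on -4 and yields -1, matching -7 / 4 rounded toward zero, whereas a
+ plain arithmetic shift would give -2.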
*/ +#ifdef _MSC_VER + #define DO_ASRD(N, M) ((N + (N < 0 ? (1 << M) - 1 : 0)) >> M) +#else + #define DO_ASRD(N, M) ((N + (N < 0 ? ((__typeof(N))1 << M) - 1 : 0)) >> M) +#endif + +DO_ZPZI(sve_asr_zpzi_b, int8_t, H1, DO_SHR) +DO_ZPZI(sve_asr_zpzi_h, int16_t, H1_2, DO_SHR) +DO_ZPZI(sve_asr_zpzi_s, int32_t, H1_4, DO_SHR) +DO_ZPZI_D(sve_asr_zpzi_d, int64_t, DO_SHR) + +DO_ZPZI(sve_lsr_zpzi_b, uint8_t, H1, DO_SHR) +DO_ZPZI(sve_lsr_zpzi_h, uint16_t, H1_2, DO_SHR) +DO_ZPZI(sve_lsr_zpzi_s, uint32_t, H1_4, DO_SHR) +DO_ZPZI_D(sve_lsr_zpzi_d, uint64_t, DO_SHR) + +DO_ZPZI(sve_lsl_zpzi_b, uint8_t, H1, DO_SHL) +DO_ZPZI(sve_lsl_zpzi_h, uint16_t, H1_2, DO_SHL) +DO_ZPZI(sve_lsl_zpzi_s, uint32_t, H1_4, DO_SHL) +DO_ZPZI_D(sve_lsl_zpzi_d, uint64_t, DO_SHL) + +DO_ZPZI(sve_asrd_b, int8_t, H1, DO_ASRD) +DO_ZPZI(sve_asrd_h, int16_t, H1_2, DO_ASRD) +DO_ZPZI(sve_asrd_s, int32_t, H1_4, DO_ASRD) +DO_ZPZI_D(sve_asrd_d, int64_t, DO_ASRD) + +#undef DO_SHR +#undef DO_SHL +#undef DO_ASRD +#undef DO_ZPZI +#undef DO_ZPZI_D + +/* Fully general four-operand expander, controlled by a predicate. + */ +#define DO_ZPZZZ(NAME, TYPE, H, OP) \ +void HELPER(NAME)(void *vd, void *va, void *vn, void *vm, \ + void *vg, uint32_t desc) \ +{ \ + intptr_t i, opr_sz = simd_oprsz(desc); \ + for (i = 0; i < opr_sz; ) { \ + uint16_t pg = *(uint16_t *)((char *)vg + H1_2(i >> 3)); \ + do { \ + if (pg & 1) { \ + TYPE nn = *(TYPE *)((char *)vn + H(i)); \ + TYPE mm = *(TYPE *)((char *)vm + H(i)); \ + TYPE aa = *(TYPE *)((char *)va + H(i)); \ + *(TYPE *)((char *)vd + H(i)) = OP(aa, nn, mm); \ + } \ + i += sizeof(TYPE), pg >>= sizeof(TYPE); \ + } while (i & 15); \ + } \ +} + +/* Similarly, specialized for 64-bit operands. */ +#define DO_ZPZZZ_D(NAME, TYPE, OP) \ +void HELPER(NAME)(void *vd, void *va, void *vn, void *vm, \ + void *vg, uint32_t desc) \ +{ \ + intptr_t i, opr_sz = simd_oprsz(desc) / 8; \ + TYPE *d = vd, *a = va, *n = vn, *m = vm; \ + uint8_t *pg = vg; \ + for (i = 0; i < opr_sz; i += 1) { \ + if (pg[H1(i)] & 1) { \ + TYPE aa = a[i], nn = n[i], mm = m[i]; \ + d[i] = OP(aa, nn, mm); \ + } \ + } \ +} + +#define DO_MLA(A, N, M) (A + N * M) +#define DO_MLS(A, N, M) (A - N * M) + +DO_ZPZZZ(sve_mla_b, uint8_t, H1, DO_MLA) +DO_ZPZZZ(sve_mls_b, uint8_t, H1, DO_MLS) + +DO_ZPZZZ(sve_mla_h, uint16_t, H1_2, DO_MLA) +DO_ZPZZZ(sve_mls_h, uint16_t, H1_2, DO_MLS) + +DO_ZPZZZ(sve_mla_s, uint32_t, H1_4, DO_MLA) +DO_ZPZZZ(sve_mls_s, uint32_t, H1_4, DO_MLS) + +DO_ZPZZZ_D(sve_mla_d, uint64_t, DO_MLA) +DO_ZPZZZ_D(sve_mls_d, uint64_t, DO_MLS) + +#undef DO_MLA +#undef DO_MLS +#undef DO_ZPZZZ +#undef DO_ZPZZZ_D + +void HELPER(sve_index_b)(void *vd, uint32_t start, + uint32_t incr, uint32_t desc) +{ + intptr_t i, opr_sz = simd_oprsz(desc); + uint8_t *d = vd; + for (i = 0; i < opr_sz; i += 1) { + d[H1(i)] = start + i * incr; + } +} + +void HELPER(sve_index_h)(void *vd, uint32_t start, + uint32_t incr, uint32_t desc) +{ + intptr_t i, opr_sz = simd_oprsz(desc) / 2; + uint16_t *d = vd; + for (i = 0; i < opr_sz; i += 1) { + d[H2(i)] = start + i * incr; + } +} + +void HELPER(sve_index_s)(void *vd, uint32_t start, + uint32_t incr, uint32_t desc) +{ + intptr_t i, opr_sz = simd_oprsz(desc) / 4; + uint32_t *d = vd; + for (i = 0; i < opr_sz; i += 1) { + d[H4(i)] = start + i * incr; + } +} + +void HELPER(sve_index_d)(void *vd, uint64_t start, + uint64_t incr, uint32_t desc) +{ + intptr_t i, opr_sz = simd_oprsz(desc) / 8; + uint64_t *d = vd; + for (i = 0; i < opr_sz; i += 1) { + d[i] = start + i * incr; + } +} + +void HELPER(sve_adr_p32)(void *vd, void *vn, void *vm, uint32_t 
desc) +{ + intptr_t i, opr_sz = simd_oprsz(desc) / 4; + uint32_t sh = simd_data(desc); + uint32_t *d = vd, *n = vn, *m = vm; + for (i = 0; i < opr_sz; i += 1) { + d[i] = n[i] + (m[i] << sh); + } +} + +void HELPER(sve_adr_p64)(void *vd, void *vn, void *vm, uint32_t desc) +{ + intptr_t i, opr_sz = simd_oprsz(desc) / 8; + uint64_t sh = simd_data(desc); + uint64_t *d = vd, *n = vn, *m = vm; + for (i = 0; i < opr_sz; i += 1) { + d[i] = n[i] + (m[i] << sh); + } +} + +void HELPER(sve_adr_s32)(void *vd, void *vn, void *vm, uint32_t desc) +{ + intptr_t i, opr_sz = simd_oprsz(desc) / 8; + uint64_t sh = simd_data(desc); + uint64_t *d = vd, *n = vn, *m = vm; + for (i = 0; i < opr_sz; i += 1) { + d[i] = n[i] + ((uint64_t)(int32_t)m[i] << sh); + } +} + +void HELPER(sve_adr_u32)(void *vd, void *vn, void *vm, uint32_t desc) +{ + intptr_t i, opr_sz = simd_oprsz(desc) / 8; + uint64_t sh = simd_data(desc); + uint64_t *d = vd, *n = vn, *m = vm; + for (i = 0; i < opr_sz; i += 1) { + d[i] = n[i] + ((uint64_t)(uint32_t)m[i] << sh); + } +} + +void HELPER(sve_fexpa_h)(void *vd, void *vn, uint32_t desc) +{ + /* These constants are cut-and-paste directly from the ARM pseudocode. */ + static const uint16_t coeff[] = { + 0x0000, 0x0016, 0x002d, 0x0045, 0x005d, 0x0075, 0x008e, 0x00a8, + 0x00c2, 0x00dc, 0x00f8, 0x0114, 0x0130, 0x014d, 0x016b, 0x0189, + 0x01a8, 0x01c8, 0x01e8, 0x0209, 0x022b, 0x024e, 0x0271, 0x0295, + 0x02ba, 0x02e0, 0x0306, 0x032e, 0x0356, 0x037f, 0x03a9, 0x03d4, + }; + intptr_t i, opr_sz = simd_oprsz(desc) / 2; + uint16_t *d = vd, *n = vn; + + for (i = 0; i < opr_sz; i++) { + uint16_t nn = n[i]; + intptr_t idx = extract32(nn, 0, 5); + uint16_t exp = extract32(nn, 5, 5); + d[i] = coeff[idx] | (exp << 10); + } +} + +void HELPER(sve_fexpa_s)(void *vd, void *vn, uint32_t desc) +{ + /* These constants are cut-and-paste directly from the ARM pseudocode. */ + static const uint32_t coeff[] = { + 0x000000, 0x0164d2, 0x02cd87, 0x043a29, + 0x05aac3, 0x071f62, 0x08980f, 0x0a14d5, + 0x0b95c2, 0x0d1adf, 0x0ea43a, 0x1031dc, + 0x11c3d3, 0x135a2b, 0x14f4f0, 0x16942d, + 0x1837f0, 0x19e046, 0x1b8d3a, 0x1d3eda, + 0x1ef532, 0x20b051, 0x227043, 0x243516, + 0x25fed7, 0x27cd94, 0x29a15b, 0x2b7a3a, + 0x2d583f, 0x2f3b79, 0x3123f6, 0x3311c4, + 0x3504f3, 0x36fd92, 0x38fbaf, 0x3aff5b, + 0x3d08a4, 0x3f179a, 0x412c4d, 0x4346cd, + 0x45672a, 0x478d75, 0x49b9be, 0x4bec15, + 0x4e248c, 0x506334, 0x52a81e, 0x54f35b, + 0x5744fd, 0x599d16, 0x5bfbb8, 0x5e60f5, + 0x60ccdf, 0x633f89, 0x65b907, 0x68396a, + 0x6ac0c7, 0x6d4f30, 0x6fe4ba, 0x728177, + 0x75257d, 0x77d0df, 0x7a83b3, 0x7d3e0c, + }; + intptr_t i, opr_sz = simd_oprsz(desc) / 4; + uint32_t *d = vd, *n = vn; + + for (i = 0; i < opr_sz; i++) { + uint32_t nn = n[i]; + intptr_t idx = extract32(nn, 0, 6); + uint32_t exp = extract32(nn, 6, 8); + d[i] = coeff[idx] | (exp << 23); + } +} + +void HELPER(sve_fexpa_d)(void *vd, void *vn, uint32_t desc) +{ + /* These constants are cut-and-paste directly from the ARM pseudocode. 
*/ + static const uint64_t coeff[] = { + 0x0000000000000ull, 0x02C9A3E778061ull, 0x059B0D3158574ull, + 0x0874518759BC8ull, 0x0B5586CF9890Full, 0x0E3EC32D3D1A2ull, + 0x11301D0125B51ull, 0x1429AAEA92DE0ull, 0x172B83C7D517Bull, + 0x1A35BEB6FCB75ull, 0x1D4873168B9AAull, 0x2063B88628CD6ull, + 0x2387A6E756238ull, 0x26B4565E27CDDull, 0x29E9DF51FDEE1ull, + 0x2D285A6E4030Bull, 0x306FE0A31B715ull, 0x33C08B26416FFull, + 0x371A7373AA9CBull, 0x3A7DB34E59FF7ull, 0x3DEA64C123422ull, + 0x4160A21F72E2Aull, 0x44E086061892Dull, 0x486A2B5C13CD0ull, + 0x4BFDAD5362A27ull, 0x4F9B2769D2CA7ull, 0x5342B569D4F82ull, + 0x56F4736B527DAull, 0x5AB07DD485429ull, 0x5E76F15AD2148ull, + 0x6247EB03A5585ull, 0x6623882552225ull, 0x6A09E667F3BCDull, + 0x6DFB23C651A2Full, 0x71F75E8EC5F74ull, 0x75FEB564267C9ull, + 0x7A11473EB0187ull, 0x7E2F336CF4E62ull, 0x82589994CCE13ull, + 0x868D99B4492EDull, 0x8ACE5422AA0DBull, 0x8F1AE99157736ull, + 0x93737B0CDC5E5ull, 0x97D829FDE4E50ull, 0x9C49182A3F090ull, + 0xA0C667B5DE565ull, 0xA5503B23E255Dull, 0xA9E6B5579FDBFull, + 0xAE89F995AD3ADull, 0xB33A2B84F15FBull, 0xB7F76F2FB5E47ull, + 0xBCC1E904BC1D2ull, 0xC199BDD85529Cull, 0xC67F12E57D14Bull, + 0xCB720DCEF9069ull, 0xD072D4A07897Cull, 0xD5818DCFBA487ull, + 0xDA9E603DB3285ull, 0xDFC97337B9B5Full, 0xE502EE78B3FF6ull, + 0xEA4AFA2A490DAull, 0xEFA1BEE615A27ull, 0xF50765B6E4540ull, + 0xFA7C1819E90D8ull, + }; + intptr_t i, opr_sz = simd_oprsz(desc) / 8; + uint64_t *d = vd, *n = vn; + + for (i = 0; i < opr_sz; i++) { + uint64_t nn = n[i]; + intptr_t idx = extract32(nn, 0, 6); + uint64_t exp = extract32(nn, 6, 11); + d[i] = coeff[idx] | (exp << 52); + } +} + +void HELPER(sve_ftssel_h)(void *vd, void *vn, void *vm, uint32_t desc) +{ + intptr_t i, opr_sz = simd_oprsz(desc) / 2; + uint16_t *d = vd, *n = vn, *m = vm; + for (i = 0; i < opr_sz; i += 1) { + uint16_t nn = n[i]; + uint16_t mm = m[i]; + if (mm & 1) { + nn = float16_one; + } + d[i] = nn ^ (mm & 2) << 14; + } +} + +void HELPER(sve_ftssel_s)(void *vd, void *vn, void *vm, uint32_t desc) +{ + intptr_t i, opr_sz = simd_oprsz(desc) / 4; + uint32_t *d = vd, *n = vn, *m = vm; + for (i = 0; i < opr_sz; i += 1) { + uint32_t nn = n[i]; + uint32_t mm = m[i]; + if (mm & 1) { + nn = float32_one; + } + d[i] = nn ^ (mm & 2) << 30; + } +} + +void HELPER(sve_ftssel_d)(void *vd, void *vn, void *vm, uint32_t desc) +{ + intptr_t i, opr_sz = simd_oprsz(desc) / 8; + uint64_t *d = vd, *n = vn, *m = vm; + for (i = 0; i < opr_sz; i += 1) { + uint64_t nn = n[i]; + uint64_t mm = m[i]; + if (mm & 1) { + nn = float64_one; + } + d[i] = nn ^ (mm & 2) << 62; + } +} + +/* + * Signed saturating addition with scalar operand. 
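+ * For example, in the byte form below a lane holding 112 (0x70) with b = 100
+ * computes 212, which exceeds INT8_MAX and is clamped to 127; similarly -128
+ * with b = -100 is clamped to INT8_MIN.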
+ */ + +void HELPER(sve_sqaddi_b)(void *d, void *a, int32_t b, uint32_t desc) +{ + intptr_t i, oprsz = simd_oprsz(desc); + + for (i = 0; i < oprsz; i += sizeof(int8_t)) { + int r = *(int8_t *)((char *)a + i) + b; + if (r > INT8_MAX) { + r = INT8_MAX; + } else if (r < INT8_MIN) { + r = INT8_MIN; + } + *(int8_t *)((char *)d + i) = r; + } +} + +void HELPER(sve_sqaddi_h)(void *d, void *a, int32_t b, uint32_t desc) +{ + intptr_t i, oprsz = simd_oprsz(desc); + + for (i = 0; i < oprsz; i += sizeof(int16_t)) { + int r = *(int16_t *)((char *)a + i) + b; + if (r > INT16_MAX) { + r = INT16_MAX; + } else if (r < INT16_MIN) { + r = INT16_MIN; + } + *(int16_t *)((char *)d + i) = r; + } +} + +void HELPER(sve_sqaddi_s)(void *d, void *a, int64_t b, uint32_t desc) +{ + intptr_t i, oprsz = simd_oprsz(desc); + + for (i = 0; i < oprsz; i += sizeof(int32_t)) { + int64_t r = *(int32_t *)((char *)a + i) + b; + if (r > INT32_MAX) { + r = INT32_MAX; + } else if (r < INT32_MIN) { + r = INT32_MIN; + } + *(int32_t *)((char *)d + i) = r; + } +} + +void HELPER(sve_sqaddi_d)(void *d, void *a, int64_t b, uint32_t desc) +{ + intptr_t i, oprsz = simd_oprsz(desc); + + for (i = 0; i < oprsz; i += sizeof(int64_t)) { + int64_t ai = *(int64_t *)((char *)a + i); + int64_t r = ai + b; + if (((r ^ ai) & ~(ai ^ b)) < 0) { + /* Signed overflow. */ + r = (r < 0 ? INT64_MAX : INT64_MIN); + } + *(int64_t *)((char *)d + i) = r; + } +} + +/* + * Unsigned saturating addition with scalar operand. + */ + +void HELPER(sve_uqaddi_b)(void *d, void *a, int32_t b, uint32_t desc) +{ + intptr_t i, oprsz = simd_oprsz(desc); + + for (i = 0; i < oprsz; i += sizeof(uint8_t)) { + int r = *(uint8_t *)((char *)a + i) + b; + if (r > UINT8_MAX) { + r = UINT8_MAX; + } else if (r < 0) { + r = 0; + } + *(uint8_t *)((char *)d + i) = r; + } +} + +void HELPER(sve_uqaddi_h)(void *d, void *a, int32_t b, uint32_t desc) +{ + intptr_t i, oprsz = simd_oprsz(desc); + + for (i = 0; i < oprsz; i += sizeof(uint16_t)) { + int r = *(uint16_t *)((char *)a + i) + b; + if (r > UINT16_MAX) { + r = UINT16_MAX; + } else if (r < 0) { + r = 0; + } + *(uint16_t *)((char *)d + i) = r; + } +} + +void HELPER(sve_uqaddi_s)(void *d, void *a, int64_t b, uint32_t desc) +{ + intptr_t i, oprsz = simd_oprsz(desc); + + for (i = 0; i < oprsz; i += sizeof(uint32_t)) { + int64_t r = *(uint32_t *)((char *)a + i) + b; + if (r > UINT32_MAX) { + r = UINT32_MAX; + } else if (r < 0) { + r = 0; + } + *(uint32_t *)((char *)d + i) = r; + } +} + +void HELPER(sve_uqaddi_d)(void *d, void *a, uint64_t b, uint32_t desc) +{ + intptr_t i, oprsz = simd_oprsz(desc); + + for (i = 0; i < oprsz; i += sizeof(uint64_t)) { + uint64_t r = *(uint64_t *)((char *)a + i) + b; + if (r < b) { + r = UINT64_MAX; + } + *(uint64_t *)((char *)d + i) = r; + } +} + +void HELPER(sve_uqsubi_d)(void *d, void *a, uint64_t b, uint32_t desc) +{ + intptr_t i, oprsz = simd_oprsz(desc); + + for (i = 0; i < oprsz; i += sizeof(uint64_t)) { + uint64_t ai = *(uint64_t *)((char *)a + i); + *(uint64_t *)((char *)d + i) = (ai < b ? 0 : ai - b); + } +} + +/* Two operand predicated copy immediate with merge. All valid immediates + * can fit within 17 signed bits in the simd_data field. 
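+ * In the byte form below, dup_const(MO_8, mm) replicates the immediate across
+ * the 64-bit chunk and expand_pred_b() turns each predicate byte into a lane
+ * mask, so active lanes take the immediate and inactive lanes keep the
+ * corresponding bits of Zn.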
+ */ +void HELPER(sve_cpy_m_b)(void *vd, void *vn, void *vg, + uint64_t mm, uint32_t desc) +{ + intptr_t i, opr_sz = simd_oprsz(desc) / 8; + uint64_t *d = vd, *n = vn; + uint8_t *pg = vg; + + mm = dup_const(MO_8, mm); + for (i = 0; i < opr_sz; i += 1) { + uint64_t nn = n[i]; + uint64_t pp = expand_pred_b(pg[H1(i)]); + d[i] = (mm & pp) | (nn & ~pp); + } +} + +void HELPER(sve_cpy_m_h)(void *vd, void *vn, void *vg, + uint64_t mm, uint32_t desc) +{ + intptr_t i, opr_sz = simd_oprsz(desc) / 8; + uint64_t *d = vd, *n = vn; + uint8_t *pg = vg; + + mm = dup_const(MO_16, mm); + for (i = 0; i < opr_sz; i += 1) { + uint64_t nn = n[i]; + uint64_t pp = expand_pred_h(pg[H1(i)]); + d[i] = (mm & pp) | (nn & ~pp); + } +} + +void HELPER(sve_cpy_m_s)(void *vd, void *vn, void *vg, + uint64_t mm, uint32_t desc) +{ + intptr_t i, opr_sz = simd_oprsz(desc) / 8; + uint64_t *d = vd, *n = vn; + uint8_t *pg = vg; + + mm = dup_const(MO_32, mm); + for (i = 0; i < opr_sz; i += 1) { + uint64_t nn = n[i]; + uint64_t pp = expand_pred_s(pg[H1(i)]); + d[i] = (mm & pp) | (nn & ~pp); + } +} + +void HELPER(sve_cpy_m_d)(void *vd, void *vn, void *vg, + uint64_t mm, uint32_t desc) +{ + intptr_t i, opr_sz = simd_oprsz(desc) / 8; + uint64_t *d = vd, *n = vn; + uint8_t *pg = vg; + + for (i = 0; i < opr_sz; i += 1) { + uint64_t nn = n[i]; + d[i] = (pg[H1(i)] & 1 ? mm : nn); + } +} + +void HELPER(sve_cpy_z_b)(void *vd, void *vg, uint64_t val, uint32_t desc) +{ + intptr_t i, opr_sz = simd_oprsz(desc) / 8; + uint64_t *d = vd; + uint8_t *pg = vg; + + val = dup_const(MO_8, val); + for (i = 0; i < opr_sz; i += 1) { + d[i] = val & expand_pred_b(pg[H1(i)]); + } +} + +void HELPER(sve_cpy_z_h)(void *vd, void *vg, uint64_t val, uint32_t desc) +{ + intptr_t i, opr_sz = simd_oprsz(desc) / 8; + uint64_t *d = vd; + uint8_t *pg = vg; + + val = dup_const(MO_16, val); + for (i = 0; i < opr_sz; i += 1) { + d[i] = val & expand_pred_h(pg[H1(i)]); + } +} + +void HELPER(sve_cpy_z_s)(void *vd, void *vg, uint64_t val, uint32_t desc) +{ + intptr_t i, opr_sz = simd_oprsz(desc) / 8; + uint64_t *d = vd; + uint8_t *pg = vg; + + val = dup_const(MO_32, val); + for (i = 0; i < opr_sz; i += 1) { + d[i] = val & expand_pred_s(pg[H1(i)]); + } +} + +void HELPER(sve_cpy_z_d)(void *vd, void *vg, uint64_t val, uint32_t desc) +{ + intptr_t i, opr_sz = simd_oprsz(desc) / 8; + uint64_t *d = vd; + uint8_t *pg = vg; + + for (i = 0; i < opr_sz; i += 1) { + d[i] = (pg[H1(i)] & 1 ? val : 0); + } +} + +/* Big-endian hosts need to frob the byte indices. If the copy + * happens to be 8-byte aligned, then no frobbing necessary. 
+ */ +static void swap_memmove(void *vd, void *vs, size_t n) +{ + uintptr_t d = (uintptr_t)vd; + uintptr_t s = (uintptr_t)vs; + uintptr_t o = (d | s | n) & 7; + size_t i; + +#ifndef HOST_WORDS_BIGENDIAN + o = 0; +#endif + switch (o) { + case 0: + memmove(vd, vs, n); + break; + + case 4: + if (d < s || d >= s + n) { + for (i = 0; i < n; i += 4) { + *(uint32_t *)H1_4(d + i) = *(uint32_t *)H1_4(s + i); + } + } else { + for (i = n; i > 0; ) { + i -= 4; + *(uint32_t *)H1_4(d + i) = *(uint32_t *)H1_4(s + i); + } + } + break; + + case 2: + case 6: + if (d < s || d >= s + n) { + for (i = 0; i < n; i += 2) { + *(uint16_t *)H1_2(d + i) = *(uint16_t *)H1_2(s + i); + } + } else { + for (i = n; i > 0; ) { + i -= 2; + *(uint16_t *)H1_2(d + i) = *(uint16_t *)H1_2(s + i); + } + } + break; + + default: + if (d < s || d >= s + n) { + for (i = 0; i < n; i++) { + *(uint8_t *)H1(d + i) = *(uint8_t *)H1(s + i); + } + } else { + for (i = n; i > 0; ) { + i -= 1; + *(uint8_t *)H1(d + i) = *(uint8_t *)H1(s + i); + } + } + break; + } +} + +/* Similarly for memset of 0. */ +static void swap_memzero(void *vd, size_t n) +{ + uintptr_t d = (uintptr_t)vd; + uintptr_t o = (d | n) & 7; + size_t i; + + /* Usually, the first bit of a predicate is set, so N is 0. */ + if (likely(n == 0)) { + return; + } + +#ifndef HOST_WORDS_BIGENDIAN + o = 0; +#endif + switch (o) { + case 0: + memset(vd, 0, n); + break; + + case 4: + for (i = 0; i < n; i += 4) { + *(uint32_t *)H1_4(d + i) = 0; + } + break; + + case 2: + case 6: + for (i = 0; i < n; i += 2) { + *(uint16_t *)H1_2(d + i) = 0; + } + break; + + default: + for (i = 0; i < n; i++) { + *(uint8_t *)H1(d + i) = 0; + } + break; + } +} + +void HELPER(sve_ext)(void *vd, void *vn, void *vm, uint32_t desc) +{ + intptr_t opr_sz = simd_oprsz(desc); + size_t n_ofs = simd_data(desc); + size_t n_siz = opr_sz - n_ofs; + + if (vd != vm) { + swap_memmove(vd, (char *)vn + n_ofs, n_siz); + swap_memmove((char *)vd + n_siz, vm, n_ofs); + } else if (vd != vn) { + swap_memmove((char *)vd + n_siz, vd, n_ofs); + swap_memmove(vd, (char *)vn + n_ofs, n_siz); + } else { + /* vd == vn == vm. Need temp space. 
*/ + ARMVectorReg tmp; + swap_memmove(&tmp, vm, n_ofs); + swap_memmove(vd, (char *)vd + n_ofs, n_siz); + memcpy((char *)vd + n_siz, &tmp, n_ofs); + } +} + +#define DO_INSR(NAME, TYPE, H) \ +void HELPER(NAME)(void *vd, void *vn, uint64_t val, uint32_t desc) \ +{ \ + intptr_t opr_sz = simd_oprsz(desc); \ + swap_memmove((char *)vd + sizeof(TYPE), vn, opr_sz - sizeof(TYPE)); \ + *(TYPE *)((char *)vd + H(0)) = val; \ +} + +DO_INSR(sve_insr_b, uint8_t, H1) +DO_INSR(sve_insr_h, uint16_t, H1_2) +DO_INSR(sve_insr_s, uint32_t, H1_4) +DO_INSR(sve_insr_d, uint64_t, ) + +#undef DO_INSR + +void HELPER(sve_rev_b)(void *vd, void *vn, uint32_t desc) +{ + intptr_t i, j, opr_sz = simd_oprsz(desc); + for (i = 0, j = opr_sz - 8; i < opr_sz / 2; i += 8, j -= 8) { + uint64_t f = *(uint64_t *)((char *)vn + i); + uint64_t b = *(uint64_t *)((char *)vn + j); + *(uint64_t *)((char *)vd + i) = bswap64(b); + *(uint64_t *)((char *)vd + j) = bswap64(f); + } +} + +void HELPER(sve_rev_h)(void *vd, void *vn, uint32_t desc) +{ + intptr_t i, j, opr_sz = simd_oprsz(desc); + for (i = 0, j = opr_sz - 8; i < opr_sz / 2; i += 8, j -= 8) { + uint64_t f = *(uint64_t *)((char *)vn + i); + uint64_t b = *(uint64_t *)((char *)vn + j); + *(uint64_t *)((char *)vd + i) = hswap64(b); + *(uint64_t *)((char *)vd + j) = hswap64(f); + } +} + +void HELPER(sve_rev_s)(void *vd, void *vn, uint32_t desc) +{ + intptr_t i, j, opr_sz = simd_oprsz(desc); + for (i = 0, j = opr_sz - 8; i < opr_sz / 2; i += 8, j -= 8) { + uint64_t f = *(uint64_t *)((char *)vn + i); + uint64_t b = *(uint64_t *)((char *)vn + j); + *(uint64_t *)((char *)vd + i) = rol64(b, 32); + *(uint64_t *)((char *)vd + j) = rol64(f, 32); + } +} + +void HELPER(sve_rev_d)(void *vd, void *vn, uint32_t desc) +{ + intptr_t i, j, opr_sz = simd_oprsz(desc); + for (i = 0, j = opr_sz - 8; i < opr_sz / 2; i += 8, j -= 8) { + uint64_t f = *(uint64_t *)((char *)vn + i); + uint64_t b = *(uint64_t *)((char *)vn + j); + *(uint64_t *)((char *)vd + i) = b; + *(uint64_t *)((char *)vd + j) = f; + } +} + +#define DO_TBL(NAME, TYPE, H) \ +void HELPER(NAME)(void *vd, void *vn, void *vm, uint32_t desc) \ +{ \ + intptr_t i, opr_sz = simd_oprsz(desc); \ + uintptr_t elem = opr_sz / sizeof(TYPE); \ + TYPE *d = vd, *n = vn, *m = vm; \ + ARMVectorReg tmp; \ + if (unlikely(vd == vn)) { \ + n = memcpy(&tmp, vn, opr_sz); \ + } \ + for (i = 0; i < elem; i++) { \ + TYPE j = m[H(i)]; \ + d[H(i)] = j < elem ? n[H(j)] : 0; \ + } \ +} + +DO_TBL(sve_tbl_b, uint8_t, H1) +DO_TBL(sve_tbl_h, uint16_t, H2) +DO_TBL(sve_tbl_s, uint32_t, H4) +DO_TBL(sve_tbl_d, uint64_t, ) + +#undef TBL + +#define DO_UNPK(NAME, TYPED, TYPES, HD, HS) \ +void HELPER(NAME)(void *vd, void *vn, uint32_t desc) \ +{ \ + intptr_t i, opr_sz = simd_oprsz(desc); \ + TYPED *d = vd; \ + TYPES *n = vn; \ + ARMVectorReg tmp; \ + if (unlikely((char *)vn - (char *)vd < opr_sz)) { \ + n = memcpy(&tmp, n, opr_sz / 2); \ + } \ + for (i = 0; i < opr_sz / sizeof(TYPED); i++) { \ + d[HD(i)] = n[HS(i)]; \ + } \ +} + +DO_UNPK(sve_sunpk_h, int16_t, int8_t, H2, H1) +DO_UNPK(sve_sunpk_s, int32_t, int16_t, H4, H2) +DO_UNPK(sve_sunpk_d, int64_t, int32_t, , H4) + +DO_UNPK(sve_uunpk_h, uint16_t, uint8_t, H2, H1) +DO_UNPK(sve_uunpk_s, uint32_t, uint16_t, H4, H2) +DO_UNPK(sve_uunpk_d, uint64_t, uint32_t, , H4) + +#undef DO_UNPK + +/* Mask of bits included in the even numbered predicates of width esz. + * We also use this for expand_bits/compress_bits, and so extend the + * same pattern out to 16-bit units. 
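+ * For instance, with esz = 1 each halfword element owns two predicate bits, so
+ * the even-numbered elements occupy bit pairs {0,1}, {4,5}, ... and the mask is
+ * 0x3333...; esz = 0 gives the alternating 0x5555... pattern.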
+ */ +static const uint64_t even_bit_esz_masks[5] = { + 0x5555555555555555ull, + 0x3333333333333333ull, + 0x0f0f0f0f0f0f0f0full, + 0x00ff00ff00ff00ffull, + 0x0000ffff0000ffffull, +}; + +/* Zero-extend units of 2**N bits to units of 2**(N+1) bits. + * For N==0, this corresponds to the operation that in qemu/bitops.h + * we call half_shuffle64; this algorithm is from Hacker's Delight, + * section 7-2 Shuffling Bits. + */ +static uint64_t expand_bits(uint64_t x, int n) +{ + int i; + + x &= 0xffffffffu; + for (i = 4; i >= n; i--) { + int sh = 1 << i; + x = ((x << sh) | x) & even_bit_esz_masks[i]; + } + return x; +} + +/* Compress units of 2**(N+1) bits to units of 2**N bits. + * For N==0, this corresponds to the operation that in qemu/bitops.h + * we call half_unshuffle64; this algorithm is from Hacker's Delight, + * section 7-2 Shuffling Bits, where it is called an inverse half shuffle. + */ +static uint64_t compress_bits(uint64_t x, int n) +{ + int i; + + for (i = n; i <= 4; i++) { + int sh = 1 << i; + x &= even_bit_esz_masks[i]; + x = (x >> sh) | x; + } + return x & 0xffffffffu; +} + +void HELPER(sve_zip_p)(void *vd, void *vn, void *vm, uint32_t pred_desc) +{ + intptr_t oprsz = extract32(pred_desc, 0, SIMD_OPRSZ_BITS) + 2; + int esz = extract32(pred_desc, SIMD_DATA_SHIFT, 2); + intptr_t high = extract32(pred_desc, SIMD_DATA_SHIFT + 2, 1); + uint64_t *d = vd; + intptr_t i; + + if (oprsz <= 8) { + uint64_t nn = *(uint64_t *)vn; + uint64_t mm = *(uint64_t *)vm; + int half = 4 * oprsz; + + nn = extract64(nn, high * half, half); + mm = extract64(mm, high * half, half); + nn = expand_bits(nn, esz); + mm = expand_bits(mm, esz); + d[0] = nn + (mm << (1 << esz)); + } else { + ARMPredicateReg tmp_n, tmp_m; + + /* We produce output faster than we consume input. + Therefore we must be mindful of possible overlap. 
*/ + if (((char *)vn - (char *)vd) < (uintptr_t)oprsz) { + vn = memcpy(&tmp_n, vn, oprsz); + } + if (((char *)vm - (char *)vd) < (uintptr_t)oprsz) { + vm = memcpy(&tmp_m, vm, oprsz); + } + if (high) { + high = oprsz >> 1; + } + + if ((high & 3) == 0) { + uint32_t *n = vn, *m = vm; + high >>= 2; + + for (i = 0; i < DIV_ROUND_UP(oprsz, 8); i++) { + uint64_t nn = n[H4(high + i)]; + uint64_t mm = m[H4(high + i)]; + + nn = expand_bits(nn, esz); + mm = expand_bits(mm, esz); + d[i] = nn + (mm << (1 << esz)); + } + } else { + uint8_t *n = vn, *m = vm; + uint16_t *d16 = vd; + + for (i = 0; i < oprsz / 2; i++) { + uint16_t nn = n[H1(high + i)]; + uint16_t mm = m[H1(high + i)]; + + nn = expand_bits(nn, esz); + mm = expand_bits(mm, esz); + d16[H2(i)] = nn + (mm << (1 << esz)); + } + } + } +} + +void HELPER(sve_uzp_p)(void *vd, void *vn, void *vm, uint32_t pred_desc) +{ + intptr_t oprsz = extract32(pred_desc, 0, SIMD_OPRSZ_BITS) + 2; + int esz = extract32(pred_desc, SIMD_DATA_SHIFT, 2); + int odd = extract32(pred_desc, SIMD_DATA_SHIFT + 2, 1) << esz; + uint64_t *d = vd, *n = vn, *m = vm; + uint64_t l, h; + intptr_t i; + + if (oprsz <= 8) { + l = compress_bits(n[0] >> odd, esz); + h = compress_bits(m[0] >> odd, esz); + d[0] = extract64(l + (h << (4 * oprsz)), 0, 8 * oprsz); + } else { + ARMPredicateReg tmp_m; + intptr_t oprsz_16 = oprsz / 16; + + if (((char *)vm - (char *)vd) < (uintptr_t)oprsz) { + m = memcpy(&tmp_m, vm, oprsz); + } + + for (i = 0; i < oprsz_16; i++) { + l = n[2 * i + 0]; + h = n[2 * i + 1]; + l = compress_bits(l >> odd, esz); + h = compress_bits(h >> odd, esz); + d[i] = l + (h << 32); + } + + /* For VL which is not a power of 2, the results from M do not + align nicely with the uint64_t for D. Put the aligned results + from M into TMP_M and then copy it into place afterward. */ + if (oprsz & 15) { + d[i] = compress_bits(n[2 * i] >> odd, esz); + + for (i = 0; i < oprsz_16; i++) { + l = m[2 * i + 0]; + h = m[2 * i + 1]; + l = compress_bits(l >> odd, esz); + h = compress_bits(h >> odd, esz); + tmp_m.p[i] = l + (h << 32); + } + tmp_m.p[i] = compress_bits(m[2 * i] >> odd, esz); + + swap_memmove((char *)vd + oprsz / 2, &tmp_m, oprsz / 2); + } else { + for (i = 0; i < oprsz_16; i++) { + l = m[2 * i + 0]; + h = m[2 * i + 1]; + l = compress_bits(l >> odd, esz); + h = compress_bits(h >> odd, esz); + d[oprsz_16 + i] = l + (h << 32); + } + } + } +} + +void HELPER(sve_trn_p)(void *vd, void *vn, void *vm, uint32_t pred_desc) +{ + intptr_t oprsz = extract32(pred_desc, 0, SIMD_OPRSZ_BITS) + 2; + uintptr_t esz = extract32(pred_desc, SIMD_DATA_SHIFT, 2); + bool odd = extract32(pred_desc, SIMD_DATA_SHIFT + 2, 1); + uint64_t *d = vd, *n = vn, *m = vm; + uint64_t mask; + int shr, shl; + intptr_t i; + + shl = 1 << esz; + shr = 0; + mask = even_bit_esz_masks[esz]; + if (odd) { + mask <<= shl; + shr = shl; + shl = 0; + } + + for (i = 0; i < DIV_ROUND_UP(oprsz, 8); i++) { + uint64_t nn = (n[i] & mask) >> shr; + uint64_t mm = (m[i] & mask) << shl; + d[i] = nn + mm; + } +} + +/* Reverse units of 2**N bits. 
*/ +static uint64_t reverse_bits_64(uint64_t x, int n) +{ + int i, sh; + + x = bswap64(x); + for (i = 2, sh = 4; i >= n; i--, sh >>= 1) { + uint64_t mask = even_bit_esz_masks[i]; + x = ((x & mask) << sh) | ((x >> sh) & mask); + } + return x; +} + +static uint8_t reverse_bits_8(uint8_t x, int n) +{ + static const uint8_t mask[3] = { 0x55, 0x33, 0x0f }; + int i, sh; + + for (i = 2, sh = 4; i >= n; i--, sh >>= 1) { + x = ((x & mask[i]) << sh) | ((x >> sh) & mask[i]); + } + return x; +} + +void HELPER(sve_rev_p)(void *vd, void *vn, uint32_t pred_desc) +{ + intptr_t oprsz = extract32(pred_desc, 0, SIMD_OPRSZ_BITS) + 2; + int esz = extract32(pred_desc, SIMD_DATA_SHIFT, 2); + intptr_t i, oprsz_2 = oprsz / 2; + + if (oprsz <= 8) { + uint64_t l = *(uint64_t *)vn; + l = reverse_bits_64(l << (64 - 8 * oprsz), esz); + *(uint64_t *)vd = l; + } else if ((oprsz & 15) == 0) { + for (i = 0; i < oprsz_2; i += 8) { + intptr_t ih = oprsz - 8 - i; + uint64_t l = reverse_bits_64(*(uint64_t *)((char *)vn + i), esz); + uint64_t h = reverse_bits_64(*(uint64_t *)((char *)vn + ih), esz); + *(uint64_t *)((char *)vd + i) = h; + *(uint64_t *)((char *)vd + ih) = l; + } + } else { + for (i = 0; i < oprsz_2; i += 1) { + intptr_t il = H1(i); + intptr_t ih = H1(oprsz - 1 - i); + uint8_t l = reverse_bits_8(*(uint8_t *)((char *)vn + il), esz); + uint8_t h = reverse_bits_8(*(uint8_t *)((char *)vn + ih), esz); + *(uint8_t *)((char *)vd + il) = h; + *(uint8_t *)((char *)vd + ih) = l; + } + } +} + +void HELPER(sve_punpk_p)(void *vd, void *vn, uint32_t pred_desc) +{ + intptr_t oprsz = extract32(pred_desc, 0, SIMD_OPRSZ_BITS) + 2; + intptr_t high = extract32(pred_desc, SIMD_DATA_SHIFT + 2, 1); + uint64_t *d = vd; + intptr_t i; + + if (oprsz <= 8) { + uint64_t nn = *(uint64_t *)vn; + int half = 4 * oprsz; + + nn = extract64(nn, high * half, half); + nn = expand_bits(nn, 0); + d[0] = nn; + } else { + ARMPredicateReg tmp_n; + + /* We produce output faster than we consume input. + Therefore we must be mindful of possible overlap. */ + if (((char *)vn - (char *)vd) < (uintptr_t)oprsz) { + vn = memcpy(&tmp_n, vn, oprsz); + } + if (high) { + high = oprsz >> 1; + } + + if ((high & 3) == 0) { + uint32_t *n = vn; + high >>= 2; + + for (i = 0; i < DIV_ROUND_UP(oprsz, 8); i++) { + uint64_t nn = n[H4(high + i)]; + d[i] = expand_bits(nn, 0); + } + } else { + uint16_t *d16 = vd; + uint8_t *n = vn; + + for (i = 0; i < oprsz / 2; i++) { + uint16_t nn = n[H1(high + i)]; + d16[H2(i)] = expand_bits(nn, 0); + } + } + } +} + +#define DO_ZIP(NAME, TYPE, H) \ +void HELPER(NAME)(void *vd, void *vn, void *vm, uint32_t desc) \ +{ \ + intptr_t oprsz = simd_oprsz(desc); \ + intptr_t i, oprsz_2 = oprsz / 2; \ + ARMVectorReg tmp_n, tmp_m; \ + /* We produce output faster than we consume input. \ + Therefore we must be mindful of possible overlap. 
*/ \ + if (unlikely(((char *)vn - (char *)vd) < (uintptr_t)oprsz)) { \ + vn = memcpy(&tmp_n, vn, oprsz_2); \ + } \ + if (unlikely(((char *)vm - (char *)vd) < (uintptr_t)oprsz)) { \ + vm = memcpy(&tmp_m, vm, oprsz_2); \ + } \ + for (i = 0; i < oprsz_2; i += sizeof(TYPE)) { \ + *(TYPE *)((char *)vd + H(2 * i + 0)) = *(TYPE *)((char *)vn + H(i)); \ + *(TYPE *)((char *)vd + H(2 * i + sizeof(TYPE))) = *(TYPE *)((char *)vm + H(i)); \ + } \ +} + +DO_ZIP(sve_zip_b, uint8_t, H1) +DO_ZIP(sve_zip_h, uint16_t, H1_2) +DO_ZIP(sve_zip_s, uint32_t, H1_4) +DO_ZIP(sve_zip_d, uint64_t, ) + +#define DO_UZP(NAME, TYPE, H) \ +void HELPER(NAME)(void *vd, void *vn, void *vm, uint32_t desc) \ +{ \ + intptr_t oprsz = simd_oprsz(desc); \ + intptr_t oprsz_2 = oprsz / 2; \ + intptr_t odd_ofs = simd_data(desc); \ + intptr_t i; \ + ARMVectorReg tmp_m; \ + if (unlikely(((char *)vm - (char *)vd) < (uintptr_t)oprsz)) { \ + vm = memcpy(&tmp_m, vm, oprsz); \ + } \ + for (i = 0; i < oprsz_2; i += sizeof(TYPE)) { \ + *(TYPE *)((char *)vd + H(i)) = *(TYPE *)((char *)vn + H(2 * i + odd_ofs)); \ + } \ + for (i = 0; i < oprsz_2; i += sizeof(TYPE)) { \ + *(TYPE *)((char *)vd + H(oprsz_2 + i)) = *(TYPE *)((char *)vm + H(2 * i + odd_ofs)); \ + } \ +} + +DO_UZP(sve_uzp_b, uint8_t, H1) +DO_UZP(sve_uzp_h, uint16_t, H1_2) +DO_UZP(sve_uzp_s, uint32_t, H1_4) +DO_UZP(sve_uzp_d, uint64_t, ) + +#define DO_TRN(NAME, TYPE, H) \ +void HELPER(NAME)(void *vd, void *vn, void *vm, uint32_t desc) \ +{ \ + intptr_t oprsz = simd_oprsz(desc); \ + intptr_t odd_ofs = simd_data(desc); \ + intptr_t i; \ + for (i = 0; i < oprsz; i += 2 * sizeof(TYPE)) { \ + TYPE ae = *(TYPE *)((char *)vn + H(i + odd_ofs)); \ + TYPE be = *(TYPE *)((char *)vm + H(i + odd_ofs)); \ + *(TYPE *)((char *)vd + H(i + 0)) = ae; \ + *(TYPE *)((char *)vd + H(i + sizeof(TYPE))) = be; \ + } \ +} + +DO_TRN(sve_trn_b, uint8_t, H1) +DO_TRN(sve_trn_h, uint16_t, H1_2) +DO_TRN(sve_trn_s, uint32_t, H1_4) +DO_TRN(sve_trn_d, uint64_t, ) + +#undef DO_ZIP +#undef DO_UZP +#undef DO_TRN + +void HELPER(sve_compact_s)(void *vd, void *vn, void *vg, uint32_t desc) +{ + intptr_t i, j, opr_sz = simd_oprsz(desc) / 4; + uint32_t *d = vd, *n = vn; + uint8_t *pg = vg; + + for (i = j = 0; i < opr_sz; i++) { + if (pg[H1(i / 2)] & (i & 1 ? 0x10 : 0x01)) { + d[H4(j)] = n[H4(i)]; + j++; + } + } + for (; j < opr_sz; j++) { + d[H4(j)] = 0; + } +} + +void HELPER(sve_compact_d)(void *vd, void *vn, void *vg, uint32_t desc) +{ + intptr_t i, j, opr_sz = simd_oprsz(desc) / 8; + uint64_t *d = vd, *n = vn; + uint8_t *pg = vg; + + for (i = j = 0; i < opr_sz; i++) { + if (pg[H1(i)] & 1) { + d[j] = n[i]; + j++; + } + } + for (; j < opr_sz; j++) { + d[j] = 0; + } +} + +/* Similar to the ARM LastActiveElement pseudocode function, except the + * result is multiplied by the element size. This includes the not found + * indication; e.g. not found for esz=3 is -8. + */ +int32_t HELPER(sve_last_active_element)(void *vg, uint32_t pred_desc) +{ + intptr_t oprsz = extract32(pred_desc, 0, SIMD_OPRSZ_BITS) + 2; + intptr_t esz = extract32(pred_desc, SIMD_DATA_SHIFT, 2); + + return last_active_element(vg, DIV_ROUND_UP(oprsz, 8), esz); +} + +void HELPER(sve_splice)(void *vd, void *vn, void *vm, void *vg, uint32_t desc) +{ + intptr_t opr_sz = simd_oprsz(desc) / 8; + int esz = simd_data(desc); + uint64_t pg, first_g, last_g, len, mask = pred_esz_masks[esz]; + intptr_t i, first_i, last_i; + ARMVectorReg tmp; + + first_i = last_i = 0; + first_g = last_g = 0; + + /* Find the extent of the active elements within VG. 
*/ + for (i = QEMU_ALIGN_UP(opr_sz, 8) - 8; i >= 0; i -= 8) { + pg = *(uint64_t *)((char *)vg + i) & mask; + if (pg) { + if (last_g == 0) { + last_g = pg; + last_i = i; + } + first_g = pg; + first_i = i; + } + } + + len = 0; + if (first_g != 0) { + first_i = first_i * 8 + ctz64(first_g); + last_i = last_i * 8 + 63 - clz64(last_g); + len = last_i - first_i + (1ULL << esz); + if (vd == vm) { + vm = memcpy(&tmp, vm, opr_sz * 8); + } + swap_memmove(vd, (char *)vn + first_i, len); + } + swap_memmove((char *)vd + len, vm, opr_sz * 8 - len); +} + +void HELPER(sve_sel_zpzz_b)(void *vd, void *vn, void *vm, + void *vg, uint32_t desc) +{ + intptr_t i, opr_sz = simd_oprsz(desc) / 8; + uint64_t *d = vd, *n = vn, *m = vm; + uint8_t *pg = vg; + + for (i = 0; i < opr_sz; i += 1) { + uint64_t nn = n[i], mm = m[i]; + uint64_t pp = expand_pred_b(pg[H1(i)]); + d[i] = (nn & pp) | (mm & ~pp); + } +} + +void HELPER(sve_sel_zpzz_h)(void *vd, void *vn, void *vm, + void *vg, uint32_t desc) +{ + intptr_t i, opr_sz = simd_oprsz(desc) / 8; + uint64_t *d = vd, *n = vn, *m = vm; + uint8_t *pg = vg; + + for (i = 0; i < opr_sz; i += 1) { + uint64_t nn = n[i], mm = m[i]; + uint64_t pp = expand_pred_h(pg[H1(i)]); + d[i] = (nn & pp) | (mm & ~pp); + } +} + +void HELPER(sve_sel_zpzz_s)(void *vd, void *vn, void *vm, + void *vg, uint32_t desc) +{ + intptr_t i, opr_sz = simd_oprsz(desc) / 8; + uint64_t *d = vd, *n = vn, *m = vm; + uint8_t *pg = vg; + + for (i = 0; i < opr_sz; i += 1) { + uint64_t nn = n[i], mm = m[i]; + uint64_t pp = expand_pred_s(pg[H1(i)]); + d[i] = (nn & pp) | (mm & ~pp); + } +} + +void HELPER(sve_sel_zpzz_d)(void *vd, void *vn, void *vm, + void *vg, uint32_t desc) +{ + intptr_t i, opr_sz = simd_oprsz(desc) / 8; + uint64_t *d = vd, *n = vn, *m = vm; + uint8_t *pg = vg; + + for (i = 0; i < opr_sz; i += 1) { + uint64_t nn = n[i], mm = m[i]; + d[i] = (pg[H1(i)] & 1 ? nn : mm); + } +} + +/* Two operand comparison controlled by a predicate. + * ??? It is very tempting to want to be able to expand this inline + * with x86 instructions, e.g. + * + * vcmpeqw zm, zn, %ymm0 + * vpmovmskb %ymm0, %eax + * and $0x5555, %eax + * and pg, %eax + * + * or even aarch64, e.g. + * + * // mask = 4000 1000 0400 0100 0040 0010 0004 0001 + * cmeq v0.8h, zn, zm + * and v0.8h, v0.8h, mask + * addv h0, v0.8h + * and v0.8b, pg + * + * However, coming up with an abstraction that allows vector inputs and + * a scalar output, and also handles the byte-ordering of sub-uint64_t + * scalar outputs, is tricky. 
+ */ +#define DO_CMP_PPZZ(NAME, TYPE, OP, H, MASK) \ +uint32_t HELPER(NAME)(void *vd, void *vn, void *vm, void *vg, uint32_t desc) \ +{ \ + intptr_t opr_sz = simd_oprsz(desc); \ + uint32_t flags = PREDTEST_INIT; \ + intptr_t i = opr_sz; \ + do { \ + uint64_t out = 0, pg; \ + do { \ + i -= sizeof(TYPE), out <<= sizeof(TYPE); \ + TYPE nn = *(TYPE *)((char *)vn + H(i)); \ + TYPE mm = *(TYPE *)((char *)vm + H(i)); \ + out |= nn OP mm; \ + } while (i & 63); \ + pg = *(uint64_t *)((char *)vg + (i >> 3)) & MASK; \ + out &= pg; \ + *(uint64_t *)((char *)vd + (i >> 3)) = out; \ + flags = iter_predtest_bwd(out, pg, flags); \ + } while (i > 0); \ + return flags; \ +} + +#define DO_CMP_PPZZ_B(NAME, TYPE, OP) \ + DO_CMP_PPZZ(NAME, TYPE, OP, H1, 0xffffffffffffffffull) +#define DO_CMP_PPZZ_H(NAME, TYPE, OP) \ + DO_CMP_PPZZ(NAME, TYPE, OP, H1_2, 0x5555555555555555ull) +#define DO_CMP_PPZZ_S(NAME, TYPE, OP) \ + DO_CMP_PPZZ(NAME, TYPE, OP, H1_4, 0x1111111111111111ull) +#define DO_CMP_PPZZ_D(NAME, TYPE, OP) \ + DO_CMP_PPZZ(NAME, TYPE, OP, , 0x0101010101010101ull) + +DO_CMP_PPZZ_B(sve_cmpeq_ppzz_b, uint8_t, ==) +DO_CMP_PPZZ_H(sve_cmpeq_ppzz_h, uint16_t, ==) +DO_CMP_PPZZ_S(sve_cmpeq_ppzz_s, uint32_t, ==) +DO_CMP_PPZZ_D(sve_cmpeq_ppzz_d, uint64_t, ==) + +DO_CMP_PPZZ_B(sve_cmpne_ppzz_b, uint8_t, !=) +DO_CMP_PPZZ_H(sve_cmpne_ppzz_h, uint16_t, !=) +DO_CMP_PPZZ_S(sve_cmpne_ppzz_s, uint32_t, !=) +DO_CMP_PPZZ_D(sve_cmpne_ppzz_d, uint64_t, !=) + +DO_CMP_PPZZ_B(sve_cmpgt_ppzz_b, int8_t, >) +DO_CMP_PPZZ_H(sve_cmpgt_ppzz_h, int16_t, >) +DO_CMP_PPZZ_S(sve_cmpgt_ppzz_s, int32_t, >) +DO_CMP_PPZZ_D(sve_cmpgt_ppzz_d, int64_t, >) + +DO_CMP_PPZZ_B(sve_cmpge_ppzz_b, int8_t, >=) +DO_CMP_PPZZ_H(sve_cmpge_ppzz_h, int16_t, >=) +DO_CMP_PPZZ_S(sve_cmpge_ppzz_s, int32_t, >=) +DO_CMP_PPZZ_D(sve_cmpge_ppzz_d, int64_t, >=) + +DO_CMP_PPZZ_B(sve_cmphi_ppzz_b, uint8_t, >) +DO_CMP_PPZZ_H(sve_cmphi_ppzz_h, uint16_t, >) +DO_CMP_PPZZ_S(sve_cmphi_ppzz_s, uint32_t, >) +DO_CMP_PPZZ_D(sve_cmphi_ppzz_d, uint64_t, >) + +DO_CMP_PPZZ_B(sve_cmphs_ppzz_b, uint8_t, >=) +DO_CMP_PPZZ_H(sve_cmphs_ppzz_h, uint16_t, >=) +DO_CMP_PPZZ_S(sve_cmphs_ppzz_s, uint32_t, >=) +DO_CMP_PPZZ_D(sve_cmphs_ppzz_d, uint64_t, >=) + +#undef DO_CMP_PPZZ_B +#undef DO_CMP_PPZZ_H +#undef DO_CMP_PPZZ_S +#undef DO_CMP_PPZZ_D +#undef DO_CMP_PPZZ + +/* Similar, but the second source is "wide". 
*/ +#define DO_CMP_PPZW(NAME, TYPE, TYPEW, OP, H, MASK) \ +uint32_t HELPER(NAME)(void *vd, void *vn, void *vm, void *vg, uint32_t desc) \ +{ \ + intptr_t opr_sz = simd_oprsz(desc); \ + uint32_t flags = PREDTEST_INIT; \ + intptr_t i = opr_sz; \ + do { \ + uint64_t out = 0, pg; \ + do { \ + TYPEW mm = *(TYPEW *)((char *)vm + i - 8); \ + do { \ + i -= sizeof(TYPE), out <<= sizeof(TYPE); \ + TYPE nn = *(TYPE *)((char *)vn + H(i)); \ + out |= nn OP mm; \ + } while (i & 7); \ + } while (i & 63); \ + pg = *(uint64_t *)((char *)vg + (i >> 3)) & MASK; \ + out &= pg; \ + *(uint64_t *)((char *)vd + (i >> 3)) = out; \ + flags = iter_predtest_bwd(out, pg, flags); \ + } while (i > 0); \ + return flags; \ +} + +#define DO_CMP_PPZW_B(NAME, TYPE, TYPEW, OP) \ + DO_CMP_PPZW(NAME, TYPE, TYPEW, OP, H1, 0xffffffffffffffffull) +#define DO_CMP_PPZW_H(NAME, TYPE, TYPEW, OP) \ + DO_CMP_PPZW(NAME, TYPE, TYPEW, OP, H1_2, 0x5555555555555555ull) +#define DO_CMP_PPZW_S(NAME, TYPE, TYPEW, OP) \ + DO_CMP_PPZW(NAME, TYPE, TYPEW, OP, H1_4, 0x1111111111111111ull) + +DO_CMP_PPZW_B(sve_cmpeq_ppzw_b, int8_t, uint64_t, ==) +DO_CMP_PPZW_H(sve_cmpeq_ppzw_h, int16_t, uint64_t, ==) +DO_CMP_PPZW_S(sve_cmpeq_ppzw_s, int32_t, uint64_t, ==) + +DO_CMP_PPZW_B(sve_cmpne_ppzw_b, int8_t, uint64_t, !=) +DO_CMP_PPZW_H(sve_cmpne_ppzw_h, int16_t, uint64_t, !=) +DO_CMP_PPZW_S(sve_cmpne_ppzw_s, int32_t, uint64_t, !=) + +DO_CMP_PPZW_B(sve_cmpgt_ppzw_b, int8_t, int64_t, >) +DO_CMP_PPZW_H(sve_cmpgt_ppzw_h, int16_t, int64_t, >) +DO_CMP_PPZW_S(sve_cmpgt_ppzw_s, int32_t, int64_t, >) + +DO_CMP_PPZW_B(sve_cmpge_ppzw_b, int8_t, int64_t, >=) +DO_CMP_PPZW_H(sve_cmpge_ppzw_h, int16_t, int64_t, >=) +DO_CMP_PPZW_S(sve_cmpge_ppzw_s, int32_t, int64_t, >=) + +DO_CMP_PPZW_B(sve_cmphi_ppzw_b, uint8_t, uint64_t, >) +DO_CMP_PPZW_H(sve_cmphi_ppzw_h, uint16_t, uint64_t, >) +DO_CMP_PPZW_S(sve_cmphi_ppzw_s, uint32_t, uint64_t, >) + +DO_CMP_PPZW_B(sve_cmphs_ppzw_b, uint8_t, uint64_t, >=) +DO_CMP_PPZW_H(sve_cmphs_ppzw_h, uint16_t, uint64_t, >=) +DO_CMP_PPZW_S(sve_cmphs_ppzw_s, uint32_t, uint64_t, >=) + +DO_CMP_PPZW_B(sve_cmplt_ppzw_b, int8_t, int64_t, <) +DO_CMP_PPZW_H(sve_cmplt_ppzw_h, int16_t, int64_t, <) +DO_CMP_PPZW_S(sve_cmplt_ppzw_s, int32_t, int64_t, <) + +DO_CMP_PPZW_B(sve_cmple_ppzw_b, int8_t, int64_t, <=) +DO_CMP_PPZW_H(sve_cmple_ppzw_h, int16_t, int64_t, <=) +DO_CMP_PPZW_S(sve_cmple_ppzw_s, int32_t, int64_t, <=) + +DO_CMP_PPZW_B(sve_cmplo_ppzw_b, uint8_t, uint64_t, <) +DO_CMP_PPZW_H(sve_cmplo_ppzw_h, uint16_t, uint64_t, <) +DO_CMP_PPZW_S(sve_cmplo_ppzw_s, uint32_t, uint64_t, <) + +DO_CMP_PPZW_B(sve_cmpls_ppzw_b, uint8_t, uint64_t, <=) +DO_CMP_PPZW_H(sve_cmpls_ppzw_h, uint16_t, uint64_t, <=) +DO_CMP_PPZW_S(sve_cmpls_ppzw_s, uint32_t, uint64_t, <=) + +#undef DO_CMP_PPZW_B +#undef DO_CMP_PPZW_H +#undef DO_CMP_PPZW_S +#undef DO_CMP_PPZW + +/* Similar, but the second source is immediate. 
*/ +#define DO_CMP_PPZI(NAME, TYPE, OP, H, MASK) \ +uint32_t HELPER(NAME)(void *vd, void *vn, void *vg, uint32_t desc) \ +{ \ + intptr_t opr_sz = simd_oprsz(desc); \ + uint32_t flags = PREDTEST_INIT; \ + TYPE mm = simd_data(desc); \ + intptr_t i = opr_sz; \ + do { \ + uint64_t out = 0, pg; \ + do { \ + i -= sizeof(TYPE), out <<= sizeof(TYPE); \ + TYPE nn = *(TYPE *)((char *)vn + H(i)); \ + out |= nn OP mm; \ + } while (i & 63); \ + pg = *(uint64_t *)((char *)vg + (i >> 3)) & MASK; \ + out &= pg; \ + *(uint64_t *)((char *)vd + (i >> 3)) = out; \ + flags = iter_predtest_bwd(out, pg, flags); \ + } while (i > 0); \ + return flags; \ +} + +#define DO_CMP_PPZI_B(NAME, TYPE, OP) \ + DO_CMP_PPZI(NAME, TYPE, OP, H1, 0xffffffffffffffffull) +#define DO_CMP_PPZI_H(NAME, TYPE, OP) \ + DO_CMP_PPZI(NAME, TYPE, OP, H1_2, 0x5555555555555555ull) +#define DO_CMP_PPZI_S(NAME, TYPE, OP) \ + DO_CMP_PPZI(NAME, TYPE, OP, H1_4, 0x1111111111111111ull) +#define DO_CMP_PPZI_D(NAME, TYPE, OP) \ + DO_CMP_PPZI(NAME, TYPE, OP, , 0x0101010101010101ull) + +DO_CMP_PPZI_B(sve_cmpeq_ppzi_b, uint8_t, ==) +DO_CMP_PPZI_H(sve_cmpeq_ppzi_h, uint16_t, ==) +DO_CMP_PPZI_S(sve_cmpeq_ppzi_s, uint32_t, ==) +DO_CMP_PPZI_D(sve_cmpeq_ppzi_d, uint64_t, ==) + +DO_CMP_PPZI_B(sve_cmpne_ppzi_b, uint8_t, !=) +DO_CMP_PPZI_H(sve_cmpne_ppzi_h, uint16_t, !=) +DO_CMP_PPZI_S(sve_cmpne_ppzi_s, uint32_t, !=) +DO_CMP_PPZI_D(sve_cmpne_ppzi_d, uint64_t, !=) + +DO_CMP_PPZI_B(sve_cmpgt_ppzi_b, int8_t, >) +DO_CMP_PPZI_H(sve_cmpgt_ppzi_h, int16_t, >) +DO_CMP_PPZI_S(sve_cmpgt_ppzi_s, int32_t, >) +DO_CMP_PPZI_D(sve_cmpgt_ppzi_d, int64_t, >) + +DO_CMP_PPZI_B(sve_cmpge_ppzi_b, int8_t, >=) +DO_CMP_PPZI_H(sve_cmpge_ppzi_h, int16_t, >=) +DO_CMP_PPZI_S(sve_cmpge_ppzi_s, int32_t, >=) +DO_CMP_PPZI_D(sve_cmpge_ppzi_d, int64_t, >=) + +DO_CMP_PPZI_B(sve_cmphi_ppzi_b, uint8_t, >) +DO_CMP_PPZI_H(sve_cmphi_ppzi_h, uint16_t, >) +DO_CMP_PPZI_S(sve_cmphi_ppzi_s, uint32_t, >) +DO_CMP_PPZI_D(sve_cmphi_ppzi_d, uint64_t, >) + +DO_CMP_PPZI_B(sve_cmphs_ppzi_b, uint8_t, >=) +DO_CMP_PPZI_H(sve_cmphs_ppzi_h, uint16_t, >=) +DO_CMP_PPZI_S(sve_cmphs_ppzi_s, uint32_t, >=) +DO_CMP_PPZI_D(sve_cmphs_ppzi_d, uint64_t, >=) + +DO_CMP_PPZI_B(sve_cmplt_ppzi_b, int8_t, <) +DO_CMP_PPZI_H(sve_cmplt_ppzi_h, int16_t, <) +DO_CMP_PPZI_S(sve_cmplt_ppzi_s, int32_t, <) +DO_CMP_PPZI_D(sve_cmplt_ppzi_d, int64_t, <) + +DO_CMP_PPZI_B(sve_cmple_ppzi_b, int8_t, <=) +DO_CMP_PPZI_H(sve_cmple_ppzi_h, int16_t, <=) +DO_CMP_PPZI_S(sve_cmple_ppzi_s, int32_t, <=) +DO_CMP_PPZI_D(sve_cmple_ppzi_d, int64_t, <=) + +DO_CMP_PPZI_B(sve_cmplo_ppzi_b, uint8_t, <) +DO_CMP_PPZI_H(sve_cmplo_ppzi_h, uint16_t, <) +DO_CMP_PPZI_S(sve_cmplo_ppzi_s, uint32_t, <) +DO_CMP_PPZI_D(sve_cmplo_ppzi_d, uint64_t, <) + +DO_CMP_PPZI_B(sve_cmpls_ppzi_b, uint8_t, <=) +DO_CMP_PPZI_H(sve_cmpls_ppzi_h, uint16_t, <=) +DO_CMP_PPZI_S(sve_cmpls_ppzi_s, uint32_t, <=) +DO_CMP_PPZI_D(sve_cmpls_ppzi_d, uint64_t, <=) + +#undef DO_CMP_PPZI_B +#undef DO_CMP_PPZI_H +#undef DO_CMP_PPZI_S +#undef DO_CMP_PPZI_D +#undef DO_CMP_PPZI + +/* Similar to the ARM LastActive pseudocode function. */ +static bool last_active_pred(void *vd, void *vg, intptr_t oprsz) +{ + intptr_t i; + + for (i = QEMU_ALIGN_UP(oprsz, 8) - 8; i >= 0; i -= 8) { + uint64_t pg = *(uint64_t *)((char *)vg + i); + if (pg) { + return (pow2floor(pg) & *(uint64_t *)((char *)vd + i)) != 0; + } + } + return 0; +} + +/* Compute a mask into RETB that is true for all G, up to and including + * (if after) or excluding (if !after) the first G & N. + * Return true if BRK found. 
+ */ +static bool compute_brk(uint64_t *retb, uint64_t n, uint64_t g, + bool brk, bool after) +{ + uint64_t b; + + if (brk) { + b = 0; + } else if ((g & n) == 0) { + /* For all G, no N are set; break not found. */ + b = g; + } else { + /* Break somewhere in N. Locate it. */ + b = g & n; /* guard true, pred true */ +#ifdef _MSC_VER + b = b & (0 - b); /* first such */ +#else + b = b & -b; /* first such */ +#endif + if (after) { + b = b | (b - 1); /* break after same */ + } else { + b = b - 1; /* break before same */ + } + brk = true; + } + + *retb = b; + return brk; +} + +/* Compute a zeroing BRK. */ +static void compute_brk_z(uint64_t *d, uint64_t *n, uint64_t *g, + intptr_t oprsz, bool after) +{ + bool brk = false; + intptr_t i; + + for (i = 0; i < DIV_ROUND_UP(oprsz, 8); ++i) { + uint64_t this_b, this_g = g[i]; + + brk = compute_brk(&this_b, n[i], this_g, brk, after); + d[i] = this_b & this_g; + } +} + +/* Likewise, but also compute flags. */ +static uint32_t compute_brks_z(uint64_t *d, uint64_t *n, uint64_t *g, + intptr_t oprsz, bool after) +{ + uint32_t flags = PREDTEST_INIT; + bool brk = false; + intptr_t i; + + for (i = 0; i < DIV_ROUND_UP(oprsz, 8); ++i) { + uint64_t this_b, this_d, this_g = g[i]; + + brk = compute_brk(&this_b, n[i], this_g, brk, after); + d[i] = this_d = this_b & this_g; + flags = iter_predtest_fwd(this_d, this_g, flags); + } + return flags; +} + +/* Compute a merging BRK. */ +static void compute_brk_m(uint64_t *d, uint64_t *n, uint64_t *g, + intptr_t oprsz, bool after) +{ + bool brk = false; + intptr_t i; + + for (i = 0; i < DIV_ROUND_UP(oprsz, 8); ++i) { + uint64_t this_b, this_g = g[i]; + + brk = compute_brk(&this_b, n[i], this_g, brk, after); + d[i] = (this_b & this_g) | (d[i] & ~this_g); + } +} + +/* Likewise, but also compute flags. */ +static uint32_t compute_brks_m(uint64_t *d, uint64_t *n, uint64_t *g, + intptr_t oprsz, bool after) +{ + uint32_t flags = PREDTEST_INIT; + bool brk = false; + intptr_t i; + + for (i = 0; i < oprsz / 8; ++i) { + uint64_t this_b, this_d = d[i], this_g = g[i]; + + brk = compute_brk(&this_b, n[i], this_g, brk, after); + d[i] = this_d = (this_b & this_g) | (this_d & ~this_g); + flags = iter_predtest_fwd(this_d, this_g, flags); + } + return flags; +} + +static uint32_t do_zero(ARMPredicateReg *d, intptr_t oprsz) +{ + /* It is quicker to zero the whole predicate than loop on OPRSZ. + * The compiler should turn this into 4 64-bit integer stores. 
+ */ + memset(d, 0, sizeof(ARMPredicateReg)); + return PREDTEST_INIT; +} + +void HELPER(sve_brkpa)(void *vd, void *vn, void *vm, void *vg, + uint32_t pred_desc) +{ + intptr_t oprsz = extract32(pred_desc, 0, SIMD_OPRSZ_BITS) + 2; + if (last_active_pred(vn, vg, oprsz)) { + compute_brk_z(vd, vm, vg, oprsz, true); + } else { + do_zero(vd, oprsz); + } +} + +uint32_t HELPER(sve_brkpas)(void *vd, void *vn, void *vm, void *vg, + uint32_t pred_desc) +{ + intptr_t oprsz = extract32(pred_desc, 0, SIMD_OPRSZ_BITS) + 2; + if (last_active_pred(vn, vg, oprsz)) { + return compute_brks_z(vd, vm, vg, oprsz, true); + } else { + return do_zero(vd, oprsz); + } +} + +void HELPER(sve_brkpb)(void *vd, void *vn, void *vm, void *vg, + uint32_t pred_desc) +{ + intptr_t oprsz = extract32(pred_desc, 0, SIMD_OPRSZ_BITS) + 2; + if (last_active_pred(vn, vg, oprsz)) { + compute_brk_z(vd, vm, vg, oprsz, false); + } else { + do_zero(vd, oprsz); + } +} + +uint32_t HELPER(sve_brkpbs)(void *vd, void *vn, void *vm, void *vg, + uint32_t pred_desc) +{ + intptr_t oprsz = extract32(pred_desc, 0, SIMD_OPRSZ_BITS) + 2; + if (last_active_pred(vn, vg, oprsz)) { + return compute_brks_z(vd, vm, vg, oprsz, false); + } else { + return do_zero(vd, oprsz); + } +} + +void HELPER(sve_brka_z)(void *vd, void *vn, void *vg, uint32_t pred_desc) +{ + intptr_t oprsz = extract32(pred_desc, 0, SIMD_OPRSZ_BITS) + 2; + compute_brk_z(vd, vn, vg, oprsz, true); +} + +uint32_t HELPER(sve_brkas_z)(void *vd, void *vn, void *vg, uint32_t pred_desc) +{ + intptr_t oprsz = extract32(pred_desc, 0, SIMD_OPRSZ_BITS) + 2; + return compute_brks_z(vd, vn, vg, oprsz, true); +} + +void HELPER(sve_brkb_z)(void *vd, void *vn, void *vg, uint32_t pred_desc) +{ + intptr_t oprsz = extract32(pred_desc, 0, SIMD_OPRSZ_BITS) + 2; + compute_brk_z(vd, vn, vg, oprsz, false); +} + +uint32_t HELPER(sve_brkbs_z)(void *vd, void *vn, void *vg, uint32_t pred_desc) +{ + intptr_t oprsz = extract32(pred_desc, 0, SIMD_OPRSZ_BITS) + 2; + return compute_brks_z(vd, vn, vg, oprsz, false); +} + +void HELPER(sve_brka_m)(void *vd, void *vn, void *vg, uint32_t pred_desc) +{ + intptr_t oprsz = extract32(pred_desc, 0, SIMD_OPRSZ_BITS) + 2; + compute_brk_m(vd, vn, vg, oprsz, true); +} + +uint32_t HELPER(sve_brkas_m)(void *vd, void *vn, void *vg, uint32_t pred_desc) +{ + intptr_t oprsz = extract32(pred_desc, 0, SIMD_OPRSZ_BITS) + 2; + return compute_brks_m(vd, vn, vg, oprsz, true); +} + +void HELPER(sve_brkb_m)(void *vd, void *vn, void *vg, uint32_t pred_desc) +{ + intptr_t oprsz = extract32(pred_desc, 0, SIMD_OPRSZ_BITS) + 2; + compute_brk_m(vd, vn, vg, oprsz, false); +} + +uint32_t HELPER(sve_brkbs_m)(void *vd, void *vn, void *vg, uint32_t pred_desc) +{ + intptr_t oprsz = extract32(pred_desc, 0, SIMD_OPRSZ_BITS) + 2; + return compute_brks_m(vd, vn, vg, oprsz, false); +} + +void HELPER(sve_brkn)(void *vd, void *vn, void *vg, uint32_t pred_desc) +{ + intptr_t oprsz = extract32(pred_desc, 0, SIMD_OPRSZ_BITS) + 2; + + if (!last_active_pred(vn, vg, oprsz)) { + do_zero(vd, oprsz); + } +} + +/* As if PredTest(Ones(PL), D, esz). 
*/ +static uint32_t predtest_ones(ARMPredicateReg *d, intptr_t oprsz, + uint64_t esz_mask) +{ + uint32_t flags = PREDTEST_INIT; + intptr_t i; + + for (i = 0; i < oprsz / 8; i++) { + flags = iter_predtest_fwd(d->p[i], esz_mask, flags); + } + if (oprsz & 7) { + uint64_t mask = ~(0xffffffffffffffffULL << (8 * (oprsz & 7))); + flags = iter_predtest_fwd(d->p[i], esz_mask & mask, flags); + } + return flags; +} + +uint32_t HELPER(sve_brkns)(void *vd, void *vn, void *vg, uint32_t pred_desc) +{ + intptr_t oprsz = extract32(pred_desc, 0, SIMD_OPRSZ_BITS) + 2; + + if (last_active_pred(vn, vg, oprsz)) { + return predtest_ones(vd, oprsz, -1); + } else { + return do_zero(vd, oprsz); + } +} + +uint64_t HELPER(sve_cntp)(void *vn, void *vg, uint32_t pred_desc) +{ + intptr_t oprsz = extract32(pred_desc, 0, SIMD_OPRSZ_BITS) + 2; + intptr_t esz = extract32(pred_desc, SIMD_DATA_SHIFT, 2); + uint64_t *n = vn, *g = vg, sum = 0, mask = pred_esz_masks[esz]; + intptr_t i; + + for (i = 0; i < DIV_ROUND_UP(oprsz, 8); ++i) { + uint64_t t = n[i] & g[i] & mask; + sum += ctpop64(t); + } + return sum; +} + +uint32_t HELPER(sve_while)(void *vd, uint32_t count, uint32_t pred_desc) +{ + uintptr_t oprsz = extract32(pred_desc, 0, SIMD_OPRSZ_BITS) + 2; + intptr_t esz = extract32(pred_desc, SIMD_DATA_SHIFT, 2); + uint64_t esz_mask = pred_esz_masks[esz]; + ARMPredicateReg *d = vd; + uint32_t flags; + intptr_t i; + + /* Begin with a zero predicate register. */ + flags = do_zero(d, oprsz); + if (count == 0) { + return flags; + } + + /* Set all of the requested bits. */ + for (i = 0; i < count / 64; ++i) { + d->p[i] = esz_mask; + } + if (count & 63) { + d->p[i] = MAKE_64BIT_MASK(0, count & 63) & esz_mask; + } + + return predtest_ones(d, oprsz, esz_mask); +} + +/* Recursive reduction on a function; + * C.f. the ARM ARM function ReducePredicated. + * + * While it would be possible to write this without the DATA temporary, + * it is much simpler to process the predicate register this way. + * The recursion is bounded to depth 7 (128 fp16 elements), so there's + * little to gain with a more complex non-recursive form. + */ +#define DO_REDUCE(NAME, TYPE, H, FUNC, IDENT) \ +static TYPE NAME##_reduce(TYPE *data, float_status *status, uintptr_t n) \ +{ \ + if (n == 1) { \ + return *data; \ + } else { \ + uintptr_t half = n / 2; \ + TYPE lo = NAME##_reduce(data, status, half); \ + TYPE hi = NAME##_reduce(data + half, status, half); \ + return TYPE##_##FUNC(lo, hi, status); \ + } \ +} \ +uint64_t HELPER(NAME)(void *vn, void *vg, void *vs, uint32_t desc) \ +{ \ + uintptr_t i, oprsz = simd_oprsz(desc), maxsz = simd_maxsz(desc); \ + TYPE data[sizeof(ARMVectorReg) / sizeof(TYPE)]; \ + for (i = 0; i < oprsz; ) { \ + uint16_t pg = *(uint16_t *)((char *)vg + H1_2(i >> 3)); \ + do { \ + TYPE nn = *(TYPE *)((char *)vn + H(i)); \ + *(TYPE *)((char *)data + i) = (pg & 1 ? nn : IDENT); \ + i += sizeof(TYPE), pg >>= sizeof(TYPE); \ + } while (i & 15); \ + } \ + for (; i < maxsz; i += sizeof(TYPE)) { \ + *(TYPE *)((char *)data + i) = IDENT; \ + } \ + return NAME##_reduce(data, vs, maxsz / sizeof(TYPE)); \ +} + +DO_REDUCE(sve_faddv_h, float16, H1_2, add, float16_zero) +DO_REDUCE(sve_faddv_s, float32, H1_4, add, float32_zero) +DO_REDUCE(sve_faddv_d, float64, , add, float64_zero) + +/* Identity is floatN_default_nan, without the function call. 
*/ +DO_REDUCE(sve_fminnmv_h, float16, H1_2, minnum, 0x7E00) +DO_REDUCE(sve_fminnmv_s, float32, H1_4, minnum, 0x7FC00000) +DO_REDUCE(sve_fminnmv_d, float64, , minnum, 0x7FF8000000000000ULL) + +DO_REDUCE(sve_fmaxnmv_h, float16, H1_2, maxnum, 0x7E00) +DO_REDUCE(sve_fmaxnmv_s, float32, H1_4, maxnum, 0x7FC00000) +DO_REDUCE(sve_fmaxnmv_d, float64, , maxnum, 0x7FF8000000000000ULL) + +DO_REDUCE(sve_fminv_h, float16, H1_2, min, float16_infinity) +DO_REDUCE(sve_fminv_s, float32, H1_4, min, float32_infinity) +DO_REDUCE(sve_fminv_d, float64, , min, float64_infinity) + +DO_REDUCE(sve_fmaxv_h, float16, H1_2, max, float16_chs(float16_infinity)) +DO_REDUCE(sve_fmaxv_s, float32, H1_4, max, float32_chs(float32_infinity)) +DO_REDUCE(sve_fmaxv_d, float64, , max, float64_chs(float64_infinity)) + +#undef DO_REDUCE + +uint64_t HELPER(sve_fadda_h)(uint64_t nn, void *vm, void *vg, + void *status, uint32_t desc) +{ + intptr_t i = 0, opr_sz = simd_oprsz(desc); + float16 result = nn; + + do { + uint16_t pg = *(uint16_t *)((char *)vg + H1_2(i >> 3)); + do { + if (pg & 1) { + float16 mm = *(float16 *)((char *)vm + H1_2(i)); + result = float16_add(result, mm, status); + } + i += sizeof(float16), pg >>= sizeof(float16); + } while (i & 15); + } while (i < opr_sz); + + return result; +} + +uint64_t HELPER(sve_fadda_s)(uint64_t nn, void *vm, void *vg, + void *status, uint32_t desc) +{ + intptr_t i = 0, opr_sz = simd_oprsz(desc); + float32 result = nn; + + do { + uint16_t pg = *(uint16_t *)((char *)vg + H1_2(i >> 3)); + do { + if (pg & 1) { + float32 mm = *(float32 *)((char *)vm + H1_2(i)); + result = float32_add(result, mm, status); + } + i += sizeof(float32), pg >>= sizeof(float32); + } while (i & 15); + } while (i < opr_sz); + + return result; +} + +uint64_t HELPER(sve_fadda_d)(uint64_t nn, void *vm, void *vg, + void *status, uint32_t desc) +{ + intptr_t i = 0, opr_sz = simd_oprsz(desc) / 8; + uint64_t *m = vm; + uint8_t *pg = vg; + + for (i = 0; i < opr_sz; i++) { + if (pg[H1(i)] & 1) { + nn = float64_add(nn, m[i], status); + } + } + + return nn; +} + +/* Fully general three-operand expander, controlled by a predicate, + * With the extra float_status parameter. 
+ */ +#define DO_ZPZZ_FP(NAME, TYPE, H, OP) \ +void HELPER(NAME)(void *vd, void *vn, void *vm, void *vg, \ + void *status, uint32_t desc) \ +{ \ + intptr_t i = simd_oprsz(desc); \ + uint64_t *g = vg; \ + do { \ + uint64_t pg = g[(i - 1) >> 6]; \ + do { \ + i -= sizeof(TYPE); \ + if (likely((pg >> (i & 63)) & 1)) { \ + TYPE nn = *(TYPE *)((char *)vn + H(i)); \ + TYPE mm = *(TYPE *)((char *)vm + H(i)); \ + *(TYPE *)((char *)vd + H(i)) = OP(nn, mm, status); \ + } \ + } while (i & 63); \ + } while (i != 0); \ +} + +DO_ZPZZ_FP(sve_fadd_h, uint16_t, H1_2, float16_add) +DO_ZPZZ_FP(sve_fadd_s, uint32_t, H1_4, float32_add) +DO_ZPZZ_FP(sve_fadd_d, uint64_t, , float64_add) + +DO_ZPZZ_FP(sve_fsub_h, uint16_t, H1_2, float16_sub) +DO_ZPZZ_FP(sve_fsub_s, uint32_t, H1_4, float32_sub) +DO_ZPZZ_FP(sve_fsub_d, uint64_t, , float64_sub) + +DO_ZPZZ_FP(sve_fmul_h, uint16_t, H1_2, float16_mul) +DO_ZPZZ_FP(sve_fmul_s, uint32_t, H1_4, float32_mul) +DO_ZPZZ_FP(sve_fmul_d, uint64_t, , float64_mul) + +DO_ZPZZ_FP(sve_fdiv_h, uint16_t, H1_2, float16_div) +DO_ZPZZ_FP(sve_fdiv_s, uint32_t, H1_4, float32_div) +DO_ZPZZ_FP(sve_fdiv_d, uint64_t, , float64_div) + +DO_ZPZZ_FP(sve_fmin_h, uint16_t, H1_2, float16_min) +DO_ZPZZ_FP(sve_fmin_s, uint32_t, H1_4, float32_min) +DO_ZPZZ_FP(sve_fmin_d, uint64_t, , float64_min) + +DO_ZPZZ_FP(sve_fmax_h, uint16_t, H1_2, float16_max) +DO_ZPZZ_FP(sve_fmax_s, uint32_t, H1_4, float32_max) +DO_ZPZZ_FP(sve_fmax_d, uint64_t, , float64_max) + +DO_ZPZZ_FP(sve_fminnum_h, uint16_t, H1_2, float16_minnum) +DO_ZPZZ_FP(sve_fminnum_s, uint32_t, H1_4, float32_minnum) +DO_ZPZZ_FP(sve_fminnum_d, uint64_t, , float64_minnum) + +DO_ZPZZ_FP(sve_fmaxnum_h, uint16_t, H1_2, float16_maxnum) +DO_ZPZZ_FP(sve_fmaxnum_s, uint32_t, H1_4, float32_maxnum) +DO_ZPZZ_FP(sve_fmaxnum_d, uint64_t, , float64_maxnum) + +static inline float16 abd_h(float16 a, float16 b, float_status *s) +{ + return float16_abs(float16_sub(a, b, s)); +} + +static inline float32 abd_s(float32 a, float32 b, float_status *s) +{ + return float32_abs(float32_sub(a, b, s)); +} + +static inline float64 abd_d(float64 a, float64 b, float_status *s) +{ + return float64_abs(float64_sub(a, b, s)); +} + +DO_ZPZZ_FP(sve_fabd_h, uint16_t, H1_2, abd_h) +DO_ZPZZ_FP(sve_fabd_s, uint32_t, H1_4, abd_s) +DO_ZPZZ_FP(sve_fabd_d, uint64_t, , abd_d) + +static inline float64 scalbn_d(float64 a, int64_t b, float_status *s) +{ + int b_int = MIN(MAX(b, INT_MIN), INT_MAX); + return float64_scalbn(a, b_int, s); +} + +DO_ZPZZ_FP(sve_fscalbn_h, int16_t, H1_2, float16_scalbn) +DO_ZPZZ_FP(sve_fscalbn_s, int32_t, H1_4, float32_scalbn) +DO_ZPZZ_FP(sve_fscalbn_d, int64_t, , scalbn_d) + +DO_ZPZZ_FP(sve_fmulx_h, uint16_t, H1_2, helper_advsimd_mulxh) +DO_ZPZZ_FP(sve_fmulx_s, uint32_t, H1_4, helper_vfp_mulxs) +DO_ZPZZ_FP(sve_fmulx_d, uint64_t, , helper_vfp_mulxd) + +#undef DO_ZPZZ_FP + +/* Three-operand expander, with one scalar operand, controlled by + * a predicate, with the extra float_status parameter. 
+ */ +#define DO_ZPZS_FP(NAME, TYPE, H, OP) \ +void HELPER(NAME)(void *vd, void *vn, void *vg, uint64_t scalar, \ + void *status, uint32_t desc) \ +{ \ + intptr_t i = simd_oprsz(desc); \ + uint64_t *g = vg; \ + TYPE mm = scalar; \ + do { \ + uint64_t pg = g[(i - 1) >> 6]; \ + do { \ + i -= sizeof(TYPE); \ + if (likely((pg >> (i & 63)) & 1)) { \ + TYPE nn = *(TYPE *)((char *)vn + H(i)); \ + *(TYPE *)((char *)vd + H(i)) = OP(nn, mm, status); \ + } \ + } while (i & 63); \ + } while (i != 0); \ +} + +DO_ZPZS_FP(sve_fadds_h, float16, H1_2, float16_add) +DO_ZPZS_FP(sve_fadds_s, float32, H1_4, float32_add) +DO_ZPZS_FP(sve_fadds_d, float64, , float64_add) + +DO_ZPZS_FP(sve_fsubs_h, float16, H1_2, float16_sub) +DO_ZPZS_FP(sve_fsubs_s, float32, H1_4, float32_sub) +DO_ZPZS_FP(sve_fsubs_d, float64, , float64_sub) + +DO_ZPZS_FP(sve_fmuls_h, float16, H1_2, float16_mul) +DO_ZPZS_FP(sve_fmuls_s, float32, H1_4, float32_mul) +DO_ZPZS_FP(sve_fmuls_d, float64, , float64_mul) + +static inline float16 subr_h(float16 a, float16 b, float_status *s) +{ + return float16_sub(b, a, s); +} + +static inline float32 subr_s(float32 a, float32 b, float_status *s) +{ + return float32_sub(b, a, s); +} + +static inline float64 subr_d(float64 a, float64 b, float_status *s) +{ + return float64_sub(b, a, s); +} + +DO_ZPZS_FP(sve_fsubrs_h, float16, H1_2, subr_h) +DO_ZPZS_FP(sve_fsubrs_s, float32, H1_4, subr_s) +DO_ZPZS_FP(sve_fsubrs_d, float64, , subr_d) + +DO_ZPZS_FP(sve_fmaxnms_h, float16, H1_2, float16_maxnum) +DO_ZPZS_FP(sve_fmaxnms_s, float32, H1_4, float32_maxnum) +DO_ZPZS_FP(sve_fmaxnms_d, float64, , float64_maxnum) + +DO_ZPZS_FP(sve_fminnms_h, float16, H1_2, float16_minnum) +DO_ZPZS_FP(sve_fminnms_s, float32, H1_4, float32_minnum) +DO_ZPZS_FP(sve_fminnms_d, float64, , float64_minnum) + +DO_ZPZS_FP(sve_fmaxs_h, float16, H1_2, float16_max) +DO_ZPZS_FP(sve_fmaxs_s, float32, H1_4, float32_max) +DO_ZPZS_FP(sve_fmaxs_d, float64, , float64_max) + +DO_ZPZS_FP(sve_fmins_h, float16, H1_2, float16_min) +DO_ZPZS_FP(sve_fmins_s, float32, H1_4, float32_min) +DO_ZPZS_FP(sve_fmins_d, float64, , float64_min) + +/* Fully general two-operand expander, controlled by a predicate, + * With the extra float_status parameter. + */ +#define DO_ZPZ_FP(NAME, TYPE, H, OP) \ +void HELPER(NAME)(void *vd, void *vn, void *vg, void *status, uint32_t desc) \ +{ \ + intptr_t i = simd_oprsz(desc); \ + uint64_t *g = vg; \ + do { \ + uint64_t pg = g[(i - 1) >> 6]; \ + do { \ + i -= sizeof(TYPE); \ + if (likely((pg >> (i & 63)) & 1)) { \ + TYPE nn = *(TYPE *)((char *)vn + H(i)); \ + *(TYPE *)((char *)vd + H(i)) = OP(nn, status); \ + } \ + } while (i & 63); \ + } while (i != 0); \ +} + +/* SVE fp16 conversions always use IEEE mode. Like AdvSIMD, they ignore + * FZ16. When converting from fp16, this affects flushing input denormals; + * when converting to fp16, this affects flushing output denormals. 
+ */ +static inline float32 sve_f16_to_f32(float16 f, float_status *fpst) +{ + bool save = get_flush_inputs_to_zero(fpst); + float32 ret; + + set_flush_inputs_to_zero(false, fpst); + ret = float16_to_float32(f, true, fpst); + set_flush_inputs_to_zero(save, fpst); + return ret; +} + +static inline float64 sve_f16_to_f64(float16 f, float_status *fpst) +{ + bool save = get_flush_inputs_to_zero(fpst); + float64 ret; + + set_flush_inputs_to_zero(false, fpst); + ret = float16_to_float64(f, true, fpst); + set_flush_inputs_to_zero(save, fpst); + return ret; +} + +static inline float16 sve_f32_to_f16(float32 f, float_status *fpst) +{ + bool save = get_flush_to_zero(fpst); + float16 ret; + + set_flush_to_zero(false, fpst); + ret = float32_to_float16(f, true, fpst); + set_flush_to_zero(save, fpst); + return ret; +} + +static inline float16 sve_f64_to_f16(float64 f, float_status *fpst) +{ + bool save = get_flush_to_zero(fpst); + float16 ret; + + set_flush_to_zero(false, fpst); + ret = float64_to_float16(f, true, fpst); + set_flush_to_zero(save, fpst); + return ret; +} + +static inline int16_t vfp_float16_to_int16_rtz(float16 f, float_status *s) +{ + if (float16_is_any_nan(f)) { + float_raise(float_flag_invalid, s); + return 0; + } + return float16_to_int16_round_to_zero(f, s); +} + +static inline int64_t vfp_float16_to_int64_rtz(float16 f, float_status *s) +{ + if (float16_is_any_nan(f)) { + float_raise(float_flag_invalid, s); + return 0; + } + return float16_to_int64_round_to_zero(f, s); +} + +static inline int64_t vfp_float32_to_int64_rtz(float32 f, float_status *s) +{ + if (float32_is_any_nan(f)) { + float_raise(float_flag_invalid, s); + return 0; + } + return float32_to_int64_round_to_zero(f, s); +} + +static inline int64_t vfp_float64_to_int64_rtz(float64 f, float_status *s) +{ + if (float64_is_any_nan(f)) { + float_raise(float_flag_invalid, s); + return 0; + } + return float64_to_int64_round_to_zero(f, s); +} + +static inline uint16_t vfp_float16_to_uint16_rtz(float16 f, float_status *s) +{ + if (float16_is_any_nan(f)) { + float_raise(float_flag_invalid, s); + return 0; + } + return float16_to_uint16_round_to_zero(f, s); +} + +static inline uint64_t vfp_float16_to_uint64_rtz(float16 f, float_status *s) +{ + if (float16_is_any_nan(f)) { + float_raise(float_flag_invalid, s); + return 0; + } + return float16_to_uint64_round_to_zero(f, s); +} + +static inline uint64_t vfp_float32_to_uint64_rtz(float32 f, float_status *s) +{ + if (float32_is_any_nan(f)) { + float_raise(float_flag_invalid, s); + return 0; + } + return float32_to_uint64_round_to_zero(f, s); +} + +static inline uint64_t vfp_float64_to_uint64_rtz(float64 f, float_status *s) +{ + if (float64_is_any_nan(f)) { + float_raise(float_flag_invalid, s); + return 0; + } + return float64_to_uint64_round_to_zero(f, s); +} + +DO_ZPZ_FP(sve_fcvt_sh, uint32_t, H1_4, sve_f32_to_f16) +DO_ZPZ_FP(sve_fcvt_hs, uint32_t, H1_4, sve_f16_to_f32) +DO_ZPZ_FP(sve_fcvt_dh, uint64_t, , sve_f64_to_f16) +DO_ZPZ_FP(sve_fcvt_hd, uint64_t, , sve_f16_to_f64) +DO_ZPZ_FP(sve_fcvt_ds, uint64_t, , float64_to_float32) +DO_ZPZ_FP(sve_fcvt_sd, uint64_t, , float32_to_float64) + +DO_ZPZ_FP(sve_fcvtzs_hh, uint16_t, H1_2, vfp_float16_to_int16_rtz) +DO_ZPZ_FP(sve_fcvtzs_hs, uint32_t, H1_4, helper_vfp_tosizh) +DO_ZPZ_FP(sve_fcvtzs_ss, uint32_t, H1_4, helper_vfp_tosizs) +DO_ZPZ_FP(sve_fcvtzs_hd, uint64_t, , vfp_float16_to_int64_rtz) +DO_ZPZ_FP(sve_fcvtzs_sd, uint64_t, , vfp_float32_to_int64_rtz) +DO_ZPZ_FP(sve_fcvtzs_ds, uint64_t, , helper_vfp_tosizd) +DO_ZPZ_FP(sve_fcvtzs_dd, 
uint64_t, , vfp_float64_to_int64_rtz) + +DO_ZPZ_FP(sve_fcvtzu_hh, uint16_t, H1_2, vfp_float16_to_uint16_rtz) +DO_ZPZ_FP(sve_fcvtzu_hs, uint32_t, H1_4, helper_vfp_touizh) +DO_ZPZ_FP(sve_fcvtzu_ss, uint32_t, H1_4, helper_vfp_touizs) +DO_ZPZ_FP(sve_fcvtzu_hd, uint64_t, , vfp_float16_to_uint64_rtz) +DO_ZPZ_FP(sve_fcvtzu_sd, uint64_t, , vfp_float32_to_uint64_rtz) +DO_ZPZ_FP(sve_fcvtzu_ds, uint64_t, , helper_vfp_touizd) +DO_ZPZ_FP(sve_fcvtzu_dd, uint64_t, , vfp_float64_to_uint64_rtz) + +DO_ZPZ_FP(sve_frint_h, uint16_t, H1_2, helper_advsimd_rinth) +DO_ZPZ_FP(sve_frint_s, uint32_t, H1_4, helper_rints) +DO_ZPZ_FP(sve_frint_d, uint64_t, , helper_rintd) + +DO_ZPZ_FP(sve_frintx_h, uint16_t, H1_2, float16_round_to_int) +DO_ZPZ_FP(sve_frintx_s, uint32_t, H1_4, float32_round_to_int) +DO_ZPZ_FP(sve_frintx_d, uint64_t, , float64_round_to_int) + +DO_ZPZ_FP(sve_frecpx_h, uint16_t, H1_2, helper_frecpx_f16) +DO_ZPZ_FP(sve_frecpx_s, uint32_t, H1_4, helper_frecpx_f32) +DO_ZPZ_FP(sve_frecpx_d, uint64_t, , helper_frecpx_f64) + +DO_ZPZ_FP(sve_fsqrt_h, uint16_t, H1_2, float16_sqrt) +DO_ZPZ_FP(sve_fsqrt_s, uint32_t, H1_4, float32_sqrt) +DO_ZPZ_FP(sve_fsqrt_d, uint64_t, , float64_sqrt) + +DO_ZPZ_FP(sve_scvt_hh, uint16_t, H1_2, int16_to_float16) +DO_ZPZ_FP(sve_scvt_sh, uint32_t, H1_4, int32_to_float16) +DO_ZPZ_FP(sve_scvt_ss, uint32_t, H1_4, int32_to_float32) +DO_ZPZ_FP(sve_scvt_sd, uint64_t, , int32_to_float64) +DO_ZPZ_FP(sve_scvt_dh, uint64_t, , int64_to_float16) +DO_ZPZ_FP(sve_scvt_ds, uint64_t, , int64_to_float32) +DO_ZPZ_FP(sve_scvt_dd, uint64_t, , int64_to_float64) + +DO_ZPZ_FP(sve_ucvt_hh, uint16_t, H1_2, uint16_to_float16) +DO_ZPZ_FP(sve_ucvt_sh, uint32_t, H1_4, uint32_to_float16) +DO_ZPZ_FP(sve_ucvt_ss, uint32_t, H1_4, uint32_to_float32) +DO_ZPZ_FP(sve_ucvt_sd, uint64_t, , uint32_to_float64) +DO_ZPZ_FP(sve_ucvt_dh, uint64_t, , uint64_to_float16) +DO_ZPZ_FP(sve_ucvt_ds, uint64_t, , uint64_to_float32) +DO_ZPZ_FP(sve_ucvt_dd, uint64_t, , uint64_to_float64) + +#undef DO_ZPZ_FP + +static void do_fmla_zpzzz_h(void *vd, void *vn, void *vm, void *va, void *vg, + float_status *status, uint32_t desc, + uint16_t neg1, uint16_t neg3) +{ + intptr_t i = simd_oprsz(desc); + uint64_t *g = vg; + + do { + uint64_t pg = g[(i - 1) >> 6]; + do { + i -= 2; + if (likely((pg >> (i & 63)) & 1)) { + float16 e1, e2, e3, r; + + e1 = *(uint16_t *)((char *)vn + H1_2(i)) ^ neg1; + e2 = *(uint16_t *)((char *)vm + H1_2(i)); + e3 = *(uint16_t *)((char *)va + H1_2(i)) ^ neg3; + r = float16_muladd(e1, e2, e3, 0, status); + *(uint16_t *)((char *)vd + H1_2(i)) = r; + } + } while (i & 63); + } while (i != 0); +} + +void HELPER(sve_fmla_zpzzz_h)(void *vd, void *vn, void *vm, void *va, + void *vg, void *status, uint32_t desc) +{ + do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0, 0); +} + +void HELPER(sve_fmls_zpzzz_h)(void *vd, void *vn, void *vm, void *va, + void *vg, void *status, uint32_t desc) +{ + do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0x8000, 0); +} + +void HELPER(sve_fnmla_zpzzz_h)(void *vd, void *vn, void *vm, void *va, + void *vg, void *status, uint32_t desc) +{ + do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0x8000, 0x8000); +} + +void HELPER(sve_fnmls_zpzzz_h)(void *vd, void *vn, void *vm, void *va, + void *vg, void *status, uint32_t desc) +{ + do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0, 0x8000); +} + +static void do_fmla_zpzzz_s(void *vd, void *vn, void *vm, void *va, void *vg, + float_status *status, uint32_t desc, + uint32_t neg1, uint32_t neg3) +{ + intptr_t i = simd_oprsz(desc); + uint64_t *g = vg; + + do { 
+ uint64_t pg = g[(i - 1) >> 6]; + do { + i -= 4; + if (likely((pg >> (i & 63)) & 1)) { + float32 e1, e2, e3, r; + + e1 = *(uint32_t *)((char *)vn + H1_4(i)) ^ neg1; + e2 = *(uint32_t *)((char *)vm + H1_4(i)); + e3 = *(uint32_t *)((char *)va + H1_4(i)) ^ neg3; + r = float32_muladd(e1, e2, e3, 0, status); + *(uint32_t *)((char *)vd + H1_4(i)) = r; + } + } while (i & 63); + } while (i != 0); +} + +void HELPER(sve_fmla_zpzzz_s)(void *vd, void *vn, void *vm, void *va, + void *vg, void *status, uint32_t desc) +{ + do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0, 0); +} + +void HELPER(sve_fmls_zpzzz_s)(void *vd, void *vn, void *vm, void *va, + void *vg, void *status, uint32_t desc) +{ + do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0x80000000, 0); +} + +void HELPER(sve_fnmla_zpzzz_s)(void *vd, void *vn, void *vm, void *va, + void *vg, void *status, uint32_t desc) +{ + do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0x80000000, 0x80000000); +} + +void HELPER(sve_fnmls_zpzzz_s)(void *vd, void *vn, void *vm, void *va, + void *vg, void *status, uint32_t desc) +{ + do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0, 0x80000000); +} + +static void do_fmla_zpzzz_d(void *vd, void *vn, void *vm, void *va, void *vg, + float_status *status, uint32_t desc, + uint64_t neg1, uint64_t neg3) +{ + intptr_t i = simd_oprsz(desc); + uint64_t *g = vg; + + do { + uint64_t pg = g[(i - 1) >> 6]; + do { + i -= 8; + if (likely((pg >> (i & 63)) & 1)) { + float64 e1, e2, e3, r; + + e1 = *(uint64_t *)((char *)vn + i) ^ neg1; + e2 = *(uint64_t *)((char *)vm + i); + e3 = *(uint64_t *)((char *)va + i) ^ neg3; + r = float64_muladd(e1, e2, e3, 0, status); + *(uint64_t *)((char *)vd + i) = r; + } + } while (i & 63); + } while (i != 0); +} + +void HELPER(sve_fmla_zpzzz_d)(void *vd, void *vn, void *vm, void *va, + void *vg, void *status, uint32_t desc) +{ + do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, 0, 0); +} + +void HELPER(sve_fmls_zpzzz_d)(void *vd, void *vn, void *vm, void *va, + void *vg, void *status, uint32_t desc) +{ + do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, INT64_MIN, 0); +} + +void HELPER(sve_fnmla_zpzzz_d)(void *vd, void *vn, void *vm, void *va, + void *vg, void *status, uint32_t desc) +{ + do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, INT64_MIN, INT64_MIN); +} + +void HELPER(sve_fnmls_zpzzz_d)(void *vd, void *vn, void *vm, void *va, + void *vg, void *status, uint32_t desc) +{ + do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, 0, INT64_MIN); +} + +/* Two operand floating-point comparison controlled by a predicate. + * Unlike the integer version, we are not allowed to optimistically + * compare operands, since the comparison may have side effects wrt + * the FPSR. 
+ */ +#define DO_FPCMP_PPZZ(NAME, TYPE, H, OP) \ +void HELPER(NAME)(void *vd, void *vn, void *vm, void *vg, \ + void *status, uint32_t desc) \ +{ \ + intptr_t i = simd_oprsz(desc), j = (i - 1) >> 6; \ + uint64_t *d = vd, *g = vg; \ + do { \ + uint64_t out = 0, pg = g[j]; \ + do { \ + i -= sizeof(TYPE), out <<= sizeof(TYPE); \ + if (likely((pg >> (i & 63)) & 1)) { \ + TYPE nn = *(TYPE *)((char *)vn + H(i)); \ + TYPE mm = *(TYPE *)((char *)vm + H(i)); \ + out |= OP(TYPE, nn, mm, status); \ + } \ + } while (i & 63); \ + d[j--] = out; \ + } while (i > 0); \ +} + +#define DO_FPCMP_PPZZ_H(NAME, OP) \ + DO_FPCMP_PPZZ(NAME##_h, float16, H1_2, OP) +#define DO_FPCMP_PPZZ_S(NAME, OP) \ + DO_FPCMP_PPZZ(NAME##_s, float32, H1_4, OP) +#define DO_FPCMP_PPZZ_D(NAME, OP) \ + DO_FPCMP_PPZZ(NAME##_d, float64, , OP) + +#define DO_FPCMP_PPZZ_ALL(NAME, OP) \ + DO_FPCMP_PPZZ_H(NAME, OP) \ + DO_FPCMP_PPZZ_S(NAME, OP) \ + DO_FPCMP_PPZZ_D(NAME, OP) + +#define DO_FCMGE(TYPE, X, Y, ST) TYPE##_compare(Y, X, ST) <= 0 +#define DO_FCMGT(TYPE, X, Y, ST) TYPE##_compare(Y, X, ST) < 0 +#define DO_FCMLE(TYPE, X, Y, ST) TYPE##_compare(X, Y, ST) <= 0 +#define DO_FCMLT(TYPE, X, Y, ST) TYPE##_compare(X, Y, ST) < 0 +#define DO_FCMEQ(TYPE, X, Y, ST) TYPE##_compare_quiet(X, Y, ST) == 0 +#define DO_FCMNE(TYPE, X, Y, ST) TYPE##_compare_quiet(X, Y, ST) != 0 +#define DO_FCMUO(TYPE, X, Y, ST) \ + TYPE##_compare_quiet(X, Y, ST) == float_relation_unordered +#define DO_FACGE(TYPE, X, Y, ST) \ + TYPE##_compare(TYPE##_abs(Y), TYPE##_abs(X), ST) <= 0 +#define DO_FACGT(TYPE, X, Y, ST) \ + TYPE##_compare(TYPE##_abs(Y), TYPE##_abs(X), ST) < 0 + +DO_FPCMP_PPZZ_ALL(sve_fcmge, DO_FCMGE) +DO_FPCMP_PPZZ_ALL(sve_fcmgt, DO_FCMGT) +DO_FPCMP_PPZZ_ALL(sve_fcmeq, DO_FCMEQ) +DO_FPCMP_PPZZ_ALL(sve_fcmne, DO_FCMNE) +DO_FPCMP_PPZZ_ALL(sve_fcmuo, DO_FCMUO) +DO_FPCMP_PPZZ_ALL(sve_facge, DO_FACGE) +DO_FPCMP_PPZZ_ALL(sve_facgt, DO_FACGT) + +#undef DO_FPCMP_PPZZ_ALL +#undef DO_FPCMP_PPZZ_D +#undef DO_FPCMP_PPZZ_S +#undef DO_FPCMP_PPZZ_H +#undef DO_FPCMP_PPZZ + +/* One operand floating-point comparison against zero, controlled + * by a predicate. + */ +#define DO_FPCMP_PPZ0(NAME, TYPE, H, OP) \ +void HELPER(NAME)(void *vd, void *vn, void *vg, \ + void *status, uint32_t desc) \ +{ \ + intptr_t i = simd_oprsz(desc), j = (i - 1) >> 6; \ + uint64_t *d = vd, *g = vg; \ + do { \ + uint64_t out = 0, pg = g[j]; \ + do { \ + i -= sizeof(TYPE), out <<= sizeof(TYPE); \ + if ((pg >> (i & 63)) & 1) { \ + TYPE nn = *(TYPE *)((char *)vn + H(i)); \ + out |= OP(TYPE, nn, 0, status); \ + } \ + } while (i & 63); \ + d[j--] = out; \ + } while (i > 0); \ +} + +#define DO_FPCMP_PPZ0_H(NAME, OP) \ + DO_FPCMP_PPZ0(NAME##_h, float16, H1_2, OP) +#define DO_FPCMP_PPZ0_S(NAME, OP) \ + DO_FPCMP_PPZ0(NAME##_s, float32, H1_4, OP) +#define DO_FPCMP_PPZ0_D(NAME, OP) \ + DO_FPCMP_PPZ0(NAME##_d, float64, , OP) + +#define DO_FPCMP_PPZ0_ALL(NAME, OP) \ + DO_FPCMP_PPZ0_H(NAME, OP) \ + DO_FPCMP_PPZ0_S(NAME, OP) \ + DO_FPCMP_PPZ0_D(NAME, OP) + +DO_FPCMP_PPZ0_ALL(sve_fcmge0, DO_FCMGE) +DO_FPCMP_PPZ0_ALL(sve_fcmgt0, DO_FCMGT) +DO_FPCMP_PPZ0_ALL(sve_fcmle0, DO_FCMLE) +DO_FPCMP_PPZ0_ALL(sve_fcmlt0, DO_FCMLT) +DO_FPCMP_PPZ0_ALL(sve_fcmeq0, DO_FCMEQ) +DO_FPCMP_PPZ0_ALL(sve_fcmne0, DO_FCMNE) + +/* FP Trig Multiply-Add. 
*/ + +void HELPER(sve_ftmad_h)(void *vd, void *vn, void *vm, void *vs, uint32_t desc) +{ + static const float16 coeff[16] = { + 0x3c00, 0xb155, 0x2030, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x3c00, 0xb800, 0x293a, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + }; + intptr_t i, opr_sz = simd_oprsz(desc) / sizeof(float16); + intptr_t x = simd_data(desc); + float16 *d = vd, *n = vn, *m = vm; + for (i = 0; i < opr_sz; i++) { + float16 mm = m[i]; + intptr_t xx = x; + if (float16_is_neg(mm)) { + mm = float16_abs(mm); + xx += 8; + } + d[i] = float16_muladd(n[i], mm, coeff[xx], 0, vs); + } +} + +void HELPER(sve_ftmad_s)(void *vd, void *vn, void *vm, void *vs, uint32_t desc) +{ + static const float32 coeff[16] = { + 0x3f800000, 0xbe2aaaab, 0x3c088886, 0xb95008b9, + 0x36369d6d, 0x00000000, 0x00000000, 0x00000000, + 0x3f800000, 0xbf000000, 0x3d2aaaa6, 0xbab60705, + 0x37cd37cc, 0x00000000, 0x00000000, 0x00000000, + }; + intptr_t i, opr_sz = simd_oprsz(desc) / sizeof(float32); + intptr_t x = simd_data(desc); + float32 *d = vd, *n = vn, *m = vm; + for (i = 0; i < opr_sz; i++) { + float32 mm = m[i]; + intptr_t xx = x; + if (float32_is_neg(mm)) { + mm = float32_abs(mm); + xx += 8; + } + d[i] = float32_muladd(n[i], mm, coeff[xx], 0, vs); + } +} + +void HELPER(sve_ftmad_d)(void *vd, void *vn, void *vm, void *vs, uint32_t desc) +{ + static const float64 coeff[16] = { + 0x3ff0000000000000ull, 0xbfc5555555555543ull, + 0x3f8111111110f30cull, 0xbf2a01a019b92fc6ull, + 0x3ec71de351f3d22bull, 0xbe5ae5e2b60f7b91ull, + 0x3de5d8408868552full, 0x0000000000000000ull, + 0x3ff0000000000000ull, 0xbfe0000000000000ull, + 0x3fa5555555555536ull, 0xbf56c16c16c13a0bull, + 0x3efa01a019b1e8d8ull, 0xbe927e4f7282f468ull, + 0x3e21ee96d2641b13ull, 0xbda8f76380fbb401ull, + }; + intptr_t i, opr_sz = simd_oprsz(desc) / sizeof(float64); + intptr_t x = simd_data(desc); + float64 *d = vd, *n = vn, *m = vm; + for (i = 0; i < opr_sz; i++) { + float64 mm = m[i]; + intptr_t xx = x; + if (float64_is_neg(mm)) { + mm = float64_abs(mm); + xx += 8; + } + d[i] = float64_muladd(n[i], mm, coeff[xx], 0, vs); + } +} + +/* + * FP Complex Add + */ + +void HELPER(sve_fcadd_h)(void *vd, void *vn, void *vm, void *vg, + void *vs, uint32_t desc) +{ + intptr_t j, i = simd_oprsz(desc); + uint64_t *g = vg; + float16 neg_imag = float16_set_sign(0, simd_data(desc)); + float16 neg_real = float16_chs(neg_imag); + + do { + uint64_t pg = g[(i - 1) >> 6]; + do { + float16 e0, e1, e2, e3; + + /* I holds the real index; J holds the imag index. */ + j = i - sizeof(float16); + i -= 2 * sizeof(float16); + + e0 = *(float16 *)((char *)vn + H1_2(i)); + e1 = *(float16 *)((char *)vm + H1_2(j)) ^ neg_real; + e2 = *(float16 *)((char *)vn + H1_2(j)); + e3 = *(float16 *)((char *)vm + H1_2(i)) ^ neg_imag; + + if (likely((pg >> (i & 63)) & 1)) { + *(float16 *)((char *)vd + H1_2(i)) = float16_add(e0, e1, vs); + } + if (likely((pg >> (j & 63)) & 1)) { + *(float16 *)((char *)vd + H1_2(j)) = float16_add(e2, e3, vs); + } + } while (i & 63); + } while (i != 0); +} + +void HELPER(sve_fcadd_s)(void *vd, void *vn, void *vm, void *vg, + void *vs, uint32_t desc) +{ + intptr_t j, i = simd_oprsz(desc); + uint64_t *g = vg; + float32 neg_imag = float32_set_sign(0, simd_data(desc)); + float32 neg_real = float32_chs(neg_imag); + + do { + uint64_t pg = g[(i - 1) >> 6]; + do { + float32 e0, e1, e2, e3; + + /* I holds the real index; J holds the imag index. 
*/ + j = i - sizeof(float32); + i -= 2 * sizeof(float32); + + e0 = *(float32 *)((char *)vn + H1_2(i)); + e1 = *(float32 *)((char *)vm + H1_2(j)) ^ neg_real; + e2 = *(float32 *)((char *)vn + H1_2(j)); + e3 = *(float32 *)((char *)vm + H1_2(i)) ^ neg_imag; + + if (likely((pg >> (i & 63)) & 1)) { + *(float32 *)((char *)vd + H1_2(i)) = float32_add(e0, e1, vs); + } + if (likely((pg >> (j & 63)) & 1)) { + *(float32 *)((char *)vd + H1_2(j)) = float32_add(e2, e3, vs); + } + } while (i & 63); + } while (i != 0); +} + +void HELPER(sve_fcadd_d)(void *vd, void *vn, void *vm, void *vg, + void *vs, uint32_t desc) +{ + intptr_t j, i = simd_oprsz(desc); + uint64_t *g = vg; + float64 neg_imag = float64_set_sign(0, simd_data(desc)); + float64 neg_real = float64_chs(neg_imag); + + do { + uint64_t pg = g[(i - 1) >> 6]; + do { + float64 e0, e1, e2, e3; + + /* I holds the real index; J holds the imag index. */ + j = i - sizeof(float64); + i -= 2 * sizeof(float64); + + e0 = *(float64 *)((char *)vn + H1_2(i)); + e1 = *(float64 *)((char *)vm + H1_2(j)) ^ neg_real; + e2 = *(float64 *)((char *)vn + H1_2(j)); + e3 = *(float64 *)((char *)vm + H1_2(i)) ^ neg_imag; + + if (likely((pg >> (i & 63)) & 1)) { + *(float64 *)((char *)vd + H1_2(i)) = float64_add(e0, e1, vs); + } + if (likely((pg >> (j & 63)) & 1)) { + *(float64 *)((char *)vd + H1_2(j)) = float64_add(e2, e3, vs); + } + } while (i & 63); + } while (i != 0); +} + +/* + * FP Complex Multiply + */ + +void HELPER(sve_fcmla_zpzzz_h)(void *vd, void *vn, void *vm, void *va, + void *vg, void *status, uint32_t desc) +{ + intptr_t j, i = simd_oprsz(desc); + unsigned rot = simd_data(desc); + bool flip = rot & 1; + float16 neg_imag, neg_real; + uint64_t *g = vg; + + neg_imag = float16_set_sign(0, (rot & 2) != 0); + neg_real = float16_set_sign(0, rot == 1 || rot == 2); + + do { + uint64_t pg = g[(i - 1) >> 6]; + do { + float16 e1, e2, e3, e4, nr, ni, mr, mi, d; + + /* I holds the real index; J holds the imag index. */ + j = i - sizeof(float16); + i -= 2 * sizeof(float16); + + nr = *(float16 *)((char *)vn + H1_2(i)); + ni = *(float16 *)((char *)vn + H1_2(j)); + mr = *(float16 *)((char *)vm + H1_2(i)); + mi = *(float16 *)((char *)vm + H1_2(j)); + + e2 = (flip ? ni : nr); + e1 = (flip ? mi : mr) ^ neg_real; + e4 = e2; + e3 = (flip ? mr : mi) ^ neg_imag; + + if (likely((pg >> (i & 63)) & 1)) { + d = *(float16 *)((char *)va + H1_2(i)); + d = float16_muladd(e2, e1, d, 0, status); + *(float16 *)((char *)vd + H1_2(i)) = d; + } + if (likely((pg >> (j & 63)) & 1)) { + d = *(float16 *)((char *)va + H1_2(j)); + d = float16_muladd(e4, e3, d, 0, status); + *(float16 *)((char *)vd + H1_2(j)) = d; + } + } while (i & 63); + } while (i != 0); +} + +void HELPER(sve_fcmla_zpzzz_s)(void *vd, void *vn, void *vm, void *va, + void *vg, void *status, uint32_t desc) +{ + intptr_t j, i = simd_oprsz(desc); + unsigned rot = simd_data(desc); + bool flip = rot & 1; + float32 neg_imag, neg_real; + uint64_t *g = vg; + + neg_imag = float32_set_sign(0, (rot & 2) != 0); + neg_real = float32_set_sign(0, rot == 1 || rot == 2); + + do { + uint64_t pg = g[(i - 1) >> 6]; + do { + float32 e1, e2, e3, e4, nr, ni, mr, mi, d; + + /* I holds the real index; J holds the imag index. */ + j = i - sizeof(float32); + i -= 2 * sizeof(float32); + + nr = *(float32 *)((char *)vn + H1_2(i)); + ni = *(float32 *)((char *)vn + H1_2(j)); + mr = *(float32 *)((char *)vm + H1_2(i)); + mi = *(float32 *)((char *)vm + H1_2(j)); + + e2 = (flip ? ni : nr); + e1 = (flip ? mi : mr) ^ neg_real; + e4 = e2; + e3 = (flip ? 
mr : mi) ^ neg_imag; + + if (likely((pg >> (i & 63)) & 1)) { + d = *(float32 *)((char *)va + H1_2(i)); + d = float32_muladd(e2, e1, d, 0, status); + *(float32 *)((char *)vd + H1_2(i)) = d; + } + if (likely((pg >> (j & 63)) & 1)) { + d = *(float32 *)((char *)va + H1_2(j)); + d = float32_muladd(e4, e3, d, 0, status); + *(float32 *)((char *)vd + H1_2(j)) = d; + } + } while (i & 63); + } while (i != 0); +} + +void HELPER(sve_fcmla_zpzzz_d)(void *vd, void *vn, void *vm, void *va, + void *vg, void *status, uint32_t desc) +{ + intptr_t j, i = simd_oprsz(desc); + unsigned rot = simd_data(desc); + bool flip = rot & 1; + float64 neg_imag, neg_real; + uint64_t *g = vg; + + neg_imag = float64_set_sign(0, (rot & 2) != 0); + neg_real = float64_set_sign(0, rot == 1 || rot == 2); + + do { + uint64_t pg = g[(i - 1) >> 6]; + do { + float64 e1, e2, e3, e4, nr, ni, mr, mi, d; + + /* I holds the real index; J holds the imag index. */ + j = i - sizeof(float64); + i -= 2 * sizeof(float64); + + nr = *(float64 *)((char *)vn + H1_2(i)); + ni = *(float64 *)((char *)vn + H1_2(j)); + mr = *(float64 *)((char *)vm + H1_2(i)); + mi = *(float64 *)((char *)vm + H1_2(j)); + + e2 = (flip ? ni : nr); + e1 = (flip ? mi : mr) ^ neg_real; + e4 = e2; + e3 = (flip ? mr : mi) ^ neg_imag; + + if (likely((pg >> (i & 63)) & 1)) { + d = *(float64 *)((char *)va + H1_2(i)); + d = float64_muladd(e2, e1, d, 0, status); + *(float64 *)((char *)vd + H1_2(i)) = d; + } + if (likely((pg >> (j & 63)) & 1)) { + d = *(float64 *)((char *)va + H1_2(j)); + d = float64_muladd(e4, e3, d, 0, status); + *(float64 *)((char *)vd + H1_2(j)) = d; + } + } while (i & 63); + } while (i != 0); +} + +/* + * Load contiguous data, protected by a governing predicate. + */ + +/* + * Load one element into @vd + @reg_off from @host. + * The controlling predicate is known to be true. + */ +typedef void sve_ldst1_host_fn(void *vd, intptr_t reg_off, void *host); + +/* + * Load one element into @vd + @reg_off from (@env, @vaddr, @ra). + * The controlling predicate is known to be true. + */ +typedef void sve_ldst1_tlb_fn(CPUARMState *env, void *vd, intptr_t reg_off, + target_ulong vaddr, uintptr_t retaddr); + +/* + * Generate the above primitives. 
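+ * For example, DO_LD_PRIM_1(ld1bb, H1, uint8_t, uint8_t) below expands into
+ * the pair sve_ld1bb_host() and sve_ld1bb_tlb().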
+ */ + +#define DO_LD_HOST(NAME, H, TYPEE, TYPEM, HOST) \ +static void sve_##NAME##_host(void *vd, intptr_t reg_off, void *host) \ +{ \ + TYPEM val = HOST(host); \ + *(TYPEE *)(vd + H(reg_off)) = val; \ +} + +#define DO_ST_HOST(NAME, H, TYPEE, TYPEM, HOST) \ +static void sve_##NAME##_host(void *vd, intptr_t reg_off, void *host) \ +{ HOST(host, (TYPEM)*(TYPEE *)(vd + H(reg_off))); } + +#define DO_LD_TLB(NAME, H, TYPEE, TYPEM, TLB) \ +static void sve_##NAME##_tlb(CPUARMState *env, void *vd, intptr_t reg_off, \ + target_ulong addr, uintptr_t ra) \ +{ \ + *(TYPEE *)(vd + H(reg_off)) = \ + (TYPEM)TLB(env, useronly_clean_ptr(addr), ra); \ +} + +#define DO_ST_TLB(NAME, H, TYPEE, TYPEM, TLB) \ +static void sve_##NAME##_tlb(CPUARMState *env, void *vd, intptr_t reg_off, \ + target_ulong addr, uintptr_t ra) \ +{ \ + TLB(env, useronly_clean_ptr(addr), \ + (TYPEM)*(TYPEE *)(vd + H(reg_off)), ra); \ +} + + +#define DO_LD_PRIM_1(NAME, H, TE, TM) \ + DO_LD_HOST(NAME, H, TE, TM, ldub_p) \ + DO_LD_TLB(NAME, H, TE, TM, cpu_ldub_data_ra) + +DO_LD_PRIM_1(ld1bb, H1, uint8_t, uint8_t) +DO_LD_PRIM_1(ld1bhu, H1_2, uint16_t, uint8_t) +DO_LD_PRIM_1(ld1bhs, H1_2, uint16_t, int8_t) +DO_LD_PRIM_1(ld1bsu, H1_4, uint32_t, uint8_t) +DO_LD_PRIM_1(ld1bss, H1_4, uint32_t, int8_t) +DO_LD_PRIM_1(ld1bdu, , uint64_t, uint8_t) +DO_LD_PRIM_1(ld1bds, , uint64_t, int8_t) + +#define DO_ST_PRIM_1(NAME, H, TE, TM) \ + DO_ST_HOST(st1##NAME, H, TE, TM, stb_p) \ + DO_ST_TLB(st1##NAME, H, TE, TM, cpu_stb_data_ra) + +DO_ST_PRIM_1(bb, H1, uint8_t, uint8_t) +DO_ST_PRIM_1(bh, H1_2, uint16_t, uint8_t) +DO_ST_PRIM_1(bs, H1_4, uint32_t, uint8_t) +DO_ST_PRIM_1(bd, , uint64_t, uint8_t) + +#define DO_LD_PRIM_2(NAME, H, TE, TM, LD) \ + DO_LD_HOST(ld1##NAME##_be, H, TE, TM, LD##_be_p) \ + DO_LD_HOST(ld1##NAME##_le, H, TE, TM, LD##_le_p) \ + DO_LD_TLB(ld1##NAME##_be, H, TE, TM, cpu_##LD##_be_data_ra) \ + DO_LD_TLB(ld1##NAME##_le, H, TE, TM, cpu_##LD##_le_data_ra) + +#define DO_ST_PRIM_2(NAME, H, TE, TM, ST) \ + DO_ST_HOST(st1##NAME##_be, H, TE, TM, ST##_be_p) \ + DO_ST_HOST(st1##NAME##_le, H, TE, TM, ST##_le_p) \ + DO_ST_TLB(st1##NAME##_be, H, TE, TM, cpu_##ST##_be_data_ra) \ + DO_ST_TLB(st1##NAME##_le, H, TE, TM, cpu_##ST##_le_data_ra) + +DO_LD_PRIM_2(hh, H1_2, uint16_t, uint16_t, lduw) +DO_LD_PRIM_2(hsu, H1_4, uint32_t, uint16_t, lduw) +DO_LD_PRIM_2(hss, H1_4, uint32_t, int16_t, lduw) +DO_LD_PRIM_2(hdu, , uint64_t, uint16_t, lduw) +DO_LD_PRIM_2(hds, , uint64_t, int16_t, lduw) + +DO_ST_PRIM_2(hh, H1_2, uint16_t, uint16_t, stw) +DO_ST_PRIM_2(hs, H1_4, uint32_t, uint16_t, stw) +DO_ST_PRIM_2(hd, , uint64_t, uint16_t, stw) + +DO_LD_PRIM_2(ss, H1_4, uint32_t, uint32_t, ldl) +DO_LD_PRIM_2(sdu, , uint64_t, uint32_t, ldl) +DO_LD_PRIM_2(sds, , uint64_t, int32_t, ldl) + +DO_ST_PRIM_2(ss, H1_4, uint32_t, uint32_t, stl) +DO_ST_PRIM_2(sd, , uint64_t, uint32_t, stl) + +DO_LD_PRIM_2(dd, , uint64_t, uint64_t, ldq) +DO_ST_PRIM_2(dd, , uint64_t, uint64_t, stq) + +#undef DO_LD_TLB +#undef DO_ST_TLB +#undef DO_LD_HOST +#undef DO_LD_PRIM_1 +#undef DO_ST_PRIM_1 +#undef DO_LD_PRIM_2 +#undef DO_ST_PRIM_2 + +/* + * Skip through a sequence of inactive elements in the guarding predicate @vg, + * beginning at @reg_off bounded by @reg_max. Return the offset of the active + * element >= @reg_off, or @reg_max if there were no active elements at all. 
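+ * For example, with byte elements (esz == 0) and the low predicate bits
+ * 0b1000, a search from reg_off 0 skips the three inactive bytes and returns 3.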
+ */ +static intptr_t find_next_active(uint64_t *vg, intptr_t reg_off, + intptr_t reg_max, int esz) +{ + uint64_t pg_mask = pred_esz_masks[esz]; + uint64_t pg = (vg[reg_off >> 6] & pg_mask) >> (reg_off & 63); + + /* In normal usage, the first element is active. */ + if (likely(pg & 1)) { + return reg_off; + } + + if (pg == 0) { + reg_off &= -64; + do { + reg_off += 64; + if (unlikely(reg_off >= reg_max)) { + /* The entire predicate was false. */ + return reg_max; + } + pg = vg[reg_off >> 6] & pg_mask; + } while (pg == 0); + } + reg_off += ctz64(pg); + + /* We should never see an out of range predicate bit set. */ + tcg_debug_assert(reg_off < reg_max); + return reg_off; +} + +/* + * Resolve the guest virtual address to info->host and info->flags. + * If @nofault, return false if the page is invalid, otherwise + * exit via page fault exception. + */ + +typedef struct { + void *host; + int flags; + MemTxAttrs attrs; +} SVEHostPage; + +static bool sve_probe_page(SVEHostPage *info, bool nofault, + CPUARMState *env, target_ulong addr, + int mem_off, MMUAccessType access_type, + int mmu_idx, uintptr_t retaddr) +{ + int flags; + + addr += mem_off; + + /* + * User-only currently always issues with TBI. See the comment + * above useronly_clean_ptr. Usually we clean this top byte away + * during translation, but we can't do that for e.g. vector + imm + * addressing modes. + * + * We currently always enable TBI for user-only, and do not provide + * a way to turn it off. So clean the pointer unconditionally here, + * rather than look it up here, or pass it down from above. + */ + addr = useronly_clean_ptr(addr); + + flags = probe_access_flags(env, addr, access_type, mmu_idx, nofault, + &info->host, retaddr); + info->flags = flags; + + if (flags & TLB_INVALID_MASK) { + g_assert(nofault); + return false; + } + + /* Ensure that info->host[] is relative to addr, not addr + mem_off. */ + info->host -= mem_off; + + /* + * Find the iotlbentry for addr and return the transaction attributes. + * This *must* be present in the TLB because we just found the mapping. + */ + { + uintptr_t index = tlb_index(env, mmu_idx, addr); + +# ifdef CONFIG_DEBUG_TCG + CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr); + target_ulong comparator = (access_type == MMU_DATA_LOAD + ? entry->addr_read + : tlb_addr_write(entry)); + g_assert(tlb_hit(comparator, addr)); +# endif + + CPUIOTLBEntry *iotlbentry = &env_tlb(env)->d[mmu_idx].iotlb[index]; + info->attrs = iotlbentry->attrs; + } + + return true; +} + + +/* + * Analyse contiguous data, protected by a governing predicate. + */ + +typedef enum { + FAULT_NO, + FAULT_FIRST, + FAULT_ALL, +} SVEContFault; + +typedef struct { + /* + * First and last element wholly contained within the two pages. + * mem_off_first[0] and reg_off_first[0] are always set >= 0. + * reg_off_last[0] may be < 0 if the first element crosses pages. + * All of mem_off_first[1], reg_off_first[1] and reg_off_last[1] + * are set >= 0 only if there are complete elements on a second page. + * + * The reg_off_* offsets are relative to the internal vector register. + * The mem_off_first offset is relative to the memory address; the + * two offsets are different when a load operation extends, a store + * operation truncates, or for multi-register operations. + */ + int16_t mem_off_first[2]; + int16_t reg_off_first[2]; + int16_t reg_off_last[2]; + + /* + * One element that is misaligned and spans both pages, + * or -1 if there is no such active element. 
+ */ + int16_t mem_off_split; + int16_t reg_off_split; + + /* + * The byte offset at which the entire operation crosses a page boundary. + * Set >= 0 if and only if the entire operation spans two pages. + */ + int16_t page_split; + + /* TLB data for the two pages. */ + SVEHostPage page[2]; +} SVEContLdSt; + +/* + * Common helper for all contiguous one-register predicated loads. + */ +static void sve_ld1_r(CPUARMState *env, void *vg, const target_ulong addr, + uint32_t desc, const uintptr_t retaddr, + const int esz, const int msz, + sve_ld1_host_fn *host_fn, + sve_ld1_tlb_fn *tlb_fn) +{ + const TCGMemOpIdx oi = extract32(desc, SIMD_DATA_SHIFT, MEMOPIDX_SHIFT); + const int mmu_idx = get_mmuidx(oi); + const unsigned rd = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 5); + void *vd = &env->vfp.zregs[rd]; + const int diffsz = esz - msz; + const intptr_t reg_max = simd_oprsz(desc); + const intptr_t mem_max = reg_max >> diffsz; + ARMVectorReg scratch; + void *host; + intptr_t split, reg_off, mem_off; + + /* Find the first active element. */ + reg_off = find_next_active(vg, 0, reg_max, esz); + if (unlikely(reg_off == reg_max)) { + /* The entire predicate was false; no load occurs. */ + memset(vd, 0, reg_max); + return; + } + mem_off = reg_off >> diffsz; + set_helper_retaddr(retaddr); + + /* + * If the (remaining) load is entirely within a single page, then: + * For softmmu, and the tlb hits, then no faults will occur; + * For user-only, either the first load will fault or none will. + * We can thus perform the load directly to the destination and + * Vd will be unmodified on any exception path. + */ + split = max_for_page(env->uc, addr, mem_off, mem_max); + if (likely(split == mem_max)) { + host = tlb_vaddr_to_host(env, addr + mem_off, MMU_DATA_LOAD, mmu_idx); + if (test_host_page(host)) { + mem_off = host_fn(vd, vg, (char *)host - mem_off, mem_off, mem_max); + tcg_debug_assert(mem_off == mem_max); + clear_helper_retaddr(); + /* After having taken any fault, zero leading inactive elements. */ + swap_memzero(vd, reg_off); + return; + } + } + + /* + * Perform the predicated read into a temporary, thus ensuring + * if the load of the last element faults, Vd is not modified. + */ + memset(&scratch, 0, reg_max); + goto start; + while (1) { + reg_off = find_next_active(vg, reg_off, reg_max, esz); + if (reg_off >= reg_max) { + break; + } + mem_off = reg_off >> diffsz; + split = max_for_page(env->uc, addr, mem_off, mem_max); + + start: + if (split - mem_off >= (1ULL << msz)) { + /* At least one whole element on this page. */ + host = tlb_vaddr_to_host(env, addr + mem_off, + MMU_DATA_LOAD, mmu_idx); + if (host) { + mem_off = host_fn(&scratch, vg, (char *)host - mem_off, + mem_off, split); + reg_off = mem_off << diffsz; + continue; + } + } + + /* + * Perform one normal read. This may fault, longjmping out to the + * main loop in order to raise an exception. It may succeed, and + * as a side-effect load the TLB entry for the next round. Finally, + * in the extremely unlikely case we're performing this operation + * on I/O memory, it may succeed but not bring in the TLB entry. + * But even then we have still made forward progress. 
+ */ + tlb_fn(env, &scratch, reg_off, addr + mem_off, oi, retaddr); + reg_off += 1ULL << esz; + } + + clear_helper_retaddr(); + memcpy(vd, &scratch, reg_max); +} + +#define DO_LD1_1(NAME, ESZ) \ +void HELPER(sve_##NAME##_r)(CPUARMState *env, void *vg, \ + target_ulong addr, uint32_t desc) \ +{ \ + sve_ld1_r(env, vg, addr, desc, GETPC(), ESZ, 0, \ + sve_##NAME##_host, sve_##NAME##_tlb); \ +} + +#define DO_LD1_2(NAME, ESZ, MSZ) \ +void HELPER(sve_##NAME##_le_r)(CPUARMState *env, void *vg, \ + target_ulong addr, uint32_t desc) \ +{ \ + sve_ld1_r(env, vg, addr, desc, GETPC(), ESZ, MSZ, \ + sve_##NAME##_le_host, sve_##NAME##_le_tlb); \ +} \ +void HELPER(sve_##NAME##_be_r)(CPUARMState *env, void *vg, \ + target_ulong addr, uint32_t desc) \ +{ \ + sve_ld1_r(env, vg, addr, desc, GETPC(), ESZ, MSZ, \ + sve_##NAME##_be_host, sve_##NAME##_be_tlb); \ +} + +DO_LD1_1(ld1bb, 0) +DO_LD1_1(ld1bhu, 1) +DO_LD1_1(ld1bhs, 1) +DO_LD1_1(ld1bsu, 2) +DO_LD1_1(ld1bss, 2) +DO_LD1_1(ld1bdu, 3) +DO_LD1_1(ld1bds, 3) + +DO_LD1_2(ld1hh, 1, 1) +DO_LD1_2(ld1hsu, 2, 1) +DO_LD1_2(ld1hss, 2, 1) +DO_LD1_2(ld1hdu, 3, 1) +DO_LD1_2(ld1hds, 3, 1) + +DO_LD1_2(ld1ss, 2, 2) +DO_LD1_2(ld1sdu, 3, 2) +DO_LD1_2(ld1sds, 3, 2) + +DO_LD1_2(ld1dd, 3, 3) + +#undef DO_LD1_1 +#undef DO_LD1_2 + +/* + * Common helpers for all contiguous 2,3,4-register predicated loads. + */ +static void sve_ld2_r(CPUARMState *env, void *vg, target_ulong addr, + uint32_t desc, int size, uintptr_t ra, + sve_ld1_tlb_fn *tlb_fn) +{ + const TCGMemOpIdx oi = extract32(desc, SIMD_DATA_SHIFT, MEMOPIDX_SHIFT); + const unsigned rd = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 5); + intptr_t i, oprsz = simd_oprsz(desc); + ARMVectorReg scratch[2] = { 0 }; + + set_helper_retaddr(ra); + for (i = 0; i < oprsz; ) { + uint16_t pg = *(uint16_t *)((char *)vg + H1_2(i >> 3)); + do { + if (pg & 1) { + tlb_fn(env, &scratch[0], i, addr, oi, ra); + tlb_fn(env, &scratch[1], i, addr + size, oi, ra); + } + i += size, pg >>= size; + addr += 2 * size; + } while (i & 15); + } + clear_helper_retaddr(); + + /* Wait until all exceptions have been raised to write back. */ + memcpy(&env->vfp.zregs[rd], &scratch[0], oprsz); + memcpy(&env->vfp.zregs[(rd + 1) & 31], &scratch[1], oprsz); +} + +static void sve_ld3_r(CPUARMState *env, void *vg, target_ulong addr, + uint32_t desc, int size, uintptr_t ra, + sve_ld1_tlb_fn *tlb_fn) +{ + const TCGMemOpIdx oi = extract32(desc, SIMD_DATA_SHIFT, MEMOPIDX_SHIFT); + const unsigned rd = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 5); + intptr_t i, oprsz = simd_oprsz(desc); + ARMVectorReg scratch[3] = { 0 }; + + set_helper_retaddr(ra); + for (i = 0; i < oprsz; ) { + uint16_t pg = *(uint16_t *)((char *)vg + H1_2(i >> 3)); + do { + if (pg & 1) { + tlb_fn(env, &scratch[0], i, addr, oi, ra); + tlb_fn(env, &scratch[1], i, addr + size, oi, ra); + tlb_fn(env, &scratch[2], i, addr + 2 * size, oi, ra); + } + i += size, pg >>= size; + addr += 3 * size; + } while (i & 15); + } + clear_helper_retaddr(); + + /* Wait until all exceptions have been raised to write back. 
*/ + memcpy(&env->vfp.zregs[rd], &scratch[0], oprsz); + memcpy(&env->vfp.zregs[(rd + 1) & 31], &scratch[1], oprsz); + memcpy(&env->vfp.zregs[(rd + 2) & 31], &scratch[2], oprsz); +} + +static void sve_ld4_r(CPUARMState *env, void *vg, target_ulong addr, + uint32_t desc, int size, uintptr_t ra, + sve_ld1_tlb_fn *tlb_fn) +{ + const TCGMemOpIdx oi = extract32(desc, SIMD_DATA_SHIFT, MEMOPIDX_SHIFT); + const unsigned rd = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 5); + intptr_t i, oprsz = simd_oprsz(desc); + ARMVectorReg scratch[4] = { 0 }; + + set_helper_retaddr(ra); + for (i = 0; i < oprsz; ) { + uint16_t pg = *(uint16_t *)((char *)vg + H1_2(i >> 3)); + do { + if (pg & 1) { + tlb_fn(env, &scratch[0], i, addr, oi, ra); + tlb_fn(env, &scratch[1], i, addr + size, oi, ra); + tlb_fn(env, &scratch[2], i, addr + 2 * size, oi, ra); + tlb_fn(env, &scratch[3], i, addr + 3 * size, oi, ra); + } + i += size, pg >>= size; + addr += 4 * size; + } while (i & 15); + } + clear_helper_retaddr(); + + /* Wait until all exceptions have been raised to write back. */ + memcpy(&env->vfp.zregs[rd], &scratch[0], oprsz); + memcpy(&env->vfp.zregs[(rd + 1) & 31], &scratch[1], oprsz); + memcpy(&env->vfp.zregs[(rd + 2) & 31], &scratch[2], oprsz); + memcpy(&env->vfp.zregs[(rd + 3) & 31], &scratch[3], oprsz); +} + +#define DO_LDN_1(N) \ +void QEMU_FLATTEN HELPER(sve_ld##N##bb_r) \ + (CPUARMState *env, void *vg, target_ulong addr, uint32_t desc) \ +{ \ + sve_ld##N##_r(env, vg, addr, desc, 1, GETPC(), sve_ld1bb_tlb); \ +} + +#define DO_LDN_2(N, SUFF, SIZE) \ +void QEMU_FLATTEN HELPER(sve_ld##N##SUFF##_le_r) \ + (CPUARMState *env, void *vg, target_ulong addr, uint32_t desc) \ +{ \ + sve_ld##N##_r(env, vg, addr, desc, SIZE, GETPC(), \ + sve_ld1##SUFF##_le_tlb); \ +} \ +void QEMU_FLATTEN HELPER(sve_ld##N##SUFF##_be_r) \ + (CPUARMState *env, void *vg, target_ulong addr, uint32_t desc) \ +{ \ + sve_ld##N##_r(env, vg, addr, desc, SIZE, GETPC(), \ + sve_ld1##SUFF##_be_tlb); \ +} + +DO_LDN_1(2) +DO_LDN_1(3) +DO_LDN_1(4) + +DO_LDN_2(2, hh, 2) +DO_LDN_2(3, hh, 2) +DO_LDN_2(4, hh, 2) + +DO_LDN_2(2, ss, 4) +DO_LDN_2(3, ss, 4) +DO_LDN_2(4, ss, 4) + +DO_LDN_2(2, dd, 8) +DO_LDN_2(3, dd, 8) +DO_LDN_2(4, dd, 8) + +#undef DO_LDN_1 +#undef DO_LDN_2 + +/* + * Load contiguous data, first-fault and no-fault. + * + * For user-only, one could argue that we should hold the mmap_lock during + * the operation so that there is no race between page_check_range and the + * load operation. However, unmapping pages out from under a running thread + * is extraordinarily unlikely. This theoretical race condition also affects + * linux-user/ in its get_user/put_user macros. + * + * TODO: Construct some helpers, written in assembly, that interact with + * handle_cpu_signal to produce memory ops which can properly report errors + * without racing. + */ + +/* Fault on byte I. All bits in FFR from I are cleared. The vector + * result from I is CONSTRAINED UNPREDICTABLE; we choose the MERGE + * option, which leaves subsequent data unchanged. + */ +static void record_fault(CPUARMState *env, uintptr_t i, uintptr_t oprsz) +{ + uint64_t *ffr = env->vfp.pregs[FFR_PRED_NUM].p; + + if (i & 63) { + ffr[i / 64] &= MAKE_64BIT_MASK(0, i & 63); + i = ROUND_UP(i, 64); + } + for (; i < oprsz; i += 64) { + ffr[i / 64] = 0; + } +} + +/* + * Common helper for all contiguous first-fault loads. 
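+ * Only the first active element is permitted to fault; if a later element
+ * cannot be loaded without faulting, the FFR is truncated via record_fault()
+ * and the instruction still completes.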
+ */ +static void sve_ldff1_r(CPUARMState *env, void *vg, const target_ulong addr, + uint32_t desc, const uintptr_t retaddr, + const int esz, const int msz, + sve_ld1_host_fn *host_fn, + sve_ld1_tlb_fn *tlb_fn) +{ + const TCGMemOpIdx oi = extract32(desc, SIMD_DATA_SHIFT, MEMOPIDX_SHIFT); + const int mmu_idx = get_mmuidx(oi); + const unsigned rd = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 5); + void *vd = &env->vfp.zregs[rd]; + const int diffsz = esz - msz; + const intptr_t reg_max = simd_oprsz(desc); + const intptr_t mem_max = reg_max >> diffsz; + intptr_t split, reg_off, mem_off; + void *host; + + /* Skip to the first active element. */ + reg_off = find_next_active(vg, 0, reg_max, esz); + if (unlikely(reg_off == reg_max)) { + /* The entire predicate was false; no load occurs. */ + memset(vd, 0, reg_max); + return; + } + mem_off = reg_off >> diffsz; + set_helper_retaddr(retaddr); + + /* + * If the (remaining) load is entirely within a single page, then: + * For softmmu, and the tlb hits, then no faults will occur; + * For user-only, either the first load will fault or none will. + * We can thus perform the load directly to the destination and + * Vd will be unmodified on any exception path. + */ + split = max_for_page(env->uc, addr, mem_off, mem_max); + if (likely(split == mem_max)) { + host = tlb_vaddr_to_host(env, addr + mem_off, MMU_DATA_LOAD, mmu_idx); + if (test_host_page(host)) { + mem_off = host_fn(vd, vg, (char *)host - mem_off, mem_off, mem_max); + tcg_debug_assert(mem_off == mem_max); + clear_helper_retaddr(); + /* After any fault, zero any leading inactive elements. */ + swap_memzero(vd, reg_off); + return; + } + } + + /* + * Perform one normal read, which will fault or not. + * But it is likely to bring the page into the tlb. + */ + tlb_fn(env, vd, reg_off, addr + mem_off, oi, retaddr); + + /* After any fault, zero any leading predicated false elts. */ + swap_memzero(vd, reg_off); + mem_off += 1ULL << msz; + reg_off += 1ULL << esz; + + /* Try again to read the balance of the page. */ + split = max_for_page(env->uc, addr, mem_off - 1, mem_max); + if (split >= (1ULL << msz)) { + host = tlb_vaddr_to_host(env, addr + mem_off, MMU_DATA_LOAD, mmu_idx); + if (host) { + mem_off = host_fn(vd, vg, (char *)host - mem_off, mem_off, split); + reg_off = mem_off << diffsz; + } + } + + clear_helper_retaddr(); + record_fault(env, reg_off, reg_max); +} + +/* + * Common helper for all contiguous no-fault loads. + */ +static void sve_ldnf1_r(CPUARMState *env, void *vg, const target_ulong addr, + uint32_t desc, const int esz, const int msz, + sve_ld1_host_fn *host_fn) +{ + const unsigned rd = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 5); + void *vd = &env->vfp.zregs[rd]; + const int diffsz = esz - msz; + const intptr_t reg_max = simd_oprsz(desc); + const intptr_t mem_max = reg_max >> diffsz; + const int mmu_idx = cpu_mmu_index(env, false); + intptr_t split, reg_off, mem_off; + void *host; + + /* There will be no fault, so we may modify in advance. */ + memset(vd, 0, reg_max); + + /* Skip to the first active element. */ + reg_off = find_next_active(vg, 0, reg_max, esz); + if (unlikely(reg_off == reg_max)) { + /* The entire predicate was false; no load occurs. */ + return; + } + mem_off = reg_off >> diffsz; + + /* + * If the address is not in the TLB, we have no way to bring the + * entry into the TLB without also risking a fault. Note that + * the corollary is that we never load from an address not in RAM. + * + * This last is out of spec, in a weird corner case. 
+ * Per the MemNF/MemSingleNF pseudocode, a NF load from Device memory + * must not actually hit the bus -- it returns UNKNOWN data instead. + * But if you map non-RAM with Normal memory attributes and do a NF + * load then it should access the bus. (Nobody ought actually do this + * in the real world, obviously.) + * + * Then there are the annoying special cases with watchpoints... + * TODO: Add a form of non-faulting loads using cc->tlb_fill(probe=true). + */ + host = tlb_vaddr_to_host(env, addr + mem_off, MMU_DATA_LOAD, mmu_idx); + split = max_for_page(env->uc, addr, mem_off, mem_max); + if (host && split >= (1ULL << msz)) { + mem_off = host_fn(vd, vg, (char *)host - mem_off, mem_off, split); + reg_off = mem_off << diffsz; + } + + record_fault(env, reg_off, reg_max); +} + +#define DO_LDFF1_LDNF1_1(PART, ESZ) \ +void HELPER(sve_ldff1##PART##_r)(CPUARMState *env, void *vg, \ + target_ulong addr, uint32_t desc) \ +{ \ + sve_ldff1_r(env, vg, addr, desc, GETPC(), ESZ, 0, \ + sve_ld1##PART##_host, sve_ld1##PART##_tlb); \ +} \ +void HELPER(sve_ldnf1##PART##_r)(CPUARMState *env, void *vg, \ + target_ulong addr, uint32_t desc) \ +{ \ + sve_ldnf1_r(env, vg, addr, desc, ESZ, 0, sve_ld1##PART##_host); \ +} + +#define DO_LDFF1_LDNF1_2(PART, ESZ, MSZ) \ +void HELPER(sve_ldff1##PART##_le_r)(CPUARMState *env, void *vg, \ + target_ulong addr, uint32_t desc) \ +{ \ + sve_ldff1_r(env, vg, addr, desc, GETPC(), ESZ, MSZ, \ + sve_ld1##PART##_le_host, sve_ld1##PART##_le_tlb); \ +} \ +void HELPER(sve_ldnf1##PART##_le_r)(CPUARMState *env, void *vg, \ + target_ulong addr, uint32_t desc) \ +{ \ + sve_ldnf1_r(env, vg, addr, desc, ESZ, MSZ, sve_ld1##PART##_le_host); \ +} \ +void HELPER(sve_ldff1##PART##_be_r)(CPUARMState *env, void *vg, \ + target_ulong addr, uint32_t desc) \ +{ \ + sve_ldff1_r(env, vg, addr, desc, GETPC(), ESZ, MSZ, \ + sve_ld1##PART##_be_host, sve_ld1##PART##_be_tlb); \ +} \ +void HELPER(sve_ldnf1##PART##_be_r)(CPUARMState *env, void *vg, \ + target_ulong addr, uint32_t desc) \ +{ \ + sve_ldnf1_r(env, vg, addr, desc, ESZ, MSZ, sve_ld1##PART##_be_host); \ +} + +DO_LDFF1_LDNF1_1(bb, 0) +DO_LDFF1_LDNF1_1(bhu, 1) +DO_LDFF1_LDNF1_1(bhs, 1) +DO_LDFF1_LDNF1_1(bsu, 2) +DO_LDFF1_LDNF1_1(bss, 2) +DO_LDFF1_LDNF1_1(bdu, 3) +DO_LDFF1_LDNF1_1(bds, 3) + +DO_LDFF1_LDNF1_2(hh, 1, 1) +DO_LDFF1_LDNF1_2(hsu, 2, 1) +DO_LDFF1_LDNF1_2(hss, 2, 1) +DO_LDFF1_LDNF1_2(hdu, 3, 1) +DO_LDFF1_LDNF1_2(hds, 3, 1) + +DO_LDFF1_LDNF1_2(ss, 2, 2) +DO_LDFF1_LDNF1_2(sdu, 3, 2) +DO_LDFF1_LDNF1_2(sds, 3, 2) + +DO_LDFF1_LDNF1_2(dd, 3, 3) + +#undef DO_LDFF1_LDNF1_1 +#undef DO_LDFF1_LDNF1_2 + +/* + * Store contiguous data, protected by a governing predicate. 
+ */ + +#define DO_ST_TLB(NAME, H, TYPEM, HOST, MOEND, TLB) \ +static void sve_##NAME##_tlb(CPUARMState *env, void *vd, intptr_t reg_off, \ + target_ulong addr, TCGMemOpIdx oi, uintptr_t ra) \ +{ \ + TLB(env, addr, *(TYPEM *)((char *)vd + H(reg_off)), oi, ra); \ +} + +DO_ST_TLB(st1bb, H1, uint8_t, stb_p, 0, helper_ret_stb_mmu) +DO_ST_TLB(st1bh, H1_2, uint16_t, stb_p, 0, helper_ret_stb_mmu) +DO_ST_TLB(st1bs, H1_4, uint32_t, stb_p, 0, helper_ret_stb_mmu) +DO_ST_TLB(st1bd, , uint64_t, stb_p, 0, helper_ret_stb_mmu) + +DO_ST_TLB(st1hh_le, H1_2, uint16_t, stw_le_p, MO_LE, helper_le_stw_mmu) +DO_ST_TLB(st1hs_le, H1_4, uint32_t, stw_le_p, MO_LE, helper_le_stw_mmu) +DO_ST_TLB(st1hd_le, , uint64_t, stw_le_p, MO_LE, helper_le_stw_mmu) + +DO_ST_TLB(st1ss_le, H1_4, uint32_t, stl_le_p, MO_LE, helper_le_stl_mmu) +DO_ST_TLB(st1sd_le, , uint64_t, stl_le_p, MO_LE, helper_le_stl_mmu) + +DO_ST_TLB(st1dd_le, , uint64_t, stq_le_p, MO_LE, helper_le_stq_mmu) + +DO_ST_TLB(st1hh_be, H1_2, uint16_t, stw_be_p, MO_BE, helper_be_stw_mmu) +DO_ST_TLB(st1hs_be, H1_4, uint32_t, stw_be_p, MO_BE, helper_be_stw_mmu) +DO_ST_TLB(st1hd_be, , uint64_t, stw_be_p, MO_BE, helper_be_stw_mmu) + +DO_ST_TLB(st1ss_be, H1_4, uint32_t, stl_be_p, MO_BE, helper_be_stl_mmu) +DO_ST_TLB(st1sd_be, , uint64_t, stl_be_p, MO_BE, helper_be_stl_mmu) + +DO_ST_TLB(st1dd_be, , uint64_t, stq_be_p, MO_BE, helper_be_stq_mmu) + +#undef DO_ST_TLB + +/* + * Common helpers for all contiguous 1,2,3,4-register predicated stores. + */ +static void sve_st1_r(CPUARMState *env, void *vg, target_ulong addr, + uint32_t desc, const uintptr_t ra, + const int esize, const int msize, + sve_st1_tlb_fn *tlb_fn) +{ + const TCGMemOpIdx oi = extract32(desc, SIMD_DATA_SHIFT, MEMOPIDX_SHIFT); + const unsigned rd = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 5); + intptr_t i, oprsz = simd_oprsz(desc); + void *vd = &env->vfp.zregs[rd]; + + set_helper_retaddr(ra); + for (i = 0; i < oprsz; ) { + uint16_t pg = *(uint16_t *)((char *)vg + H1_2(i >> 3)); + do { + if (pg & 1) { + tlb_fn(env, vd, i, addr, oi, ra); + } + i += esize, pg >>= esize; + addr += msize; + } while (i & 15); + } + clear_helper_retaddr(); +} + +static void sve_st2_r(CPUARMState *env, void *vg, target_ulong addr, + uint32_t desc, const uintptr_t ra, + const int esize, const int msize, + sve_st1_tlb_fn *tlb_fn) +{ + const TCGMemOpIdx oi = extract32(desc, SIMD_DATA_SHIFT, MEMOPIDX_SHIFT); + const unsigned rd = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 5); + intptr_t i, oprsz = simd_oprsz(desc); + void *d1 = &env->vfp.zregs[rd]; + void *d2 = &env->vfp.zregs[(rd + 1) & 31]; + + set_helper_retaddr(ra); + for (i = 0; i < oprsz; ) { + uint16_t pg = *(uint16_t *)((char *)vg + H1_2(i >> 3)); + do { + if (pg & 1) { + tlb_fn(env, d1, i, addr, oi, ra); + tlb_fn(env, d2, i, addr + msize, oi, ra); + } + i += esize, pg >>= esize; + addr += 2 * msize; + } while (i & 15); + } + clear_helper_retaddr(); +} + +static void sve_st3_r(CPUARMState *env, void *vg, target_ulong addr, + uint32_t desc, const uintptr_t ra, + const int esize, const int msize, + sve_st1_tlb_fn *tlb_fn) +{ + const TCGMemOpIdx oi = extract32(desc, SIMD_DATA_SHIFT, MEMOPIDX_SHIFT); + const unsigned rd = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 5); + intptr_t i, oprsz = simd_oprsz(desc); + void *d1 = &env->vfp.zregs[rd]; + void *d2 = &env->vfp.zregs[(rd + 1) & 31]; + void *d3 = &env->vfp.zregs[(rd + 2) & 31]; + + set_helper_retaddr(ra); + for (i = 0; i < oprsz; ) { + uint16_t pg = *(uint16_t *)((char *)vg + H1_2(i >> 3)); + do { + if (pg & 1) { 
+ tlb_fn(env, d1, i, addr, oi, ra); + tlb_fn(env, d2, i, addr + msize, oi, ra); + tlb_fn(env, d3, i, addr + 2 * msize, oi, ra); + } + i += esize, pg >>= esize; + addr += 3 * msize; + } while (i & 15); + } + clear_helper_retaddr(); +} + +static void sve_st4_r(CPUARMState *env, void *vg, target_ulong addr, + uint32_t desc, const uintptr_t ra, + const int esize, const int msize, + sve_st1_tlb_fn *tlb_fn) +{ + const TCGMemOpIdx oi = extract32(desc, SIMD_DATA_SHIFT, MEMOPIDX_SHIFT); + const unsigned rd = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 5); + intptr_t i, oprsz = simd_oprsz(desc); + void *d1 = &env->vfp.zregs[rd]; + void *d2 = &env->vfp.zregs[(rd + 1) & 31]; + void *d3 = &env->vfp.zregs[(rd + 2) & 31]; + void *d4 = &env->vfp.zregs[(rd + 3) & 31]; + + set_helper_retaddr(ra); + for (i = 0; i < oprsz; ) { + uint16_t pg = *(uint16_t *)((char *)vg + H1_2(i >> 3)); + do { + if (pg & 1) { + tlb_fn(env, d1, i, addr, oi, ra); + tlb_fn(env, d2, i, addr + msize, oi, ra); + tlb_fn(env, d3, i, addr + 2 * msize, oi, ra); + tlb_fn(env, d4, i, addr + 3 * msize, oi, ra); + } + i += esize, pg >>= esize; + addr += 4 * msize; + } while (i & 15); + } + clear_helper_retaddr(); +} + +#define DO_STN_1(N, NAME, ESIZE) \ +void QEMU_FLATTEN HELPER(sve_st##N##NAME##_r) \ + (CPUARMState *env, void *vg, target_ulong addr, uint32_t desc) \ +{ \ + sve_st##N##_r(env, vg, addr, desc, GETPC(), ESIZE, 1, \ + sve_st1##NAME##_tlb); \ +} + +#define DO_STN_2(N, NAME, ESIZE, MSIZE) \ +void QEMU_FLATTEN HELPER(sve_st##N##NAME##_le_r) \ + (CPUARMState *env, void *vg, target_ulong addr, uint32_t desc) \ +{ \ + sve_st##N##_r(env, vg, addr, desc, GETPC(), ESIZE, MSIZE, \ + sve_st1##NAME##_le_tlb); \ +} \ +void QEMU_FLATTEN HELPER(sve_st##N##NAME##_be_r) \ + (CPUARMState *env, void *vg, target_ulong addr, uint32_t desc) \ +{ \ + sve_st##N##_r(env, vg, addr, desc, GETPC(), ESIZE, MSIZE, \ + sve_st1##NAME##_be_tlb); \ +} + +DO_STN_1(1, bb, 1) +DO_STN_1(1, bh, 2) +DO_STN_1(1, bs, 4) +DO_STN_1(1, bd, 8) +DO_STN_1(2, bb, 1) +DO_STN_1(3, bb, 1) +DO_STN_1(4, bb, 1) + +DO_STN_2(1, hh, 2, 2) +DO_STN_2(1, hs, 4, 2) +DO_STN_2(1, hd, 8, 2) +DO_STN_2(2, hh, 2, 2) +DO_STN_2(3, hh, 2, 2) +DO_STN_2(4, hh, 2, 2) + +DO_STN_2(1, ss, 4, 4) +DO_STN_2(1, sd, 8, 4) +DO_STN_2(2, ss, 4, 4) +DO_STN_2(3, ss, 4, 4) +DO_STN_2(4, ss, 4, 4) + +DO_STN_2(1, dd, 8, 8) +DO_STN_2(2, dd, 8, 8) +DO_STN_2(3, dd, 8, 8) +DO_STN_2(4, dd, 8, 8) + +#undef DO_STN_1 +#undef DO_STN_2 + +/* + * Loads with a vector index. + */ + +/* + * Load the element at @reg + @reg_ofs, sign or zero-extend as needed. 
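+ * The zsu/zss/zd forms below take unsigned 32-bit, signed 32-bit and 64-bit
+ * offsets respectively; the _s/_d suffix matches the 32-bit or 64-bit element
+ * size of the offset vector.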
+ */ +typedef target_ulong zreg_off_fn(void *reg, intptr_t reg_ofs); + +static target_ulong off_zsu_s(void *reg, intptr_t reg_ofs) +{ + return *(uint32_t *)((char *)reg + H1_4(reg_ofs)); +} + +static target_ulong off_zss_s(void *reg, intptr_t reg_ofs) +{ + return *(int32_t *)((char *)reg + H1_4(reg_ofs)); +} + +static target_ulong off_zsu_d(void *reg, intptr_t reg_ofs) +{ + return (uint32_t)*(uint64_t *)((char *)reg + reg_ofs); +} + +static target_ulong off_zss_d(void *reg, intptr_t reg_ofs) +{ + return (int32_t)*(uint64_t *)((char *)reg + reg_ofs); +} + +static target_ulong off_zd_d(void *reg, intptr_t reg_ofs) +{ + return *(uint64_t *)((char *)reg + reg_ofs); +} + +static void sve_ld1_zs(CPUARMState *env, void *vd, void *vg, void *vm, + target_ulong base, uint32_t desc, uintptr_t ra, + zreg_off_fn *off_fn, sve_ld1_tlb_fn *tlb_fn) +{ + const TCGMemOpIdx oi = extract32(desc, SIMD_DATA_SHIFT, MEMOPIDX_SHIFT); + const int scale = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 2); + intptr_t i, oprsz = simd_oprsz(desc); + ARMVectorReg scratch = { 0 }; + + set_helper_retaddr(ra); + for (i = 0; i < oprsz; ) { + uint16_t pg = *(uint16_t *)((char *)vg + H1_2(i >> 3)); + do { + if (likely(pg & 1)) { + target_ulong off = off_fn(vm, i); + tlb_fn(env, &scratch, i, base + (off << scale), oi, ra); + } + i += 4, pg >>= 4; + } while (i & 15); + } + clear_helper_retaddr(); + + /* Wait until all exceptions have been raised to write back. */ + memcpy(vd, &scratch, oprsz); +} + +static void sve_ld1_zd(CPUARMState *env, void *vd, void *vg, void *vm, + target_ulong base, uint32_t desc, uintptr_t ra, + zreg_off_fn *off_fn, sve_ld1_tlb_fn *tlb_fn) +{ + const TCGMemOpIdx oi = extract32(desc, SIMD_DATA_SHIFT, MEMOPIDX_SHIFT); + const int scale = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 2); + intptr_t i, oprsz = simd_oprsz(desc) / 8; + ARMVectorReg scratch = { 0 }; + + set_helper_retaddr(ra); + for (i = 0; i < oprsz; i++) { + uint8_t pg = *(uint8_t *)((char *)vg + H1(i)); + if (likely(pg & 1)) { + target_ulong off = off_fn(vm, i * 8); + tlb_fn(env, &scratch, i * 8, base + (off << scale), oi, ra); + } + } + clear_helper_retaddr(); + + /* Wait until all exceptions have been raised to write back. 
*/ + memcpy(vd, &scratch, oprsz * 8); +} + +#define DO_LD1_ZPZ_S(MEM, OFS) \ +void QEMU_FLATTEN HELPER(sve_ld##MEM##_##OFS) \ + (CPUARMState *env, void *vd, void *vg, void *vm, \ + target_ulong base, uint32_t desc) \ +{ \ + sve_ld1_zs(env, vd, vg, vm, base, desc, GETPC(), \ + off_##OFS##_s, sve_ld1##MEM##_tlb); \ +} + +#define DO_LD1_ZPZ_D(MEM, OFS) \ +void QEMU_FLATTEN HELPER(sve_ld##MEM##_##OFS) \ + (CPUARMState *env, void *vd, void *vg, void *vm, \ + target_ulong base, uint32_t desc) \ +{ \ + sve_ld1_zd(env, vd, vg, vm, base, desc, GETPC(), \ + off_##OFS##_d, sve_ld1##MEM##_tlb); \ +} + +DO_LD1_ZPZ_S(bsu, zsu) +DO_LD1_ZPZ_S(bsu, zss) +DO_LD1_ZPZ_D(bdu, zsu) +DO_LD1_ZPZ_D(bdu, zss) +DO_LD1_ZPZ_D(bdu, zd) + +DO_LD1_ZPZ_S(bss, zsu) +DO_LD1_ZPZ_S(bss, zss) +DO_LD1_ZPZ_D(bds, zsu) +DO_LD1_ZPZ_D(bds, zss) +DO_LD1_ZPZ_D(bds, zd) + +DO_LD1_ZPZ_S(hsu_le, zsu) +DO_LD1_ZPZ_S(hsu_le, zss) +DO_LD1_ZPZ_D(hdu_le, zsu) +DO_LD1_ZPZ_D(hdu_le, zss) +DO_LD1_ZPZ_D(hdu_le, zd) + +DO_LD1_ZPZ_S(hsu_be, zsu) +DO_LD1_ZPZ_S(hsu_be, zss) +DO_LD1_ZPZ_D(hdu_be, zsu) +DO_LD1_ZPZ_D(hdu_be, zss) +DO_LD1_ZPZ_D(hdu_be, zd) + +DO_LD1_ZPZ_S(hss_le, zsu) +DO_LD1_ZPZ_S(hss_le, zss) +DO_LD1_ZPZ_D(hds_le, zsu) +DO_LD1_ZPZ_D(hds_le, zss) +DO_LD1_ZPZ_D(hds_le, zd) + +DO_LD1_ZPZ_S(hss_be, zsu) +DO_LD1_ZPZ_S(hss_be, zss) +DO_LD1_ZPZ_D(hds_be, zsu) +DO_LD1_ZPZ_D(hds_be, zss) +DO_LD1_ZPZ_D(hds_be, zd) + +DO_LD1_ZPZ_S(ss_le, zsu) +DO_LD1_ZPZ_S(ss_le, zss) +DO_LD1_ZPZ_D(sdu_le, zsu) +DO_LD1_ZPZ_D(sdu_le, zss) +DO_LD1_ZPZ_D(sdu_le, zd) + +DO_LD1_ZPZ_S(ss_be, zsu) +DO_LD1_ZPZ_S(ss_be, zss) +DO_LD1_ZPZ_D(sdu_be, zsu) +DO_LD1_ZPZ_D(sdu_be, zss) +DO_LD1_ZPZ_D(sdu_be, zd) + +DO_LD1_ZPZ_D(sds_le, zsu) +DO_LD1_ZPZ_D(sds_le, zss) +DO_LD1_ZPZ_D(sds_le, zd) + +DO_LD1_ZPZ_D(sds_be, zsu) +DO_LD1_ZPZ_D(sds_be, zss) +DO_LD1_ZPZ_D(sds_be, zd) + +DO_LD1_ZPZ_D(dd_le, zsu) +DO_LD1_ZPZ_D(dd_le, zss) +DO_LD1_ZPZ_D(dd_le, zd) + +DO_LD1_ZPZ_D(dd_be, zsu) +DO_LD1_ZPZ_D(dd_be, zss) +DO_LD1_ZPZ_D(dd_be, zd) + +#undef DO_LD1_ZPZ_S +#undef DO_LD1_ZPZ_D + +/* First fault loads with a vector index. */ + +/* Load one element into VD+REG_OFF from (ENV,VADDR) without faulting. + * The controlling predicate is known to be true. Return true if the + * load was successful. 
+ */ +typedef bool sve_ld1_nf_fn(CPUARMState *env, void *vd, intptr_t reg_off, + target_ulong vaddr, int mmu_idx); + +#ifdef _MSC_VER +#define DO_LD_NF(NAME, H, TYPEE, TYPEM, HOST) \ +static bool sve_ld##NAME##_nf(CPUARMState *env, void *vd, intptr_t reg_off, \ + target_ulong addr, int mmu_idx) \ +{ \ + struct uc_struct *uc = env->uc; \ + target_ulong next_page = 0ULL - (addr | TARGET_PAGE_MASK); \ + if (likely(next_page - addr >= sizeof(TYPEM))) { \ + void *host = tlb_vaddr_to_host(env, addr, MMU_DATA_LOAD, mmu_idx); \ + if (likely(host)) { \ + TYPEM val = HOST(host); \ + *(TYPEE *)((char *)vd + H(reg_off)) = val; \ + return true; \ + } \ + } \ + return false; \ +} +#else +#define DO_LD_NF(NAME, H, TYPEE, TYPEM, HOST) \ +static bool sve_ld##NAME##_nf(CPUARMState *env, void *vd, intptr_t reg_off, \ + target_ulong addr, int mmu_idx) \ +{ \ + struct uc_struct *uc = env->uc; \ + target_ulong next_page = -(addr | TARGET_PAGE_MASK); \ + if (likely(next_page - addr >= sizeof(TYPEM))) { \ + void *host = tlb_vaddr_to_host(env, addr, MMU_DATA_LOAD, mmu_idx); \ + if (likely(host)) { \ + TYPEM val = HOST(host); \ + *(TYPEE *)((char *)vd + H(reg_off)) = val; \ + return true; \ + } \ + } \ + return false; \ +} +#endif + +DO_LD_NF(bsu, H1_4, uint32_t, uint8_t, ldub_p) +DO_LD_NF(bss, H1_4, uint32_t, int8_t, ldsb_p) +DO_LD_NF(bdu, , uint64_t, uint8_t, ldub_p) +DO_LD_NF(bds, , uint64_t, int8_t, ldsb_p) + +DO_LD_NF(hsu_le, H1_4, uint32_t, uint16_t, lduw_le_p) +DO_LD_NF(hss_le, H1_4, uint32_t, int16_t, ldsw_le_p) +DO_LD_NF(hsu_be, H1_4, uint32_t, uint16_t, lduw_be_p) +DO_LD_NF(hss_be, H1_4, uint32_t, int16_t, ldsw_be_p) +DO_LD_NF(hdu_le, , uint64_t, uint16_t, lduw_le_p) +DO_LD_NF(hds_le, , uint64_t, int16_t, ldsw_le_p) +DO_LD_NF(hdu_be, , uint64_t, uint16_t, lduw_be_p) +DO_LD_NF(hds_be, , uint64_t, int16_t, ldsw_be_p) + +DO_LD_NF(ss_le, H1_4, uint32_t, uint32_t, ldl_le_p) +DO_LD_NF(ss_be, H1_4, uint32_t, uint32_t, ldl_be_p) +DO_LD_NF(sdu_le, , uint64_t, uint32_t, ldl_le_p) +DO_LD_NF(sds_le, , uint64_t, int32_t, ldl_le_p) +DO_LD_NF(sdu_be, , uint64_t, uint32_t, ldl_be_p) +DO_LD_NF(sds_be, , uint64_t, int32_t, ldl_be_p) + +DO_LD_NF(dd_le, , uint64_t, uint64_t, ldq_le_p) +DO_LD_NF(dd_be, , uint64_t, uint64_t, ldq_be_p) + +/* + * Common helper for all gather first-faulting loads. + */ +static inline void sve_ldff1_zs(CPUARMState *env, void *vd, void *vg, void *vm, + target_ulong base, uint32_t desc, uintptr_t ra, + zreg_off_fn *off_fn, sve_ld1_tlb_fn *tlb_fn, + sve_ld1_nf_fn *nonfault_fn) +{ + const TCGMemOpIdx oi = extract32(desc, SIMD_DATA_SHIFT, MEMOPIDX_SHIFT); + const int mmu_idx = get_mmuidx(oi); + const int scale = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 2); + intptr_t reg_off, reg_max = simd_oprsz(desc); + target_ulong addr; + + /* Skip to the first true predicate. */ + reg_off = find_next_active(vg, 0, reg_max, MO_32); + if (likely(reg_off < reg_max)) { + /* Perform one normal read, which will fault or not. */ + set_helper_retaddr(ra); + addr = off_fn(vm, reg_off); + addr = base + (addr << scale); + tlb_fn(env, vd, reg_off, addr, oi, ra); + + /* The rest of the reads will be non-faulting. */ + clear_helper_retaddr(); + } + + /* After any fault, zero the leading predicated false elements. 
*/ + swap_memzero(vd, reg_off); + + while (likely((reg_off += 4) < reg_max)) { + uint64_t pg = *(uint64_t *)((char *)vg + (reg_off >> 6) * 8); + if (likely((pg >> (reg_off & 63)) & 1)) { + addr = off_fn(vm, reg_off); + addr = base + (addr << scale); + if (!nonfault_fn(env, vd, reg_off, addr, mmu_idx)) { + record_fault(env, reg_off, reg_max); + break; + } + } else { + *(uint32_t *)((char *)vd + H1_4(reg_off)) = 0; + } + } +} + +static inline void sve_ldff1_zd(CPUARMState *env, void *vd, void *vg, void *vm, + target_ulong base, uint32_t desc, uintptr_t ra, + zreg_off_fn *off_fn, sve_ld1_tlb_fn *tlb_fn, + sve_ld1_nf_fn *nonfault_fn) +{ + const TCGMemOpIdx oi = extract32(desc, SIMD_DATA_SHIFT, MEMOPIDX_SHIFT); + const int mmu_idx = get_mmuidx(oi); + const int scale = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 2); + intptr_t reg_off, reg_max = simd_oprsz(desc); + target_ulong addr; + + /* Skip to the first true predicate. */ + reg_off = find_next_active(vg, 0, reg_max, MO_64); + if (likely(reg_off < reg_max)) { + /* Perform one normal read, which will fault or not. */ + set_helper_retaddr(ra); + addr = off_fn(vm, reg_off); + addr = base + (addr << scale); + tlb_fn(env, vd, reg_off, addr, oi, ra); + + /* The rest of the reads will be non-faulting. */ + clear_helper_retaddr(); + } + + /* After any fault, zero the leading predicated false elements. */ + swap_memzero(vd, reg_off); + + while (likely((reg_off += 8) < reg_max)) { + uint8_t pg = *(uint8_t *)((char *)vg + H1(reg_off >> 3)); + if (likely(pg & 1)) { + addr = off_fn(vm, reg_off); + addr = base + (addr << scale); + if (!nonfault_fn(env, vd, reg_off, addr, mmu_idx)) { + record_fault(env, reg_off, reg_max); + break; + } + } else { + *(uint64_t *)((char *)vd + reg_off) = 0; + } + } +} + +#define DO_LDFF1_ZPZ_S(MEM, OFS) \ +void HELPER(sve_ldff##MEM##_##OFS) \ + (CPUARMState *env, void *vd, void *vg, void *vm, \ + target_ulong base, uint32_t desc) \ +{ \ + sve_ldff1_zs(env, vd, vg, vm, base, desc, GETPC(), \ + off_##OFS##_s, sve_ld1##MEM##_tlb, sve_ld##MEM##_nf); \ +} + +#define DO_LDFF1_ZPZ_D(MEM, OFS) \ +void HELPER(sve_ldff##MEM##_##OFS) \ + (CPUARMState *env, void *vd, void *vg, void *vm, \ + target_ulong base, uint32_t desc) \ +{ \ + sve_ldff1_zd(env, vd, vg, vm, base, desc, GETPC(), \ + off_##OFS##_d, sve_ld1##MEM##_tlb, sve_ld##MEM##_nf); \ +} + +DO_LDFF1_ZPZ_S(bsu, zsu) +DO_LDFF1_ZPZ_S(bsu, zss) +DO_LDFF1_ZPZ_D(bdu, zsu) +DO_LDFF1_ZPZ_D(bdu, zss) +DO_LDFF1_ZPZ_D(bdu, zd) + +DO_LDFF1_ZPZ_S(bss, zsu) +DO_LDFF1_ZPZ_S(bss, zss) +DO_LDFF1_ZPZ_D(bds, zsu) +DO_LDFF1_ZPZ_D(bds, zss) +DO_LDFF1_ZPZ_D(bds, zd) + +DO_LDFF1_ZPZ_S(hsu_le, zsu) +DO_LDFF1_ZPZ_S(hsu_le, zss) +DO_LDFF1_ZPZ_D(hdu_le, zsu) +DO_LDFF1_ZPZ_D(hdu_le, zss) +DO_LDFF1_ZPZ_D(hdu_le, zd) + +DO_LDFF1_ZPZ_S(hsu_be, zsu) +DO_LDFF1_ZPZ_S(hsu_be, zss) +DO_LDFF1_ZPZ_D(hdu_be, zsu) +DO_LDFF1_ZPZ_D(hdu_be, zss) +DO_LDFF1_ZPZ_D(hdu_be, zd) + +DO_LDFF1_ZPZ_S(hss_le, zsu) +DO_LDFF1_ZPZ_S(hss_le, zss) +DO_LDFF1_ZPZ_D(hds_le, zsu) +DO_LDFF1_ZPZ_D(hds_le, zss) +DO_LDFF1_ZPZ_D(hds_le, zd) + +DO_LDFF1_ZPZ_S(hss_be, zsu) +DO_LDFF1_ZPZ_S(hss_be, zss) +DO_LDFF1_ZPZ_D(hds_be, zsu) +DO_LDFF1_ZPZ_D(hds_be, zss) +DO_LDFF1_ZPZ_D(hds_be, zd) + +DO_LDFF1_ZPZ_S(ss_le, zsu) +DO_LDFF1_ZPZ_S(ss_le, zss) +DO_LDFF1_ZPZ_D(sdu_le, zsu) +DO_LDFF1_ZPZ_D(sdu_le, zss) +DO_LDFF1_ZPZ_D(sdu_le, zd) + +DO_LDFF1_ZPZ_S(ss_be, zsu) +DO_LDFF1_ZPZ_S(ss_be, zss) +DO_LDFF1_ZPZ_D(sdu_be, zsu) +DO_LDFF1_ZPZ_D(sdu_be, zss) +DO_LDFF1_ZPZ_D(sdu_be, zd) + +DO_LDFF1_ZPZ_D(sds_le, zsu) +DO_LDFF1_ZPZ_D(sds_le, zss) 
+DO_LDFF1_ZPZ_D(sds_le, zd) + +DO_LDFF1_ZPZ_D(sds_be, zsu) +DO_LDFF1_ZPZ_D(sds_be, zss) +DO_LDFF1_ZPZ_D(sds_be, zd) + +DO_LDFF1_ZPZ_D(dd_le, zsu) +DO_LDFF1_ZPZ_D(dd_le, zss) +DO_LDFF1_ZPZ_D(dd_le, zd) + +DO_LDFF1_ZPZ_D(dd_be, zsu) +DO_LDFF1_ZPZ_D(dd_be, zss) +DO_LDFF1_ZPZ_D(dd_be, zd) + +/* Stores with a vector index. */ + +static void sve_st1_zs(CPUARMState *env, void *vd, void *vg, void *vm, + target_ulong base, uint32_t desc, uintptr_t ra, + zreg_off_fn *off_fn, sve_ld1_tlb_fn *tlb_fn) +{ + const TCGMemOpIdx oi = extract32(desc, SIMD_DATA_SHIFT, MEMOPIDX_SHIFT); + const int scale = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 2); + intptr_t i, oprsz = simd_oprsz(desc); + + set_helper_retaddr(ra); + for (i = 0; i < oprsz; ) { + uint16_t pg = *(uint16_t *)((char *)vg + H1_2(i >> 3)); + do { + if (likely(pg & 1)) { + target_ulong off = off_fn(vm, i); + tlb_fn(env, vd, i, base + (off << scale), oi, ra); + } + i += 4, pg >>= 4; + } while (i & 15); + } + clear_helper_retaddr(); +} + +static void sve_st1_zd(CPUARMState *env, void *vd, void *vg, void *vm, + target_ulong base, uint32_t desc, uintptr_t ra, + zreg_off_fn *off_fn, sve_ld1_tlb_fn *tlb_fn) +{ + const TCGMemOpIdx oi = extract32(desc, SIMD_DATA_SHIFT, MEMOPIDX_SHIFT); + const int scale = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 2); + intptr_t i, oprsz = simd_oprsz(desc) / 8; + + set_helper_retaddr(ra); + for (i = 0; i < oprsz; i++) { + uint8_t pg = *(uint8_t *)((char *)vg + H1(i)); + if (likely(pg & 1)) { + target_ulong off = off_fn(vm, i * 8); + tlb_fn(env, vd, i * 8, base + (off << scale), oi, ra); + } + } + clear_helper_retaddr(); +} + +#define DO_ST1_ZPZ_S(MEM, OFS) \ +void QEMU_FLATTEN HELPER(sve_st##MEM##_##OFS) \ + (CPUARMState *env, void *vd, void *vg, void *vm, \ + target_ulong base, uint32_t desc) \ +{ \ + sve_st1_zs(env, vd, vg, vm, base, desc, GETPC(), \ + off_##OFS##_s, sve_st1##MEM##_tlb); \ +} + +#define DO_ST1_ZPZ_D(MEM, OFS) \ +void QEMU_FLATTEN HELPER(sve_st##MEM##_##OFS) \ + (CPUARMState *env, void *vd, void *vg, void *vm, \ + target_ulong base, uint32_t desc) \ +{ \ + sve_st1_zd(env, vd, vg, vm, base, desc, GETPC(), \ + off_##OFS##_d, sve_st1##MEM##_tlb); \ +} + +DO_ST1_ZPZ_S(bs, zsu) +DO_ST1_ZPZ_S(hs_le, zsu) +DO_ST1_ZPZ_S(hs_be, zsu) +DO_ST1_ZPZ_S(ss_le, zsu) +DO_ST1_ZPZ_S(ss_be, zsu) + +DO_ST1_ZPZ_S(bs, zss) +DO_ST1_ZPZ_S(hs_le, zss) +DO_ST1_ZPZ_S(hs_be, zss) +DO_ST1_ZPZ_S(ss_le, zss) +DO_ST1_ZPZ_S(ss_be, zss) + +DO_ST1_ZPZ_D(bd, zsu) +DO_ST1_ZPZ_D(hd_le, zsu) +DO_ST1_ZPZ_D(hd_be, zsu) +DO_ST1_ZPZ_D(sd_le, zsu) +DO_ST1_ZPZ_D(sd_be, zsu) +DO_ST1_ZPZ_D(dd_le, zsu) +DO_ST1_ZPZ_D(dd_be, zsu) + +DO_ST1_ZPZ_D(bd, zss) +DO_ST1_ZPZ_D(hd_le, zss) +DO_ST1_ZPZ_D(hd_be, zss) +DO_ST1_ZPZ_D(sd_le, zss) +DO_ST1_ZPZ_D(sd_be, zss) +DO_ST1_ZPZ_D(dd_le, zss) +DO_ST1_ZPZ_D(dd_be, zss) + +DO_ST1_ZPZ_D(bd, zd) +DO_ST1_ZPZ_D(hd_le, zd) +DO_ST1_ZPZ_D(hd_be, zd) +DO_ST1_ZPZ_D(sd_le, zd) +DO_ST1_ZPZ_D(sd_be, zd) +DO_ST1_ZPZ_D(dd_le, zd) +DO_ST1_ZPZ_D(dd_be, zd) + +#undef DO_ST1_ZPZ_S +#undef DO_ST1_ZPZ_D diff --git a/qemu/target/arm/cpu-param.h b/qemu/target/arm/cpu-param.h index 208858c700..9e7aaea79f 100644 --- a/qemu/target/arm/cpu-param.h +++ b/qemu/target/arm/cpu-param.h @@ -25,6 +25,6 @@ # define TARGET_PAGE_BITS_VARY # define TARGET_PAGE_BITS_MIN 10 -#define NB_MMU_MODES 12 +#define NB_MMU_MODES 11 #endif diff --git a/qemu/target/arm/cpu-qom.h b/qemu/target/arm/cpu-qom.h index 963a628d7c..bf5037d346 100644 --- a/qemu/target/arm/cpu-qom.h +++ b/qemu/target/arm/cpu-qom.h @@ -32,7 +32,14 @@ struct arm_boot_info; #define 
TYPE_ARM_MAX_CPU "max-" TYPE_ARM_CPU -typedef struct ARMCPUInfo ARMCPUInfo; +typedef struct ARMCPUInfo { + const char *name; + void (*initfn)(struct uc_struct *uc, CPUState *obj); + void (*class_init)(struct uc_struct *uc, CPUClass *oc, void *data); +} ARMCPUInfo; + +void arm_cpu_register(const ARMCPUInfo *info); +void aarch64_cpu_register(const ARMCPUInfo *info); /** * ARMCPUClass: diff --git a/qemu/target/arm/cpu.c b/qemu/target/arm/cpu.c index 7613381980..e0c59cf0f1 100644 --- a/qemu/target/arm/cpu.c +++ b/qemu/target/arm/cpu.c @@ -596,16 +596,6 @@ void arm_cpu_update_vfiq(ARMCPU *cpu) } } -static inline void set_feature(CPUARMState *env, int feature) -{ - env->features |= 1ULL << feature; -} - -static inline void unset_feature(CPUARMState *env, int feature) -{ - env->features &= ~(1ULL << feature); -} - static uint64_t arm_cpu_mp_affinity(int idx, uint8_t clustersz) { uint32_t Aff1 = idx / clustersz; @@ -2003,6 +1993,7 @@ static void arm_max_initfn(struct uc_struct *uc, CPUState *obj) FIELD_DP32(t, ID_MMFR4, HPDS, 1, t); /* AA32HPD */ FIELD_DP32(t, ID_MMFR4, AC2, 1, t); /* ACTLR2, HACTLR2 */ FIELD_DP32(t, ID_MMFR4, CNP, 1, t); /* TTCNP */ + FIELD_DP32(t, ID_MMFR4, XNX, 1, t); /* TTS2UXN */ cpu->isar.id_mmfr4 = t; } //#endif @@ -2012,12 +2003,6 @@ static void arm_max_initfn(struct uc_struct *uc, CPUState *obj) #endif /* !defined(TARGET_AARCH64) */ -struct ARMCPUInfo { - const char *name; - void (*initfn)(struct uc_struct *uc, CPUState *obj); - void (*class_init)(struct uc_struct *uc, CPUClass *oc, void *data); -}; - #if !defined(TARGET_AARCH64) static struct ARMCPUInfo arm_cpus[] = { { "arm926", arm926_initfn }, diff --git a/qemu/target/arm/cpu.h b/qemu/target/arm/cpu.h index f857850cfc..794c5ab05b 100644 --- a/qemu/target/arm/cpu.h +++ b/qemu/target/arm/cpu.h @@ -480,6 +480,9 @@ typedef struct CPUARMState { uint64_t pmccfiltr_el0; /* Performance Monitor Filter Register */ uint64_t vpidr_el2; /* Virtualization Processor ID Register */ uint64_t vmpidr_el2; /* Virtualization Multiprocessor ID Register */ + uint64_t tfsr_el[4]; /* tfsre0_el1 is index 0. */ + uint64_t gcr_el1; + uint64_t rgsr_el1; } cp15; struct { @@ -548,6 +551,8 @@ typedef struct CPUARMState { uint64_t esr; } serror; + uint8_t ext_dabt_raised; /* Tracking/verifying injection of ext DABT */ + /* State of our input IRQ/FIQ/VIRQ/VFIQ lines */ uint32_t irq_line_state; @@ -680,6 +685,16 @@ typedef struct CPUARMState { struct uc_struct *uc; } CPUARMState; +static inline void set_feature(CPUARMState *env, int feature) +{ + env->features |= 1ULL << feature; +} + +static inline void unset_feature(CPUARMState *env, int feature) +{ + env->features &= ~(1ULL << feature); +} + /** * ARMELChangeHookFn: * type of a function which can be registered via arm_register_el_change_hook() @@ -757,6 +772,10 @@ struct ARMCPU { /* MemoryRegion to use for secure physical accesses */ MemoryRegion *secure_memory; + /* MemoryRegion to use for allocation tag accesses */ + MemoryRegion *tag_memory; + MemoryRegion *secure_tag_memory; + /* For v8M, pointer to the IDAU interface provided by board/SoC */ void *idau; @@ -858,7 +877,7 @@ struct ARMCPU { uint64_t id_aa64dfr0; uint64_t id_aa64dfr1; } isar; - uint32_t midr; + uint64_t midr; uint32_t revidr; uint32_t reset_fpsid; uint32_t ctr; @@ -1152,7 +1171,7 @@ void pmu_init(ARMCPU *cpu); #define CACHED_CPSR_BITS (CPSR_T | CPSR_AIF | CPSR_GE | CPSR_IT | CPSR_Q \ | CPSR_NZCV) /* Bits writable in user mode. 
*/ -#define CPSR_USER (CPSR_NZCV | CPSR_Q | CPSR_GE) +#define CPSR_USER (CPSR_NZCV | CPSR_Q | CPSR_GE | CPSR_E) /* Execution state bits. MRS read as zero, MSR writes ignored. */ #define CPSR_EXEC (CPSR_T | CPSR_IT | CPSR_J | CPSR_IL) @@ -1204,6 +1223,7 @@ void pmu_init(ARMCPU *cpu); #define PSTATE_SS (1U << 21) #define PSTATE_PAN (1U << 22) #define PSTATE_UAO (1U << 23) +#define PSTATE_TCO (1U << 25) #define PSTATE_V (1U << 28) #define PSTATE_C (1U << 29) #define PSTATE_Z (1U << 30) @@ -2327,7 +2347,7 @@ static inline uint64_t cpreg_to_kvm_id(uint32_t cpregid) * migration or KVM state synchronization. (Typically this is for "registers" * which are actually used as instructions for cache maintenance and so on.) * IO indicates that this register does I/O and therefore its accesses - * need to be surrounded by gen_io_start()/gen_io_end(). In particular, + * need to be marked with gen_io_start() and also end the TB. In particular, * registers which implement clocks or timers require this. * RAISES_EXC is for when the read or write hook might raise an exception; * the generated code will synchronize the CPU state before calling the hook @@ -2349,7 +2369,9 @@ static inline uint64_t cpreg_to_kvm_id(uint32_t cpregid) #define ARM_CP_NZCV (ARM_CP_SPECIAL | 0x0300) #define ARM_CP_CURRENTEL (ARM_CP_SPECIAL | 0x0400) #define ARM_CP_DC_ZVA (ARM_CP_SPECIAL | 0x0500) -#define ARM_LAST_SPECIAL ARM_CP_DC_ZVA +#define ARM_CP_DC_GVA (ARM_CP_SPECIAL | 0x0600) +#define ARM_CP_DC_GZVA (ARM_CP_SPECIAL | 0x0700) +#define ARM_LAST_SPECIAL ARM_CP_DC_GZVA #define ARM_CP_FPU 0x1000 #define ARM_CP_SVE 0x2000 #define ARM_CP_NO_GDB 0x4000 @@ -2804,6 +2826,9 @@ bool write_cpustate_to_list(ARMCPU *cpu, bool kvm_sync); * handling via the TLB. The only way to do a stage 1 translation without * the immediate stage 2 translation is via the ATS or AT system insns, * which can be slow-pathed and always do a page table walk. + * The only use of stage 2 translations is either as part of an s1+2 + * lookup or when loading the descriptors during a stage 1 page table walk, + * and in both those cases we don't use the TLB. * 4. we can also safely fold together the "32 bit EL3" and "64 bit EL3" * translation regimes, because they map reasonably well to each other * and they can't both be active at the same time. @@ -2819,15 +2844,15 @@ bool write_cpustate_to_list(ARMCPU *cpu, bool kvm_sync); * NS EL1 EL1&0 stage 1+2 (aka NS PL1) * NS EL1 EL1&0 stage 1+2 +PAN * NS EL0 EL2&0 + * NS EL2 EL2&0 * NS EL2 EL2&0 +PAN * NS EL2 (aka NS PL2) * S EL0 EL1&0 (aka S PL0) * S EL1 EL1&0 (not used if EL3 is 32 bit) * S EL1 EL1&0 +PAN * S EL3 (aka S PL1) - * NS EL1&0 stage 2 * - * for a total of 12 different mmu_idx. + * for a total of 11 different mmu_idx. * * R profile CPUs have an MPU, but can use the same set of MMU indexes * as A profile. They only need to distinguish NS EL0 and NS EL1 (and @@ -2850,8 +2875,8 @@ bool write_cpustate_to_list(ARMCPU *cpu, bool kvm_sync); * vs A/R profile) would like to use MMU indexes with different semantics, * but since we don't ever need to use all of those in a single CPU we * can avoid setting NB_MMU_MODES to more than 8. The lower bits of - * ARMMMUIdx are the core TLB mmu index, and the higher bits are always - * the same for any particular CPU. + * can avoid having to set NB_MMU_MODES to "total number of A profile MMU + * modes + total number of M profile MMU modes". The lower bits of * Variables of type ARMMUIdx are always full values, and the core * index values are in variables of type 'int'. 
* @@ -2897,8 +2922,6 @@ typedef enum ARMMMUIdx { ARMMMUIdx_SE10_1_PAN = 9 | ARM_MMU_IDX_A, ARMMMUIdx_SE3 = 10 | ARM_MMU_IDX_A, - ARMMMUIdx_Stage2 = 11 | ARM_MMU_IDX_A, - /* * These are not allocated TLBs and are used only for AT system * instructions or for the first stage of an S12 page table walk. @@ -2906,6 +2929,14 @@ typedef enum ARMMMUIdx { ARMMMUIdx_Stage1_E0 = 0 | ARM_MMU_IDX_NOTLB, ARMMMUIdx_Stage1_E1 = 1 | ARM_MMU_IDX_NOTLB, ARMMMUIdx_Stage1_E1_PAN = 2 | ARM_MMU_IDX_NOTLB, + /* + * Not allocated a TLB: used only for second stage of an S12 page + * table walk, or for descriptor loads during first stage of an S1 + * page table walk. Note that if we ever want to have a TLB for this + * then various TLB flush insns which currently are no-ops or flush + * only stage 1 MMU indexes will need to change to flush stage 2. + */ + ARMMMUIdx_Stage2 = 3 | ARM_MMU_IDX_NOTLB, /* * M-profile. @@ -2939,7 +2970,6 @@ typedef enum ARMMMUIdxBit { TO_CORE_BIT(SE10_1), TO_CORE_BIT(SE10_1_PAN), TO_CORE_BIT(SE3), - TO_CORE_BIT(Stage2), TO_CORE_BIT(MUser), TO_CORE_BIT(MPriv), @@ -2959,6 +2989,8 @@ typedef enum ARMMMUIdxBit { typedef enum ARMASIdx { ARMASIdx_NS = 0, ARMASIdx_S = 1, + ARMASIdx_TagNS = 2, + ARMASIdx_TagS = 3, } ARMASIdx; /* Return the Exception Level targeted by debug exceptions. */ @@ -3145,10 +3177,10 @@ typedef ARMCPU ArchCPU; * | | | TBFLAG_A32 | | * | | +-----+----------+ TBFLAG_AM32 | * | TBFLAG_ANY | |TBFLAG_M32| | - * | | +-+----------+--------------| - * | | | TBFLAG_A64 | - * +--------------+---------+---------------------------+ - * 31 20 15 0 + * | +-----------+----------+--------------| + * | | TBFLAG_A64 | + * +--------------+-------------------------------------+ + * 31 20 0 * * Unless otherwise noted, these bits are cached in env->hflags. */ @@ -3215,6 +3247,10 @@ FIELD(TBFLAG_A64, BT, 9, 1) FIELD(TBFLAG_A64, BTYPE, 10, 2) /* Not cached. */ FIELD(TBFLAG_A64, TBID, 12, 2) FIELD(TBFLAG_A64, UNPRIV, 14, 1) +FIELD(TBFLAG_A64, ATA, 15, 1) +FIELD(TBFLAG_A64, TCMA, 16, 2) +FIELD(TBFLAG_A64, MTE_ACTIVE, 18, 1) +FIELD(TBFLAG_A64, MTE0_ACTIVE, 19, 1) /** * cpu_mmu_index: @@ -3300,6 +3336,20 @@ static inline uint64_t *aa64_vfp_qreg(CPUARMState *env, unsigned regno) /* Shared between translate-sve.c and sve_helper.c. */ extern const uint64_t pred_esz_masks[4]; +/* Helper for the macros below, validating the argument type. */ +static inline MemTxAttrs *typecheck_memtxattrs(MemTxAttrs *x) +{ + return x; +} + +/* + * Lvalue macros for ARM TLB bits that we must cache in the TCG TLB. + * Using these should be a bit more self-documenting than using the + * generic target bits directly. + */ +#define arm_tlb_bti_gp(x) (typecheck_memtxattrs(x)->target_tlb_bit0) +#define arm_tlb_mte_tagged(x) (typecheck_memtxattrs(x)->target_tlb_bit1) + /* * Naming convention for isar_feature functions: * Functions which test 32-bit ID registers should have _aa32_ in @@ -3539,6 +3589,11 @@ static inline bool isar_feature_aa32_ccidx(const ARMISARegisters *id) return FIELD_EX32(id->id_mmfr4, ID_MMFR4, CCIDX) != 0; } +static inline bool isar_feature_aa32_tts2uxn(const ARMISARegisters *id) +{ + return FIELD_EX32(id->id_mmfr4, ID_MMFR4, XNX) != 0; +} + /* * 64-bit feature tests via id registers. 
*/ @@ -3727,6 +3782,18 @@ static inline bool isar_feature_aa64_bti(const ARMISARegisters *id) return FIELD_EX64(id->id_aa64pfr1, ID_AA64PFR1, BT) != 0; } +static inline bool isar_feature_aa64_mte_insn_reg(const ARMISARegisters *id) +{ + return FIELD_EX64(id->id_aa64pfr1, ID_AA64PFR1, MTE) != 0; +} + +static inline bool isar_feature_aa64_mte(const ARMISARegisters *id) +{ + return FIELD_EX64(id->id_aa64pfr1, ID_AA64PFR1, MTE) >= 2; +} + + + static inline bool isar_feature_aa64_pmu_8_1(const ARMISARegisters *id) { return FIELD_EX64(id->id_aa64dfr0, ID_AA64DFR0, PMUVER) >= 4 && @@ -3754,6 +3821,11 @@ static inline bool isar_feature_aa64_ccidx(const ARMISARegisters *id) return FIELD_EX64(id->id_aa64mmfr2, ID_AA64MMFR2, CCIDX) != 0; } +static inline bool isar_feature_aa64_tts2uxn(const ARMISARegisters *id) +{ + return FIELD_EX64(id->id_aa64mmfr1, ID_AA64MMFR1, XNX) != 0; +} + /* * Feature tests for "does this exist in either 32-bit or 64-bit?" */ @@ -3782,6 +3854,11 @@ static inline bool isar_feature_any_ccidx(const ARMISARegisters *id) return isar_feature_aa64_ccidx(id) || isar_feature_aa32_ccidx(id); } +static inline bool isar_feature_any_tts2uxn(const ARMISARegisters *id) +{ + return isar_feature_aa64_tts2uxn(id) || isar_feature_aa32_tts2uxn(id); +} + /* * Forward to the above feature tests given an ARMCPU pointer. */ diff --git a/qemu/target/arm/cpu64.c b/qemu/target/arm/cpu64.c index 3c57a52aee..b012e5d185 100644 --- a/qemu/target/arm/cpu64.c +++ b/qemu/target/arm/cpu64.c @@ -28,12 +28,6 @@ void arm_cpu_post_init(CPUState *obj); void arm_cpu_initfn(struct uc_struct *uc, CPUState *obj); ARMCPU *cpu_arm_init(struct uc_struct *uc); - -static inline void set_feature(CPUARMState *env, int feature) -{ - env->features |= 1ULL << feature; -} - static void aarch64_a57_initfn(struct uc_struct *uc, CPUState *obj) { ARMCPU *cpu = ARM_CPU(obj); @@ -262,6 +256,7 @@ static void aarch64_max_initfn(struct uc_struct *uc, CPUState *obj) FIELD_DP64(t, ID_AA64MMFR1, VH, 1, t); FIELD_DP64(t, ID_AA64MMFR1, PAN, 2, t); /* ATS1E1 */ FIELD_DP64(t, ID_AA64MMFR1, VMIDBITS, 2, t); /* VMID16 */ + FIELD_DP64(t, ID_AA64MMFR1, XNX, 1, t); /* TTS2UXN */ cpu->isar.id_aa64mmfr1 = t; t = cpu->isar.id_aa64mmfr2; @@ -295,22 +290,18 @@ static void aarch64_max_initfn(struct uc_struct *uc, CPUState *obj) FIELD_DP32(u, ID_MMFR4, HPDS, 1, u); /* AA32HPD */ FIELD_DP32(u, ID_MMFR4, AC2, 1, u); /* ACTLR2, HACTLR2 */ FIELD_DP32(u, ID_MMFR4, CNP, 1, u); /* TTCNP */ + FIELD_DP32(u, ID_MMFR4, XNX, 1, t); /* TTS2UXN */ cpu->isar.id_mmfr4 = u; - u = cpu->isar.id_aa64dfr0; - FIELD_DP64(u, ID_AA64DFR0, PMUVER, 5, u); /* v8.4-PMU */ - cpu->isar.id_aa64dfr0 = u; + t = cpu->isar.id_aa64dfr0; + FIELD_DP64(t, ID_AA64DFR0, PMUVER, 5, t); /* v8.4-PMU */ + cpu->isar.id_aa64dfr0 = t; u = cpu->isar.id_dfr0; FIELD_DP32(u, ID_DFR0, PERFMON, 5, u); /* v8.4-PMU */ cpu->isar.id_dfr0 = u; } -struct ARMCPUInfo { - const char *name; - void (*initfn)(struct uc_struct *uc, CPUState *obj); -}; - static const ARMCPUInfo aarch64_cpus[] = { { .name = "cortex-a57", .initfn = aarch64_a57_initfn }, { .name = "cortex-a53", .initfn = aarch64_a53_initfn }, diff --git a/qemu/target/arm/crypto_helper.c b/qemu/target/arm/crypto_helper.c index 117be6f89f..137e776059 100644 --- a/qemu/target/arm/crypto_helper.c +++ b/qemu/target/arm/crypto_helper.c @@ -13,7 +13,9 @@ #include "cpu.h" #include "exec/helper-proto.h" +#include "tcg/tcg-gvec-desc.h" #include "crypto/aes.h" +#include "vec_internal.h" union CRYPTO_STATE { uint8_t bytes[16]; @@ -22,25 +24,35 @@ union CRYPTO_STATE { }; 
#ifdef HOST_WORDS_BIGENDIAN -#define CR_ST_BYTE(state, i) (state.bytes[(15 - (i)) ^ 8]) -#define CR_ST_WORD(state, i) (state.words[(3 - (i)) ^ 2]) +#define CR_ST_BYTE(state, i) ((state).bytes[(15 - (i)) ^ 8]) +#define CR_ST_WORD(state, i) ((state).words[(3 - (i)) ^ 2]) #else -#define CR_ST_BYTE(state, i) (state.bytes[i]) -#define CR_ST_WORD(state, i) (state.words[i]) +#define CR_ST_BYTE(state, i) ((state).bytes[i]) +#define CR_ST_WORD(state, i) ((state).words[i]) #endif -void HELPER(crypto_aese)(void *vd, void *vm, uint32_t decrypt) +/* + * The caller has not been converted to full gvec, and so only + * modifies the low 16 bytes of the vector register. + */ +static void clear_tail_16(void *vd, uint32_t desc) +{ + int opr_sz = simd_oprsz(desc); + int max_sz = simd_maxsz(desc); + + assert(opr_sz == 16); + clear_tail(vd, opr_sz, max_sz); +} + +static void do_crypto_aese(uint64_t *rd, uint64_t *rn, + uint64_t *rm, bool decrypt) { static uint8_t const * const sbox[2] = { AES_sbox, AES_isbox }; static uint8_t const * const shift[2] = { AES_shifts, AES_ishifts }; - uint64_t *rd = vd; - uint64_t *rm = vm; union CRYPTO_STATE rk = { .l = { rm[0], rm[1] } }; - union CRYPTO_STATE st = { .l = { rd[0], rd[1] } }; + union CRYPTO_STATE st = { .l = { rn[0], rn[1] } }; int i; - assert(decrypt < 2); - /* xor state vector with round key */ rk.l[0] ^= st.l[0]; rk.l[1] ^= st.l[1]; @@ -54,7 +66,18 @@ void HELPER(crypto_aese)(void *vd, void *vm, uint32_t decrypt) rd[1] = st.l[1]; } -void HELPER(crypto_aesmc)(void *vd, void *vm, uint32_t decrypt) +void HELPER(crypto_aese)(void *vd, void *vn, void *vm, uint32_t desc) +{ + intptr_t i, opr_sz = simd_oprsz(desc); + bool decrypt = simd_data(desc); + + for (i = 0; i < opr_sz; i += 16) { + do_crypto_aese((uint64_t*)((char*)vd + i), (uint64_t*)((char*)vn + i), (uint64_t*)((char*)vm + i), decrypt); + } + clear_tail(vd, opr_sz, simd_maxsz(desc)); +} + +static void do_crypto_aesmc(uint64_t *rd, uint64_t *rm, bool decrypt) { static uint32_t const mc[][256] = { { /* MixColumns lookup table */ @@ -190,13 +213,9 @@ void HELPER(crypto_aesmc)(void *vd, void *vm, uint32_t decrypt) 0xbe805d9f, 0xb58d5491, 0xa89a4f83, 0xa397468d, } }; - uint64_t *rd = vd; - uint64_t *rm = vm; union CRYPTO_STATE st = { .l = { rm[0], rm[1] } }; int i; - assert(decrypt < 2); - for (i = 0; i < 16; i += 4) { CR_ST_WORD(st, i >> 2) = mc[decrypt][CR_ST_BYTE(st, i)] ^ @@ -209,6 +228,17 @@ void HELPER(crypto_aesmc)(void *vd, void *vm, uint32_t decrypt) rd[1] = st.l[1]; } +void HELPER(crypto_aesmc)(void *vd, void *vm, uint32_t desc) +{ + intptr_t i, opr_sz = simd_oprsz(desc); + bool decrypt = simd_data(desc); + + for (i = 0; i < opr_sz; i += 16) { + do_crypto_aesmc((uint64_t*)((char*)vd + i), (uint64_t*)((char*)vm + i), decrypt); + } + clear_tail(vd, opr_sz, simd_maxsz(desc)); +} + /* * SHA-1 logical functions */ @@ -228,52 +258,78 @@ static uint32_t maj(uint32_t x, uint32_t y, uint32_t z) return (x & y) | ((x | y) & z); } -void HELPER(crypto_sha1_3reg)(void *vd, void *vn, void *vm, uint32_t op) +void HELPER(crypto_sha1su0)(void *vd, void *vn, void *vm, uint32_t desc) +{ + uint64_t *d = vd, *n = vn, *m = vm; + uint64_t d0, d1; + + d0 = d[1] ^ d[0] ^ m[0]; + d1 = n[0] ^ d[1] ^ m[1]; + d[0] = d0; + d[1] = d1; + + clear_tail_16(vd, desc); +} + + +static inline void crypto_sha1_3reg(uint64_t *rd, uint64_t *rn, + uint64_t *rm, uint32_t desc, + uint32_t (*fn)(union CRYPTO_STATE *d)) { - uint64_t *rd = vd; - uint64_t *rn = vn; - uint64_t *rm = vm; union CRYPTO_STATE d = { .l = { rd[0], rd[1] } }; union CRYPTO_STATE n 
= { .l = { rn[0], rn[1] } }; union CRYPTO_STATE m = { .l = { rm[0], rm[1] } }; + int i; - if (op == 3) { /* sha1su0 */ - d.l[0] ^= d.l[1] ^ m.l[0]; - d.l[1] ^= n.l[0] ^ m.l[1]; - } else { - int i; - - for (i = 0; i < 4; i++) { - uint32_t t = 0; - - switch (op) { - case 0: /* sha1c */ - t = cho(CR_ST_WORD(d, 1), CR_ST_WORD(d, 2), CR_ST_WORD(d, 3)); - break; - case 1: /* sha1p */ - t = par(CR_ST_WORD(d, 1), CR_ST_WORD(d, 2), CR_ST_WORD(d, 3)); - break; - case 2: /* sha1m */ - t = maj(CR_ST_WORD(d, 1), CR_ST_WORD(d, 2), CR_ST_WORD(d, 3)); - break; - default: - g_assert_not_reached(); - } - t += rol32(CR_ST_WORD(d, 0), 5) + CR_ST_WORD(n, 0) - + CR_ST_WORD(m, i); - - CR_ST_WORD(n, 0) = CR_ST_WORD(d, 3); - CR_ST_WORD(d, 3) = CR_ST_WORD(d, 2); - CR_ST_WORD(d, 2) = ror32(CR_ST_WORD(d, 1), 2); - CR_ST_WORD(d, 1) = CR_ST_WORD(d, 0); - CR_ST_WORD(d, 0) = t; - } + for (i = 0; i < 4; i++) { + uint32_t t = fn(&d); + + t += rol32(CR_ST_WORD(d, 0), 5) + CR_ST_WORD(n, 0) + + CR_ST_WORD(m, i); + + CR_ST_WORD(n, 0) = CR_ST_WORD(d, 3); + CR_ST_WORD(d, 3) = CR_ST_WORD(d, 2); + CR_ST_WORD(d, 2) = ror32(CR_ST_WORD(d, 1), 2); + CR_ST_WORD(d, 1) = CR_ST_WORD(d, 0); + CR_ST_WORD(d, 0) = t; } rd[0] = d.l[0]; rd[1] = d.l[1]; + + clear_tail_16(rd, desc); +} + +static uint32_t do_sha1c(union CRYPTO_STATE *d) +{ + return cho(CR_ST_WORD(*d, 1), CR_ST_WORD(*d, 2), CR_ST_WORD(*d, 3)); +} + +void HELPER(crypto_sha1c)(void *vd, void *vn, void *vm, uint32_t desc) +{ + crypto_sha1_3reg(vd, vn, vm, desc, do_sha1c); +} + +static uint32_t do_sha1p(union CRYPTO_STATE *d) +{ + return par(CR_ST_WORD(*d, 1), CR_ST_WORD(*d, 2), CR_ST_WORD(*d, 3)); } -void HELPER(crypto_sha1h)(void *vd, void *vm) +void HELPER(crypto_sha1p)(void *vd, void *vn, void *vm, uint32_t desc) +{ + crypto_sha1_3reg(vd, vn, vm, desc, do_sha1p); +} + +static uint32_t do_sha1m(union CRYPTO_STATE *d) +{ + return maj(CR_ST_WORD(*d, 1), CR_ST_WORD(*d, 2), CR_ST_WORD(*d, 3)); +} + +void HELPER(crypto_sha1m)(void *vd, void *vn, void *vm, uint32_t desc) +{ + crypto_sha1_3reg(vd, vn, vm, desc, do_sha1m); +} + +void HELPER(crypto_sha1h)(void *vd, void *vm, uint32_t desc) { uint64_t *rd = vd; uint64_t *rm = vm; @@ -284,9 +340,11 @@ void HELPER(crypto_sha1h)(void *vd, void *vm) rd[0] = m.l[0]; rd[1] = m.l[1]; + + clear_tail_16(vd, desc); } -void HELPER(crypto_sha1su1)(void *vd, void *vm) +void HELPER(crypto_sha1su1)(void *vd, void *vm, uint32_t desc) { uint64_t *rd = vd; uint64_t *rm = vm; @@ -300,6 +358,8 @@ void HELPER(crypto_sha1su1)(void *vd, void *vm) rd[0] = d.l[0]; rd[1] = d.l[1]; + + clear_tail_16(vd, desc); } /* @@ -327,7 +387,7 @@ static uint32_t s1(uint32_t x) return ror32(x, 17) ^ ror32(x, 19) ^ (x >> 10); } -void HELPER(crypto_sha256h)(void *vd, void *vn, void *vm) +void HELPER(crypto_sha256h)(void *vd, void *vn, void *vm, uint32_t desc) { uint64_t *rd = vd; uint64_t *rn = vn; @@ -358,9 +418,11 @@ void HELPER(crypto_sha256h)(void *vd, void *vn, void *vm) rd[0] = d.l[0]; rd[1] = d.l[1]; + + clear_tail_16(vd, desc); } -void HELPER(crypto_sha256h2)(void *vd, void *vn, void *vm) +void HELPER(crypto_sha256h2)(void *vd, void *vn, void *vm, uint32_t desc) { uint64_t *rd = vd; uint64_t *rn = vn; @@ -383,9 +445,11 @@ void HELPER(crypto_sha256h2)(void *vd, void *vn, void *vm) rd[0] = d.l[0]; rd[1] = d.l[1]; + + clear_tail_16(vd, desc); } -void HELPER(crypto_sha256su0)(void *vd, void *vm) +void HELPER(crypto_sha256su0)(void *vd, void *vm, uint32_t desc) { uint64_t *rd = vd; uint64_t *rm = vm; @@ -399,9 +463,11 @@ void HELPER(crypto_sha256su0)(void *vd, void *vm) rd[0] = 
d.l[0]; rd[1] = d.l[1]; + + clear_tail_16(vd, desc); } -void HELPER(crypto_sha256su1)(void *vd, void *vn, void *vm) +void HELPER(crypto_sha256su1)(void *vd, void *vn, void *vm, uint32_t desc) { uint64_t *rd = vd; uint64_t *rn = vn; @@ -417,6 +483,8 @@ void HELPER(crypto_sha256su1)(void *vd, void *vn, void *vm) rd[0] = d.l[0]; rd[1] = d.l[1]; + + clear_tail_16(vd, desc); } /* @@ -453,7 +521,7 @@ static uint64_t s1_512(uint64_t x) return ror64(x, 19) ^ ror64(x, 61) ^ (x >> 6); } -void HELPER(crypto_sha512h)(void *vd, void *vn, void *vm) +void HELPER(crypto_sha512h)(void *vd, void *vn, void *vm, uint32_t desc) { uint64_t *rd = vd; uint64_t *rn = vn; @@ -466,9 +534,11 @@ void HELPER(crypto_sha512h)(void *vd, void *vn, void *vm) rd[0] = d0; rd[1] = d1; + + clear_tail_16(vd, desc); } -void HELPER(crypto_sha512h2)(void *vd, void *vn, void *vm) +void HELPER(crypto_sha512h2)(void *vd, void *vn, void *vm, uint32_t desc) { uint64_t *rd = vd; uint64_t *rn = vn; @@ -481,9 +551,11 @@ void HELPER(crypto_sha512h2)(void *vd, void *vn, void *vm) rd[0] = d0; rd[1] = d1; + + clear_tail_16(rd, desc); } -void HELPER(crypto_sha512su0)(void *vd, void *vn) +void HELPER(crypto_sha512su0)(void *vd, void *vn, uint32_t desc) { uint64_t *rd = vd; uint64_t *rn = vn; @@ -495,9 +567,11 @@ void HELPER(crypto_sha512su0)(void *vd, void *vn) rd[0] = d0; rd[1] = d1; + + clear_tail_16(vd, desc); } -void HELPER(crypto_sha512su1)(void *vd, void *vn, void *vm) +void HELPER(crypto_sha512su1)(void *vd, void *vn, void *vm, uint32_t desc) { uint64_t *rd = vd; uint64_t *rn = vn; @@ -505,9 +579,11 @@ void HELPER(crypto_sha512su1)(void *vd, void *vn, void *vm) rd[0] += s1_512(rn[0]) + rm[0]; rd[1] += s1_512(rn[1]) + rm[1]; + + clear_tail_16(vd, desc); } -void HELPER(crypto_sm3partw1)(void *vd, void *vn, void *vm) +void HELPER(crypto_sm3partw1)(void *vd, void *vn, void *vm, uint32_t desc) { uint64_t *rd = vd; uint64_t *rn = vn; @@ -531,9 +607,11 @@ void HELPER(crypto_sm3partw1)(void *vd, void *vn, void *vm) rd[0] = d.l[0]; rd[1] = d.l[1]; + + clear_tail_16(vd, desc); } -void HELPER(crypto_sm3partw2)(void *vd, void *vn, void *vm) +void HELPER(crypto_sm3partw2)(void *vd, void *vn, void *vm, uint32_t desc) { uint64_t *rd = vd; uint64_t *rn = vn; @@ -551,17 +629,18 @@ void HELPER(crypto_sm3partw2)(void *vd, void *vn, void *vm) rd[0] = d.l[0]; rd[1] = d.l[1]; + + clear_tail_16(vd, desc); } -void HELPER(crypto_sm3tt)(void *vd, void *vn, void *vm, uint32_t imm2, - uint32_t opcode) +static inline void QEMU_ALWAYS_INLINE +crypto_sm3tt(uint64_t *rd, uint64_t *rn, uint64_t *rm, + uint32_t desc, uint32_t opcode) { - uint64_t *rd = vd; - uint64_t *rn = vn; - uint64_t *rm = vm; union CRYPTO_STATE d = { .l = { rd[0], rd[1] } }; union CRYPTO_STATE n = { .l = { rn[0], rn[1] } }; union CRYPTO_STATE m = { .l = { rm[0], rm[1] } }; + uint32_t imm2 = simd_data(desc); uint32_t t = 0; assert(imm2 < 4); @@ -576,7 +655,7 @@ void HELPER(crypto_sm3tt)(void *vd, void *vn, void *vm, uint32_t imm2, /* SM3TT2B */ t = cho(CR_ST_WORD(d, 3), CR_ST_WORD(d, 2), CR_ST_WORD(d, 1)); } else { - g_assert_not_reached(); + qemu_build_not_reached(); } t += CR_ST_WORD(d, 0) + CR_ST_WORD(m, imm2); @@ -601,8 +680,21 @@ void HELPER(crypto_sm3tt)(void *vd, void *vn, void *vm, uint32_t imm2, rd[0] = d.l[0]; rd[1] = d.l[1]; + + clear_tail_16(rd, desc); } +#define DO_SM3TT(NAME, OPCODE) \ + void HELPER(NAME)(void *vd, void *vn, void *vm, uint32_t desc) \ + { crypto_sm3tt(vd, vn, vm, desc, OPCODE); } + +DO_SM3TT(crypto_sm3tt1a, 0) +DO_SM3TT(crypto_sm3tt1b, 1) +DO_SM3TT(crypto_sm3tt2a, 2) 
+DO_SM3TT(crypto_sm3tt2b, 3) + +#undef DO_SM3TT + static uint8_t const sm4_sbox[] = { 0xd6, 0x90, 0xe9, 0xfe, 0xcc, 0xe1, 0x3d, 0xb7, 0x16, 0xb6, 0x14, 0xc2, 0x28, 0xfb, 0x2c, 0x05, @@ -638,12 +730,10 @@ static uint8_t const sm4_sbox[] = { 0x79, 0xee, 0x5f, 0x3e, 0xd7, 0xcb, 0x39, 0x48, }; -void HELPER(crypto_sm4e)(void *vd, void *vn) +static void do_crypto_sm4e(uint64_t *rd, uint64_t *rn, uint64_t *rm) { - uint64_t *rd = vd; - uint64_t *rn = vn; - union CRYPTO_STATE d = { .l = { rd[0], rd[1] } }; - union CRYPTO_STATE n = { .l = { rn[0], rn[1] } }; + union CRYPTO_STATE d = { .l = { rn[0], rn[1] } }; + union CRYPTO_STATE n = { .l = { rm[0], rm[1] } }; uint32_t t, i; for (i = 0; i < 4; i++) { @@ -665,11 +755,18 @@ void HELPER(crypto_sm4e)(void *vd, void *vn) rd[1] = d.l[1]; } -void HELPER(crypto_sm4ekey)(void *vd, void *vn, void* vm) +void HELPER(crypto_sm4e)(void *vd, void *vn, void *vm, uint32_t desc) +{ + intptr_t i, opr_sz = simd_oprsz(desc); + + for (i = 0; i < opr_sz; i += 16) { + do_crypto_sm4e((uint64_t*)((char*)vd + i), (uint64_t*)((char*)vn + i), (uint64_t*)((char*)vm + i)); + } + clear_tail(vd, opr_sz, simd_maxsz(desc)); +} + +static void do_crypto_sm4ekey(uint64_t *rd, uint64_t *rn, uint64_t *rm) { - uint64_t *rd = vd; - uint64_t *rn = vn; - uint64_t *rm = vm; union CRYPTO_STATE d; union CRYPTO_STATE n = { .l = { rn[0], rn[1] } }; union CRYPTO_STATE m = { .l = { rm[0], rm[1] } }; @@ -693,3 +790,24 @@ void HELPER(crypto_sm4ekey)(void *vd, void *vn, void* vm) rd[0] = d.l[0]; rd[1] = d.l[1]; } + +void HELPER(crypto_sm4ekey)(void *vd, void *vn, void* vm, uint32_t desc) +{ + intptr_t i, opr_sz = simd_oprsz(desc); + + for (i = 0; i < opr_sz; i += 16) { + do_crypto_sm4ekey((uint64_t*)((char*)vd + i), (uint64_t*)((char*)vn + i), (uint64_t*)((char*)vm + i)); + } + clear_tail(vd, opr_sz, simd_maxsz(desc)); +} + +void HELPER(crypto_rax1)(void *vd, void *vn, void *vm, uint32_t desc) +{ + intptr_t i, opr_sz = simd_oprsz(desc); + uint64_t *d = vd, *n = vn, *m = vm; + + for (i = 0; i < opr_sz / 8; ++i) { + d[i] = n[i] ^ rol64(m[i], 1); + } + clear_tail(vd, opr_sz, simd_maxsz(desc)); +} diff --git a/qemu/target/arm/decode-a32.inc.c b/qemu/target/arm/decode-a32.inc.c index cf6c644a83..a1b30a2caa 100644 --- a/qemu/target/arm/decode-a32.inc.c +++ b/qemu/target/arm/decode-a32.inc.c @@ -21,9 +21,7 @@ typedef struct { } arg_disas_a3226; typedef struct { -#ifdef _MSC_VER - int dummy; -#endif + int : 0; } arg_empty; typedef struct { diff --git a/qemu/target/arm/decode-neon-dp.inc.c b/qemu/target/arm/decode-neon-dp.inc.c new file mode 100644 index 0000000000..c09bfb5e55 --- /dev/null +++ b/qemu/target/arm/decode-neon-dp.inc.c @@ -0,0 +1,2806 @@ +/* This file is autogenerated by scripts/decodetree.py. 
*/ + +typedef struct { + int cmode; + int imm; + int op; + int q; + int vd; +} arg_1reg_imm; + +typedef struct { + int q; + int size; + int vd; + int vm; +} arg_2misc; + +typedef struct { + int q; + int shift; + int size; + int vd; + int vm; +} arg_2reg_shift; + +typedef struct { + int q; + int size; + int vd; + int vm; + int vn; +} arg_2scalar; + +typedef struct { + int size; + int vd; + int vm; + int vn; +} arg_3diff; + +typedef struct { + int q; + int size; + int vd; + int vm; + int vn; +} arg_3same; + +typedef struct { + int imm; + int q; + int vd; + int vm; + int vn; +} arg_disas_neon_dp3; + +typedef struct { + int len; + int op; + int vd; + int vm; + int vn; +} arg_disas_neon_dp4; + +typedef struct { + int index; + int q; + int size; + int vd; + int vm; +} arg_disas_neon_dp5; + +typedef arg_3same arg_VHADD_S_3s; +static bool trans_VHADD_S_3s(DisasContext *ctx, arg_VHADD_S_3s *a); +typedef arg_3same arg_VHADD_U_3s; +static bool trans_VHADD_U_3s(DisasContext *ctx, arg_VHADD_U_3s *a); +typedef arg_3same arg_VQADD_S_3s; +static bool trans_VQADD_S_3s(DisasContext *ctx, arg_VQADD_S_3s *a); +typedef arg_3same arg_VQADD_U_3s; +static bool trans_VQADD_U_3s(DisasContext *ctx, arg_VQADD_U_3s *a); +typedef arg_3same arg_VRHADD_S_3s; +static bool trans_VRHADD_S_3s(DisasContext *ctx, arg_VRHADD_S_3s *a); +typedef arg_3same arg_VRHADD_U_3s; +static bool trans_VRHADD_U_3s(DisasContext *ctx, arg_VRHADD_U_3s *a); +typedef arg_3same arg_VAND_3s; +static bool trans_VAND_3s(DisasContext *ctx, arg_VAND_3s *a); +typedef arg_3same arg_VBIC_3s; +static bool trans_VBIC_3s(DisasContext *ctx, arg_VBIC_3s *a); +typedef arg_3same arg_VORR_3s; +static bool trans_VORR_3s(DisasContext *ctx, arg_VORR_3s *a); +typedef arg_3same arg_VORN_3s; +static bool trans_VORN_3s(DisasContext *ctx, arg_VORN_3s *a); +typedef arg_3same arg_VEOR_3s; +static bool trans_VEOR_3s(DisasContext *ctx, arg_VEOR_3s *a); +typedef arg_3same arg_VBSL_3s; +static bool trans_VBSL_3s(DisasContext *ctx, arg_VBSL_3s *a); +typedef arg_3same arg_VBIT_3s; +static bool trans_VBIT_3s(DisasContext *ctx, arg_VBIT_3s *a); +typedef arg_3same arg_VBIF_3s; +static bool trans_VBIF_3s(DisasContext *ctx, arg_VBIF_3s *a); +typedef arg_3same arg_VHSUB_S_3s; +static bool trans_VHSUB_S_3s(DisasContext *ctx, arg_VHSUB_S_3s *a); +typedef arg_3same arg_VHSUB_U_3s; +static bool trans_VHSUB_U_3s(DisasContext *ctx, arg_VHSUB_U_3s *a); +typedef arg_3same arg_VQSUB_S_3s; +static bool trans_VQSUB_S_3s(DisasContext *ctx, arg_VQSUB_S_3s *a); +typedef arg_3same arg_VQSUB_U_3s; +static bool trans_VQSUB_U_3s(DisasContext *ctx, arg_VQSUB_U_3s *a); +typedef arg_3same arg_VCGT_S_3s; +static bool trans_VCGT_S_3s(DisasContext *ctx, arg_VCGT_S_3s *a); +typedef arg_3same arg_VCGT_U_3s; +static bool trans_VCGT_U_3s(DisasContext *ctx, arg_VCGT_U_3s *a); +typedef arg_3same arg_VCGE_S_3s; +static bool trans_VCGE_S_3s(DisasContext *ctx, arg_VCGE_S_3s *a); +typedef arg_3same arg_VCGE_U_3s; +static bool trans_VCGE_U_3s(DisasContext *ctx, arg_VCGE_U_3s *a); +typedef arg_3same arg_VSHL_S_3s; +static bool trans_VSHL_S_3s(DisasContext *ctx, arg_VSHL_S_3s *a); +typedef arg_3same arg_VSHL_U_3s; +static bool trans_VSHL_U_3s(DisasContext *ctx, arg_VSHL_U_3s *a); +typedef arg_3same arg_VQSHL_S64_3s; +static bool trans_VQSHL_S64_3s(DisasContext *ctx, arg_VQSHL_S64_3s *a); +typedef arg_3same arg_VQSHL_S_3s; +static bool trans_VQSHL_S_3s(DisasContext *ctx, arg_VQSHL_S_3s *a); +typedef arg_3same arg_VQSHL_U64_3s; +static bool trans_VQSHL_U64_3s(DisasContext *ctx, arg_VQSHL_U64_3s *a); +typedef arg_3same 
arg_VQSHL_U_3s; +static bool trans_VQSHL_U_3s(DisasContext *ctx, arg_VQSHL_U_3s *a); +typedef arg_3same arg_VRSHL_S64_3s; +static bool trans_VRSHL_S64_3s(DisasContext *ctx, arg_VRSHL_S64_3s *a); +typedef arg_3same arg_VRSHL_S_3s; +static bool trans_VRSHL_S_3s(DisasContext *ctx, arg_VRSHL_S_3s *a); +typedef arg_3same arg_VRSHL_U64_3s; +static bool trans_VRSHL_U64_3s(DisasContext *ctx, arg_VRSHL_U64_3s *a); +typedef arg_3same arg_VRSHL_U_3s; +static bool trans_VRSHL_U_3s(DisasContext *ctx, arg_VRSHL_U_3s *a); +typedef arg_3same arg_VQRSHL_S64_3s; +static bool trans_VQRSHL_S64_3s(DisasContext *ctx, arg_VQRSHL_S64_3s *a); +typedef arg_3same arg_VQRSHL_S_3s; +static bool trans_VQRSHL_S_3s(DisasContext *ctx, arg_VQRSHL_S_3s *a); +typedef arg_3same arg_VQRSHL_U64_3s; +static bool trans_VQRSHL_U64_3s(DisasContext *ctx, arg_VQRSHL_U64_3s *a); +typedef arg_3same arg_VQRSHL_U_3s; +static bool trans_VQRSHL_U_3s(DisasContext *ctx, arg_VQRSHL_U_3s *a); +typedef arg_3same arg_VMAX_S_3s; +static bool trans_VMAX_S_3s(DisasContext *ctx, arg_VMAX_S_3s *a); +typedef arg_3same arg_VMAX_U_3s; +static bool trans_VMAX_U_3s(DisasContext *ctx, arg_VMAX_U_3s *a); +typedef arg_3same arg_VMIN_S_3s; +static bool trans_VMIN_S_3s(DisasContext *ctx, arg_VMIN_S_3s *a); +typedef arg_3same arg_VMIN_U_3s; +static bool trans_VMIN_U_3s(DisasContext *ctx, arg_VMIN_U_3s *a); +typedef arg_3same arg_VABD_S_3s; +static bool trans_VABD_S_3s(DisasContext *ctx, arg_VABD_S_3s *a); +typedef arg_3same arg_VABD_U_3s; +static bool trans_VABD_U_3s(DisasContext *ctx, arg_VABD_U_3s *a); +typedef arg_3same arg_VABA_S_3s; +static bool trans_VABA_S_3s(DisasContext *ctx, arg_VABA_S_3s *a); +typedef arg_3same arg_VABA_U_3s; +static bool trans_VABA_U_3s(DisasContext *ctx, arg_VABA_U_3s *a); +typedef arg_3same arg_VADD_3s; +static bool trans_VADD_3s(DisasContext *ctx, arg_VADD_3s *a); +typedef arg_3same arg_VSUB_3s; +static bool trans_VSUB_3s(DisasContext *ctx, arg_VSUB_3s *a); +typedef arg_3same arg_VTST_3s; +static bool trans_VTST_3s(DisasContext *ctx, arg_VTST_3s *a); +typedef arg_3same arg_VCEQ_3s; +static bool trans_VCEQ_3s(DisasContext *ctx, arg_VCEQ_3s *a); +typedef arg_3same arg_VMLA_3s; +static bool trans_VMLA_3s(DisasContext *ctx, arg_VMLA_3s *a); +typedef arg_3same arg_VMLS_3s; +static bool trans_VMLS_3s(DisasContext *ctx, arg_VMLS_3s *a); +typedef arg_3same arg_VMUL_3s; +static bool trans_VMUL_3s(DisasContext *ctx, arg_VMUL_3s *a); +typedef arg_3same arg_VMUL_p_3s; +static bool trans_VMUL_p_3s(DisasContext *ctx, arg_VMUL_p_3s *a); +typedef arg_3same arg_VPMAX_S_3s; +static bool trans_VPMAX_S_3s(DisasContext *ctx, arg_VPMAX_S_3s *a); +typedef arg_3same arg_VPMAX_U_3s; +static bool trans_VPMAX_U_3s(DisasContext *ctx, arg_VPMAX_U_3s *a); +typedef arg_3same arg_VPMIN_S_3s; +static bool trans_VPMIN_S_3s(DisasContext *ctx, arg_VPMIN_S_3s *a); +typedef arg_3same arg_VPMIN_U_3s; +static bool trans_VPMIN_U_3s(DisasContext *ctx, arg_VPMIN_U_3s *a); +typedef arg_3same arg_VQDMULH_3s; +static bool trans_VQDMULH_3s(DisasContext *ctx, arg_VQDMULH_3s *a); +typedef arg_3same arg_VQRDMULH_3s; +static bool trans_VQRDMULH_3s(DisasContext *ctx, arg_VQRDMULH_3s *a); +typedef arg_3same arg_VPADD_3s; +static bool trans_VPADD_3s(DisasContext *ctx, arg_VPADD_3s *a); +typedef arg_3same arg_VQRDMLAH_3s; +static bool trans_VQRDMLAH_3s(DisasContext *ctx, arg_VQRDMLAH_3s *a); +typedef arg_3same arg_SHA1C_3s; +static bool trans_SHA1C_3s(DisasContext *ctx, arg_SHA1C_3s *a); +typedef arg_3same arg_SHA1P_3s; +static bool trans_SHA1P_3s(DisasContext *ctx, arg_SHA1P_3s *a); 
+typedef arg_3same arg_SHA1M_3s; +static bool trans_SHA1M_3s(DisasContext *ctx, arg_SHA1M_3s *a); +typedef arg_3same arg_SHA1SU0_3s; +static bool trans_SHA1SU0_3s(DisasContext *ctx, arg_SHA1SU0_3s *a); +typedef arg_3same arg_SHA256H_3s; +static bool trans_SHA256H_3s(DisasContext *ctx, arg_SHA256H_3s *a); +typedef arg_3same arg_SHA256H2_3s; +static bool trans_SHA256H2_3s(DisasContext *ctx, arg_SHA256H2_3s *a); +typedef arg_3same arg_SHA256SU1_3s; +static bool trans_SHA256SU1_3s(DisasContext *ctx, arg_SHA256SU1_3s *a); +typedef arg_3same arg_VFMA_fp_3s; +static bool trans_VFMA_fp_3s(DisasContext *ctx, arg_VFMA_fp_3s *a); +typedef arg_3same arg_VFMS_fp_3s; +static bool trans_VFMS_fp_3s(DisasContext *ctx, arg_VFMS_fp_3s *a); +typedef arg_3same arg_VQRDMLSH_3s; +static bool trans_VQRDMLSH_3s(DisasContext *ctx, arg_VQRDMLSH_3s *a); +typedef arg_3same arg_VADD_fp_3s; +static bool trans_VADD_fp_3s(DisasContext *ctx, arg_VADD_fp_3s *a); +typedef arg_3same arg_VSUB_fp_3s; +static bool trans_VSUB_fp_3s(DisasContext *ctx, arg_VSUB_fp_3s *a); +typedef arg_3same arg_VPADD_fp_3s; +static bool trans_VPADD_fp_3s(DisasContext *ctx, arg_VPADD_fp_3s *a); +typedef arg_3same arg_VABD_fp_3s; +static bool trans_VABD_fp_3s(DisasContext *ctx, arg_VABD_fp_3s *a); +typedef arg_3same arg_VMLA_fp_3s; +static bool trans_VMLA_fp_3s(DisasContext *ctx, arg_VMLA_fp_3s *a); +typedef arg_3same arg_VMLS_fp_3s; +static bool trans_VMLS_fp_3s(DisasContext *ctx, arg_VMLS_fp_3s *a); +typedef arg_3same arg_VMUL_fp_3s; +static bool trans_VMUL_fp_3s(DisasContext *ctx, arg_VMUL_fp_3s *a); +typedef arg_3same arg_VCEQ_fp_3s; +static bool trans_VCEQ_fp_3s(DisasContext *ctx, arg_VCEQ_fp_3s *a); +typedef arg_3same arg_VCGE_fp_3s; +static bool trans_VCGE_fp_3s(DisasContext *ctx, arg_VCGE_fp_3s *a); +typedef arg_3same arg_VACGE_fp_3s; +static bool trans_VACGE_fp_3s(DisasContext *ctx, arg_VACGE_fp_3s *a); +typedef arg_3same arg_VCGT_fp_3s; +static bool trans_VCGT_fp_3s(DisasContext *ctx, arg_VCGT_fp_3s *a); +typedef arg_3same arg_VACGT_fp_3s; +static bool trans_VACGT_fp_3s(DisasContext *ctx, arg_VACGT_fp_3s *a); +typedef arg_3same arg_VMAX_fp_3s; +static bool trans_VMAX_fp_3s(DisasContext *ctx, arg_VMAX_fp_3s *a); +typedef arg_3same arg_VMIN_fp_3s; +static bool trans_VMIN_fp_3s(DisasContext *ctx, arg_VMIN_fp_3s *a); +typedef arg_3same arg_VPMAX_fp_3s; +static bool trans_VPMAX_fp_3s(DisasContext *ctx, arg_VPMAX_fp_3s *a); +typedef arg_3same arg_VPMIN_fp_3s; +static bool trans_VPMIN_fp_3s(DisasContext *ctx, arg_VPMIN_fp_3s *a); +typedef arg_3same arg_VRECPS_fp_3s; +static bool trans_VRECPS_fp_3s(DisasContext *ctx, arg_VRECPS_fp_3s *a); +typedef arg_3same arg_VRSQRTS_fp_3s; +static bool trans_VRSQRTS_fp_3s(DisasContext *ctx, arg_VRSQRTS_fp_3s *a); +typedef arg_3same arg_VMAXNM_fp_3s; +static bool trans_VMAXNM_fp_3s(DisasContext *ctx, arg_VMAXNM_fp_3s *a); +typedef arg_3same arg_VMINNM_fp_3s; +static bool trans_VMINNM_fp_3s(DisasContext *ctx, arg_VMINNM_fp_3s *a); +typedef arg_2reg_shift arg_VSHR_S_2sh; +static bool trans_VSHR_S_2sh(DisasContext *ctx, arg_VSHR_S_2sh *a); +typedef arg_2reg_shift arg_VSHR_U_2sh; +static bool trans_VSHR_U_2sh(DisasContext *ctx, arg_VSHR_U_2sh *a); +typedef arg_2reg_shift arg_VSRA_S_2sh; +static bool trans_VSRA_S_2sh(DisasContext *ctx, arg_VSRA_S_2sh *a); +typedef arg_2reg_shift arg_VSRA_U_2sh; +static bool trans_VSRA_U_2sh(DisasContext *ctx, arg_VSRA_U_2sh *a); +typedef arg_2reg_shift arg_VRSHR_S_2sh; +static bool trans_VRSHR_S_2sh(DisasContext *ctx, arg_VRSHR_S_2sh *a); +typedef arg_2reg_shift arg_VRSHR_U_2sh; 
+static bool trans_VRSHR_U_2sh(DisasContext *ctx, arg_VRSHR_U_2sh *a); +typedef arg_2reg_shift arg_VRSRA_S_2sh; +static bool trans_VRSRA_S_2sh(DisasContext *ctx, arg_VRSRA_S_2sh *a); +typedef arg_2reg_shift arg_VRSRA_U_2sh; +static bool trans_VRSRA_U_2sh(DisasContext *ctx, arg_VRSRA_U_2sh *a); +typedef arg_2reg_shift arg_VSRI_2sh; +static bool trans_VSRI_2sh(DisasContext *ctx, arg_VSRI_2sh *a); +typedef arg_2reg_shift arg_VSHL_2sh; +static bool trans_VSHL_2sh(DisasContext *ctx, arg_VSHL_2sh *a); +typedef arg_2reg_shift arg_VSLI_2sh; +static bool trans_VSLI_2sh(DisasContext *ctx, arg_VSLI_2sh *a); +typedef arg_2reg_shift arg_VQSHLU_64_2sh; +static bool trans_VQSHLU_64_2sh(DisasContext *ctx, arg_VQSHLU_64_2sh *a); +typedef arg_2reg_shift arg_VQSHLU_2sh; +static bool trans_VQSHLU_2sh(DisasContext *ctx, arg_VQSHLU_2sh *a); +typedef arg_2reg_shift arg_VQSHL_S_64_2sh; +static bool trans_VQSHL_S_64_2sh(DisasContext *ctx, arg_VQSHL_S_64_2sh *a); +typedef arg_2reg_shift arg_VQSHL_S_2sh; +static bool trans_VQSHL_S_2sh(DisasContext *ctx, arg_VQSHL_S_2sh *a); +typedef arg_2reg_shift arg_VQSHL_U_64_2sh; +static bool trans_VQSHL_U_64_2sh(DisasContext *ctx, arg_VQSHL_U_64_2sh *a); +typedef arg_2reg_shift arg_VQSHL_U_2sh; +static bool trans_VQSHL_U_2sh(DisasContext *ctx, arg_VQSHL_U_2sh *a); +typedef arg_2reg_shift arg_VSHRN_64_2sh; +static bool trans_VSHRN_64_2sh(DisasContext *ctx, arg_VSHRN_64_2sh *a); +typedef arg_2reg_shift arg_VSHRN_32_2sh; +static bool trans_VSHRN_32_2sh(DisasContext *ctx, arg_VSHRN_32_2sh *a); +typedef arg_2reg_shift arg_VSHRN_16_2sh; +static bool trans_VSHRN_16_2sh(DisasContext *ctx, arg_VSHRN_16_2sh *a); +typedef arg_2reg_shift arg_VRSHRN_64_2sh; +static bool trans_VRSHRN_64_2sh(DisasContext *ctx, arg_VRSHRN_64_2sh *a); +typedef arg_2reg_shift arg_VRSHRN_32_2sh; +static bool trans_VRSHRN_32_2sh(DisasContext *ctx, arg_VRSHRN_32_2sh *a); +typedef arg_2reg_shift arg_VRSHRN_16_2sh; +static bool trans_VRSHRN_16_2sh(DisasContext *ctx, arg_VRSHRN_16_2sh *a); +typedef arg_2reg_shift arg_VQSHRUN_64_2sh; +static bool trans_VQSHRUN_64_2sh(DisasContext *ctx, arg_VQSHRUN_64_2sh *a); +typedef arg_2reg_shift arg_VQSHRUN_32_2sh; +static bool trans_VQSHRUN_32_2sh(DisasContext *ctx, arg_VQSHRUN_32_2sh *a); +typedef arg_2reg_shift arg_VQSHRUN_16_2sh; +static bool trans_VQSHRUN_16_2sh(DisasContext *ctx, arg_VQSHRUN_16_2sh *a); +typedef arg_2reg_shift arg_VQRSHRUN_64_2sh; +static bool trans_VQRSHRUN_64_2sh(DisasContext *ctx, arg_VQRSHRUN_64_2sh *a); +typedef arg_2reg_shift arg_VQRSHRUN_32_2sh; +static bool trans_VQRSHRUN_32_2sh(DisasContext *ctx, arg_VQRSHRUN_32_2sh *a); +typedef arg_2reg_shift arg_VQRSHRUN_16_2sh; +static bool trans_VQRSHRUN_16_2sh(DisasContext *ctx, arg_VQRSHRUN_16_2sh *a); +typedef arg_2reg_shift arg_VQSHRN_S64_2sh; +static bool trans_VQSHRN_S64_2sh(DisasContext *ctx, arg_VQSHRN_S64_2sh *a); +typedef arg_2reg_shift arg_VQSHRN_S32_2sh; +static bool trans_VQSHRN_S32_2sh(DisasContext *ctx, arg_VQSHRN_S32_2sh *a); +typedef arg_2reg_shift arg_VQSHRN_S16_2sh; +static bool trans_VQSHRN_S16_2sh(DisasContext *ctx, arg_VQSHRN_S16_2sh *a); +typedef arg_2reg_shift arg_VQRSHRN_S64_2sh; +static bool trans_VQRSHRN_S64_2sh(DisasContext *ctx, arg_VQRSHRN_S64_2sh *a); +typedef arg_2reg_shift arg_VQRSHRN_S32_2sh; +static bool trans_VQRSHRN_S32_2sh(DisasContext *ctx, arg_VQRSHRN_S32_2sh *a); +typedef arg_2reg_shift arg_VQRSHRN_S16_2sh; +static bool trans_VQRSHRN_S16_2sh(DisasContext *ctx, arg_VQRSHRN_S16_2sh *a); +typedef arg_2reg_shift arg_VQSHRN_U64_2sh; +static bool 
trans_VQSHRN_U64_2sh(DisasContext *ctx, arg_VQSHRN_U64_2sh *a); +typedef arg_2reg_shift arg_VQSHRN_U32_2sh; +static bool trans_VQSHRN_U32_2sh(DisasContext *ctx, arg_VQSHRN_U32_2sh *a); +typedef arg_2reg_shift arg_VQSHRN_U16_2sh; +static bool trans_VQSHRN_U16_2sh(DisasContext *ctx, arg_VQSHRN_U16_2sh *a); +typedef arg_2reg_shift arg_VQRSHRN_U64_2sh; +static bool trans_VQRSHRN_U64_2sh(DisasContext *ctx, arg_VQRSHRN_U64_2sh *a); +typedef arg_2reg_shift arg_VQRSHRN_U32_2sh; +static bool trans_VQRSHRN_U32_2sh(DisasContext *ctx, arg_VQRSHRN_U32_2sh *a); +typedef arg_2reg_shift arg_VQRSHRN_U16_2sh; +static bool trans_VQRSHRN_U16_2sh(DisasContext *ctx, arg_VQRSHRN_U16_2sh *a); +typedef arg_2reg_shift arg_VSHLL_S_2sh; +static bool trans_VSHLL_S_2sh(DisasContext *ctx, arg_VSHLL_S_2sh *a); +typedef arg_2reg_shift arg_VSHLL_U_2sh; +static bool trans_VSHLL_U_2sh(DisasContext *ctx, arg_VSHLL_U_2sh *a); +typedef arg_2reg_shift arg_VCVT_SF_2sh; +static bool trans_VCVT_SF_2sh(DisasContext *ctx, arg_VCVT_SF_2sh *a); +typedef arg_2reg_shift arg_VCVT_UF_2sh; +static bool trans_VCVT_UF_2sh(DisasContext *ctx, arg_VCVT_UF_2sh *a); +typedef arg_2reg_shift arg_VCVT_FS_2sh; +static bool trans_VCVT_FS_2sh(DisasContext *ctx, arg_VCVT_FS_2sh *a); +typedef arg_2reg_shift arg_VCVT_FU_2sh; +static bool trans_VCVT_FU_2sh(DisasContext *ctx, arg_VCVT_FU_2sh *a); +typedef arg_1reg_imm arg_Vimm_1r; +static bool trans_Vimm_1r(DisasContext *ctx, arg_Vimm_1r *a); +typedef arg_disas_neon_dp3 arg_VEXT; +static bool trans_VEXT(DisasContext *ctx, arg_VEXT *a); +typedef arg_disas_neon_dp4 arg_VTBL; +static bool trans_VTBL(DisasContext *ctx, arg_VTBL *a); +typedef arg_disas_neon_dp5 arg_VDUP_scalar; +static bool trans_VDUP_scalar(DisasContext *ctx, arg_VDUP_scalar *a); +typedef arg_2misc arg_VREV64; +static bool trans_VREV64(DisasContext *ctx, arg_VREV64 *a); +typedef arg_2misc arg_VREV32; +static bool trans_VREV32(DisasContext *ctx, arg_VREV32 *a); +typedef arg_2misc arg_VREV16; +static bool trans_VREV16(DisasContext *ctx, arg_VREV16 *a); +typedef arg_2misc arg_VPADDL_S; +static bool trans_VPADDL_S(DisasContext *ctx, arg_VPADDL_S *a); +typedef arg_2misc arg_VPADDL_U; +static bool trans_VPADDL_U(DisasContext *ctx, arg_VPADDL_U *a); +typedef arg_2misc arg_AESE; +static bool trans_AESE(DisasContext *ctx, arg_AESE *a); +typedef arg_2misc arg_AESD; +static bool trans_AESD(DisasContext *ctx, arg_AESD *a); +typedef arg_2misc arg_AESMC; +static bool trans_AESMC(DisasContext *ctx, arg_AESMC *a); +typedef arg_2misc arg_AESIMC; +static bool trans_AESIMC(DisasContext *ctx, arg_AESIMC *a); +typedef arg_2misc arg_VCLS; +static bool trans_VCLS(DisasContext *ctx, arg_VCLS *a); +typedef arg_2misc arg_VCLZ; +static bool trans_VCLZ(DisasContext *ctx, arg_VCLZ *a); +typedef arg_2misc arg_VCNT; +static bool trans_VCNT(DisasContext *ctx, arg_VCNT *a); +typedef arg_2misc arg_VMVN; +static bool trans_VMVN(DisasContext *ctx, arg_VMVN *a); +typedef arg_2misc arg_VPADAL_S; +static bool trans_VPADAL_S(DisasContext *ctx, arg_VPADAL_S *a); +typedef arg_2misc arg_VPADAL_U; +static bool trans_VPADAL_U(DisasContext *ctx, arg_VPADAL_U *a); +typedef arg_2misc arg_VQABS; +static bool trans_VQABS(DisasContext *ctx, arg_VQABS *a); +typedef arg_2misc arg_VQNEG; +static bool trans_VQNEG(DisasContext *ctx, arg_VQNEG *a); +typedef arg_2misc arg_VCGT0; +static bool trans_VCGT0(DisasContext *ctx, arg_VCGT0 *a); +typedef arg_2misc arg_VCGE0; +static bool trans_VCGE0(DisasContext *ctx, arg_VCGE0 *a); +typedef arg_2misc arg_VCEQ0; +static bool trans_VCEQ0(DisasContext *ctx, 
arg_VCEQ0 *a); +typedef arg_2misc arg_VCLE0; +static bool trans_VCLE0(DisasContext *ctx, arg_VCLE0 *a); +typedef arg_2misc arg_VCLT0; +static bool trans_VCLT0(DisasContext *ctx, arg_VCLT0 *a); +typedef arg_2misc arg_SHA1H; +static bool trans_SHA1H(DisasContext *ctx, arg_SHA1H *a); +typedef arg_2misc arg_VABS; +static bool trans_VABS(DisasContext *ctx, arg_VABS *a); +typedef arg_2misc arg_VNEG; +static bool trans_VNEG(DisasContext *ctx, arg_VNEG *a); +typedef arg_2misc arg_VCGT0_F; +static bool trans_VCGT0_F(DisasContext *ctx, arg_VCGT0_F *a); +typedef arg_2misc arg_VCGE0_F; +static bool trans_VCGE0_F(DisasContext *ctx, arg_VCGE0_F *a); +typedef arg_2misc arg_VCEQ0_F; +static bool trans_VCEQ0_F(DisasContext *ctx, arg_VCEQ0_F *a); +typedef arg_2misc arg_VCLE0_F; +static bool trans_VCLE0_F(DisasContext *ctx, arg_VCLE0_F *a); +typedef arg_2misc arg_VCLT0_F; +static bool trans_VCLT0_F(DisasContext *ctx, arg_VCLT0_F *a); +typedef arg_2misc arg_VABS_F; +static bool trans_VABS_F(DisasContext *ctx, arg_VABS_F *a); +typedef arg_2misc arg_VNEG_F; +static bool trans_VNEG_F(DisasContext *ctx, arg_VNEG_F *a); +typedef arg_2misc arg_VSWP; +static bool trans_VSWP(DisasContext *ctx, arg_VSWP *a); +typedef arg_2misc arg_VTRN; +static bool trans_VTRN(DisasContext *ctx, arg_VTRN *a); +typedef arg_2misc arg_VUZP; +static bool trans_VUZP(DisasContext *ctx, arg_VUZP *a); +typedef arg_2misc arg_VZIP; +static bool trans_VZIP(DisasContext *ctx, arg_VZIP *a); +typedef arg_2misc arg_VMOVN; +static bool trans_VMOVN(DisasContext *ctx, arg_VMOVN *a); +typedef arg_2misc arg_VQMOVUN; +static bool trans_VQMOVUN(DisasContext *ctx, arg_VQMOVUN *a); +typedef arg_2misc arg_VQMOVN_S; +static bool trans_VQMOVN_S(DisasContext *ctx, arg_VQMOVN_S *a); +typedef arg_2misc arg_VQMOVN_U; +static bool trans_VQMOVN_U(DisasContext *ctx, arg_VQMOVN_U *a); +typedef arg_2misc arg_VSHLL; +static bool trans_VSHLL(DisasContext *ctx, arg_VSHLL *a); +typedef arg_2misc arg_SHA1SU1; +static bool trans_SHA1SU1(DisasContext *ctx, arg_SHA1SU1 *a); +typedef arg_2misc arg_SHA256SU0; +static bool trans_SHA256SU0(DisasContext *ctx, arg_SHA256SU0 *a); +typedef arg_2misc arg_VRINTN; +static bool trans_VRINTN(DisasContext *ctx, arg_VRINTN *a); +typedef arg_2misc arg_VRINTX; +static bool trans_VRINTX(DisasContext *ctx, arg_VRINTX *a); +typedef arg_2misc arg_VRINTA; +static bool trans_VRINTA(DisasContext *ctx, arg_VRINTA *a); +typedef arg_2misc arg_VRINTZ; +static bool trans_VRINTZ(DisasContext *ctx, arg_VRINTZ *a); +typedef arg_2misc arg_VCVT_F16_F32; +static bool trans_VCVT_F16_F32(DisasContext *ctx, arg_VCVT_F16_F32 *a); +typedef arg_2misc arg_VRINTM; +static bool trans_VRINTM(DisasContext *ctx, arg_VRINTM *a); +typedef arg_2misc arg_VCVT_F32_F16; +static bool trans_VCVT_F32_F16(DisasContext *ctx, arg_VCVT_F32_F16 *a); +typedef arg_2misc arg_VRINTP; +static bool trans_VRINTP(DisasContext *ctx, arg_VRINTP *a); +typedef arg_2misc arg_VCVTAS; +static bool trans_VCVTAS(DisasContext *ctx, arg_VCVTAS *a); +typedef arg_2misc arg_VCVTAU; +static bool trans_VCVTAU(DisasContext *ctx, arg_VCVTAU *a); +typedef arg_2misc arg_VCVTNS; +static bool trans_VCVTNS(DisasContext *ctx, arg_VCVTNS *a); +typedef arg_2misc arg_VCVTNU; +static bool trans_VCVTNU(DisasContext *ctx, arg_VCVTNU *a); +typedef arg_2misc arg_VCVTPS; +static bool trans_VCVTPS(DisasContext *ctx, arg_VCVTPS *a); +typedef arg_2misc arg_VCVTPU; +static bool trans_VCVTPU(DisasContext *ctx, arg_VCVTPU *a); +typedef arg_2misc arg_VCVTMS; +static bool trans_VCVTMS(DisasContext *ctx, arg_VCVTMS *a); +typedef 
arg_2misc arg_VCVTMU; +static bool trans_VCVTMU(DisasContext *ctx, arg_VCVTMU *a); +typedef arg_2misc arg_VRECPE; +static bool trans_VRECPE(DisasContext *ctx, arg_VRECPE *a); +typedef arg_2misc arg_VRSQRTE; +static bool trans_VRSQRTE(DisasContext *ctx, arg_VRSQRTE *a); +typedef arg_2misc arg_VRECPE_F; +static bool trans_VRECPE_F(DisasContext *ctx, arg_VRECPE_F *a); +typedef arg_2misc arg_VRSQRTE_F; +static bool trans_VRSQRTE_F(DisasContext *ctx, arg_VRSQRTE_F *a); +typedef arg_2misc arg_VCVT_FS; +static bool trans_VCVT_FS(DisasContext *ctx, arg_VCVT_FS *a); +typedef arg_2misc arg_VCVT_FU; +static bool trans_VCVT_FU(DisasContext *ctx, arg_VCVT_FU *a); +typedef arg_2misc arg_VCVT_SF; +static bool trans_VCVT_SF(DisasContext *ctx, arg_VCVT_SF *a); +typedef arg_2misc arg_VCVT_UF; +static bool trans_VCVT_UF(DisasContext *ctx, arg_VCVT_UF *a); +typedef arg_3diff arg_VADDL_S_3d; +static bool trans_VADDL_S_3d(DisasContext *ctx, arg_VADDL_S_3d *a); +typedef arg_3diff arg_VADDL_U_3d; +static bool trans_VADDL_U_3d(DisasContext *ctx, arg_VADDL_U_3d *a); +typedef arg_3diff arg_VADDW_S_3d; +static bool trans_VADDW_S_3d(DisasContext *ctx, arg_VADDW_S_3d *a); +typedef arg_3diff arg_VADDW_U_3d; +static bool trans_VADDW_U_3d(DisasContext *ctx, arg_VADDW_U_3d *a); +typedef arg_3diff arg_VSUBL_S_3d; +static bool trans_VSUBL_S_3d(DisasContext *ctx, arg_VSUBL_S_3d *a); +typedef arg_3diff arg_VSUBL_U_3d; +static bool trans_VSUBL_U_3d(DisasContext *ctx, arg_VSUBL_U_3d *a); +typedef arg_3diff arg_VSUBW_S_3d; +static bool trans_VSUBW_S_3d(DisasContext *ctx, arg_VSUBW_S_3d *a); +typedef arg_3diff arg_VSUBW_U_3d; +static bool trans_VSUBW_U_3d(DisasContext *ctx, arg_VSUBW_U_3d *a); +typedef arg_3diff arg_VADDHN_3d; +static bool trans_VADDHN_3d(DisasContext *ctx, arg_VADDHN_3d *a); +typedef arg_3diff arg_VRADDHN_3d; +static bool trans_VRADDHN_3d(DisasContext *ctx, arg_VRADDHN_3d *a); +typedef arg_3diff arg_VABAL_S_3d; +static bool trans_VABAL_S_3d(DisasContext *ctx, arg_VABAL_S_3d *a); +typedef arg_3diff arg_VABAL_U_3d; +static bool trans_VABAL_U_3d(DisasContext *ctx, arg_VABAL_U_3d *a); +typedef arg_3diff arg_VSUBHN_3d; +static bool trans_VSUBHN_3d(DisasContext *ctx, arg_VSUBHN_3d *a); +typedef arg_3diff arg_VRSUBHN_3d; +static bool trans_VRSUBHN_3d(DisasContext *ctx, arg_VRSUBHN_3d *a); +typedef arg_3diff arg_VABDL_S_3d; +static bool trans_VABDL_S_3d(DisasContext *ctx, arg_VABDL_S_3d *a); +typedef arg_3diff arg_VABDL_U_3d; +static bool trans_VABDL_U_3d(DisasContext *ctx, arg_VABDL_U_3d *a); +typedef arg_3diff arg_VMLAL_S_3d; +static bool trans_VMLAL_S_3d(DisasContext *ctx, arg_VMLAL_S_3d *a); +typedef arg_3diff arg_VMLAL_U_3d; +static bool trans_VMLAL_U_3d(DisasContext *ctx, arg_VMLAL_U_3d *a); +typedef arg_3diff arg_VQDMLAL_3d; +static bool trans_VQDMLAL_3d(DisasContext *ctx, arg_VQDMLAL_3d *a); +typedef arg_3diff arg_VMLSL_S_3d; +static bool trans_VMLSL_S_3d(DisasContext *ctx, arg_VMLSL_S_3d *a); +typedef arg_3diff arg_VMLSL_U_3d; +static bool trans_VMLSL_U_3d(DisasContext *ctx, arg_VMLSL_U_3d *a); +typedef arg_3diff arg_VQDMLSL_3d; +static bool trans_VQDMLSL_3d(DisasContext *ctx, arg_VQDMLSL_3d *a); +typedef arg_3diff arg_VMULL_S_3d; +static bool trans_VMULL_S_3d(DisasContext *ctx, arg_VMULL_S_3d *a); +typedef arg_3diff arg_VMULL_U_3d; +static bool trans_VMULL_U_3d(DisasContext *ctx, arg_VMULL_U_3d *a); +typedef arg_3diff arg_VQDMULL_3d; +static bool trans_VQDMULL_3d(DisasContext *ctx, arg_VQDMULL_3d *a); +typedef arg_3diff arg_VMULL_P_3d; +static bool trans_VMULL_P_3d(DisasContext *ctx, arg_VMULL_P_3d *a); 
+typedef arg_2scalar arg_VMLA_2sc; +static bool trans_VMLA_2sc(DisasContext *ctx, arg_VMLA_2sc *a); +typedef arg_2scalar arg_VMLA_F_2sc; +static bool trans_VMLA_F_2sc(DisasContext *ctx, arg_VMLA_F_2sc *a); +typedef arg_2scalar arg_VMLAL_S_2sc; +static bool trans_VMLAL_S_2sc(DisasContext *ctx, arg_VMLAL_S_2sc *a); +typedef arg_2scalar arg_VMLAL_U_2sc; +static bool trans_VMLAL_U_2sc(DisasContext *ctx, arg_VMLAL_U_2sc *a); +typedef arg_2scalar arg_VQDMLAL_2sc; +static bool trans_VQDMLAL_2sc(DisasContext *ctx, arg_VQDMLAL_2sc *a); +typedef arg_2scalar arg_VMLS_2sc; +static bool trans_VMLS_2sc(DisasContext *ctx, arg_VMLS_2sc *a); +typedef arg_2scalar arg_VMLS_F_2sc; +static bool trans_VMLS_F_2sc(DisasContext *ctx, arg_VMLS_F_2sc *a); +typedef arg_2scalar arg_VMLSL_S_2sc; +static bool trans_VMLSL_S_2sc(DisasContext *ctx, arg_VMLSL_S_2sc *a); +typedef arg_2scalar arg_VMLSL_U_2sc; +static bool trans_VMLSL_U_2sc(DisasContext *ctx, arg_VMLSL_U_2sc *a); +typedef arg_2scalar arg_VQDMLSL_2sc; +static bool trans_VQDMLSL_2sc(DisasContext *ctx, arg_VQDMLSL_2sc *a); +typedef arg_2scalar arg_VMUL_2sc; +static bool trans_VMUL_2sc(DisasContext *ctx, arg_VMUL_2sc *a); +typedef arg_2scalar arg_VMUL_F_2sc; +static bool trans_VMUL_F_2sc(DisasContext *ctx, arg_VMUL_F_2sc *a); +typedef arg_2scalar arg_VMULL_S_2sc; +static bool trans_VMULL_S_2sc(DisasContext *ctx, arg_VMULL_S_2sc *a); +typedef arg_2scalar arg_VMULL_U_2sc; +static bool trans_VMULL_U_2sc(DisasContext *ctx, arg_VMULL_U_2sc *a); +typedef arg_2scalar arg_VQDMULL_2sc; +static bool trans_VQDMULL_2sc(DisasContext *ctx, arg_VQDMULL_2sc *a); +typedef arg_2scalar arg_VQDMULH_2sc; +static bool trans_VQDMULH_2sc(DisasContext *ctx, arg_VQDMULH_2sc *a); +typedef arg_2scalar arg_VQRDMULH_2sc; +static bool trans_VQRDMULH_2sc(DisasContext *ctx, arg_VQRDMULH_2sc *a); +typedef arg_2scalar arg_VQRDMLAH_2sc; +static bool trans_VQRDMLAH_2sc(DisasContext *ctx, arg_VQRDMLAH_2sc *a); +typedef arg_2scalar arg_VQRDMLSH_2sc; +static bool trans_VQRDMLSH_2sc(DisasContext *ctx, arg_VQRDMLSH_2sc *a); + +static void disas_neon_dp_extract_1reg_imm(DisasContext *ctx, arg_1reg_imm *a, uint32_t insn) +{ + a->q = extract32(insn, 6, 1); + a->imm = deposit32(deposit32(extract32(insn, 0, 4), 4, 28, extract32(insn, 16, 3)), 7, 25, extract32(insn, 24, 1)); + a->vd = deposit32(extract32(insn, 12, 4), 4, 28, extract32(insn, 22, 1)); +} + +static void disas_neon_dp_extract_2misc(DisasContext *ctx, arg_2misc *a, uint32_t insn) +{ + a->size = extract32(insn, 18, 2); + a->q = extract32(insn, 6, 1); + a->vm = deposit32(extract32(insn, 0, 4), 4, 28, extract32(insn, 5, 1)); + a->vd = deposit32(extract32(insn, 12, 4), 4, 28, extract32(insn, 22, 1)); +} + +static void disas_neon_dp_extract_2misc_q0(DisasContext *ctx, arg_2misc *a, uint32_t insn) +{ + a->size = extract32(insn, 18, 2); + a->vm = deposit32(extract32(insn, 0, 4), 4, 28, extract32(insn, 5, 1)); + a->vd = deposit32(extract32(insn, 12, 4), 4, 28, extract32(insn, 22, 1)); + a->q = 0; +} + +static void disas_neon_dp_extract_2misc_q1(DisasContext *ctx, arg_2misc *a, uint32_t insn) +{ + a->size = extract32(insn, 18, 2); + a->vm = deposit32(extract32(insn, 0, 4), 4, 28, extract32(insn, 5, 1)); + a->vd = deposit32(extract32(insn, 12, 4), 4, 28, extract32(insn, 22, 1)); + a->q = 1; +} + +static void disas_neon_dp_extract_2reg_shl_b(DisasContext *ctx, arg_2reg_shift *a, uint32_t insn) +{ + a->shift = extract32(insn, 16, 3); + a->q = extract32(insn, 6, 1); + a->vm = deposit32(extract32(insn, 0, 4), 4, 28, extract32(insn, 5, 1)); + a->vd = 
deposit32(extract32(insn, 12, 4), 4, 28, extract32(insn, 22, 1)); + a->size = 0; +} + +static void disas_neon_dp_extract_2reg_shl_d(DisasContext *ctx, arg_2reg_shift *a, uint32_t insn) +{ + a->shift = extract32(insn, 16, 6); + a->q = extract32(insn, 6, 1); + a->vm = deposit32(extract32(insn, 0, 4), 4, 28, extract32(insn, 5, 1)); + a->vd = deposit32(extract32(insn, 12, 4), 4, 28, extract32(insn, 22, 1)); + a->size = 3; +} + +static void disas_neon_dp_extract_2reg_shl_h(DisasContext *ctx, arg_2reg_shift *a, uint32_t insn) +{ + a->shift = extract32(insn, 16, 4); + a->q = extract32(insn, 6, 1); + a->vm = deposit32(extract32(insn, 0, 4), 4, 28, extract32(insn, 5, 1)); + a->vd = deposit32(extract32(insn, 12, 4), 4, 28, extract32(insn, 22, 1)); + a->size = 1; +} + +static void disas_neon_dp_extract_2reg_shl_s(DisasContext *ctx, arg_2reg_shift *a, uint32_t insn) +{ + a->shift = extract32(insn, 16, 5); + a->q = extract32(insn, 6, 1); + a->vm = deposit32(extract32(insn, 0, 4), 4, 28, extract32(insn, 5, 1)); + a->vd = deposit32(extract32(insn, 12, 4), 4, 28, extract32(insn, 22, 1)); + a->size = 2; +} + +static void disas_neon_dp_extract_2reg_shll_b(DisasContext *ctx, arg_2reg_shift *a, uint32_t insn) +{ + a->shift = extract32(insn, 16, 3); + a->vm = deposit32(extract32(insn, 0, 4), 4, 28, extract32(insn, 5, 1)); + a->vd = deposit32(extract32(insn, 12, 4), 4, 28, extract32(insn, 22, 1)); + a->size = 0; + a->q = 0; +} + +static void disas_neon_dp_extract_2reg_shll_h(DisasContext *ctx, arg_2reg_shift *a, uint32_t insn) +{ + a->shift = extract32(insn, 16, 4); + a->vm = deposit32(extract32(insn, 0, 4), 4, 28, extract32(insn, 5, 1)); + a->vd = deposit32(extract32(insn, 12, 4), 4, 28, extract32(insn, 22, 1)); + a->size = 1; + a->q = 0; +} + +static void disas_neon_dp_extract_2reg_shll_s(DisasContext *ctx, arg_2reg_shift *a, uint32_t insn) +{ + a->shift = extract32(insn, 16, 5); + a->vm = deposit32(extract32(insn, 0, 4), 4, 28, extract32(insn, 5, 1)); + a->vd = deposit32(extract32(insn, 12, 4), 4, 28, extract32(insn, 22, 1)); + a->size = 2; + a->q = 0; +} + +static void disas_neon_dp_extract_2reg_shr_b(DisasContext *ctx, arg_2reg_shift *a, uint32_t insn) +{ + a->q = extract32(insn, 6, 1); + a->vm = deposit32(extract32(insn, 0, 4), 4, 28, extract32(insn, 5, 1)); + a->vd = deposit32(extract32(insn, 12, 4), 4, 28, extract32(insn, 22, 1)); + a->size = 0; + a->shift = rsub_8(ctx, extract32(insn, 16, 3)); +} + +static void disas_neon_dp_extract_2reg_shr_d(DisasContext *ctx, arg_2reg_shift *a, uint32_t insn) +{ + a->q = extract32(insn, 6, 1); + a->vm = deposit32(extract32(insn, 0, 4), 4, 28, extract32(insn, 5, 1)); + a->vd = deposit32(extract32(insn, 12, 4), 4, 28, extract32(insn, 22, 1)); + a->size = 3; + a->shift = rsub_64(ctx, extract32(insn, 16, 6)); +} + +static void disas_neon_dp_extract_2reg_shr_h(DisasContext *ctx, arg_2reg_shift *a, uint32_t insn) +{ + a->q = extract32(insn, 6, 1); + a->vm = deposit32(extract32(insn, 0, 4), 4, 28, extract32(insn, 5, 1)); + a->vd = deposit32(extract32(insn, 12, 4), 4, 28, extract32(insn, 22, 1)); + a->size = 1; + a->shift = rsub_16(ctx, extract32(insn, 16, 4)); +} + +static void disas_neon_dp_extract_2reg_shr_s(DisasContext *ctx, arg_2reg_shift *a, uint32_t insn) +{ + a->q = extract32(insn, 6, 1); + a->vm = deposit32(extract32(insn, 0, 4), 4, 28, extract32(insn, 5, 1)); + a->vd = deposit32(extract32(insn, 12, 4), 4, 28, extract32(insn, 22, 1)); + a->size = 2; + a->shift = rsub_32(ctx, extract32(insn, 16, 5)); +} + +static void disas_neon_dp_extract_2reg_shrn_d(DisasContext 
*ctx, arg_2reg_shift *a, uint32_t insn) +{ + a->vm = deposit32(extract32(insn, 0, 4), 4, 28, extract32(insn, 5, 1)); + a->vd = deposit32(extract32(insn, 12, 4), 4, 28, extract32(insn, 22, 1)); + a->size = 3; + a->q = 0; + a->shift = rsub_32(ctx, extract32(insn, 16, 5)); +} + +static void disas_neon_dp_extract_2reg_shrn_h(DisasContext *ctx, arg_2reg_shift *a, uint32_t insn) +{ + a->vm = deposit32(extract32(insn, 0, 4), 4, 28, extract32(insn, 5, 1)); + a->vd = deposit32(extract32(insn, 12, 4), 4, 28, extract32(insn, 22, 1)); + a->size = 1; + a->q = 0; + a->shift = rsub_8(ctx, extract32(insn, 16, 3)); +} + +static void disas_neon_dp_extract_2reg_shrn_s(DisasContext *ctx, arg_2reg_shift *a, uint32_t insn) +{ + a->vm = deposit32(extract32(insn, 0, 4), 4, 28, extract32(insn, 5, 1)); + a->vd = deposit32(extract32(insn, 12, 4), 4, 28, extract32(insn, 22, 1)); + a->size = 2; + a->q = 0; + a->shift = rsub_16(ctx, extract32(insn, 16, 4)); +} + +static void disas_neon_dp_extract_2reg_vcvt(DisasContext *ctx, arg_2reg_shift *a, uint32_t insn) +{ + a->q = extract32(insn, 6, 1); + a->vm = deposit32(extract32(insn, 0, 4), 4, 28, extract32(insn, 5, 1)); + a->vd = deposit32(extract32(insn, 12, 4), 4, 28, extract32(insn, 22, 1)); + a->size = 0; + a->shift = rsub_32(ctx, extract32(insn, 16, 5)); +} + +static void disas_neon_dp_extract_2scalar(DisasContext *ctx, arg_2scalar *a, uint32_t insn) +{ + a->q = extract32(insn, 24, 1); + a->size = extract32(insn, 20, 2); + a->vm = deposit32(extract32(insn, 0, 4), 4, 28, extract32(insn, 5, 1)); + a->vn = deposit32(extract32(insn, 16, 4), 4, 28, extract32(insn, 7, 1)); + a->vd = deposit32(extract32(insn, 12, 4), 4, 28, extract32(insn, 22, 1)); +} + +static void disas_neon_dp_extract_2scalar_q0(DisasContext *ctx, arg_2scalar *a, uint32_t insn) +{ + a->size = extract32(insn, 20, 2); + a->vm = deposit32(extract32(insn, 0, 4), 4, 28, extract32(insn, 5, 1)); + a->vn = deposit32(extract32(insn, 16, 4), 4, 28, extract32(insn, 7, 1)); + a->vd = deposit32(extract32(insn, 12, 4), 4, 28, extract32(insn, 22, 1)); + a->q = 0; +} + +static void disas_neon_dp_extract_3diff(DisasContext *ctx, arg_3diff *a, uint32_t insn) +{ + a->size = extract32(insn, 20, 2); + a->vm = deposit32(extract32(insn, 0, 4), 4, 28, extract32(insn, 5, 1)); + a->vn = deposit32(extract32(insn, 16, 4), 4, 28, extract32(insn, 7, 1)); + a->vd = deposit32(extract32(insn, 12, 4), 4, 28, extract32(insn, 22, 1)); +} + +static void disas_neon_dp_extract_3same(DisasContext *ctx, arg_3same *a, uint32_t insn) +{ + a->size = extract32(insn, 20, 2); + a->q = extract32(insn, 6, 1); + a->vm = deposit32(extract32(insn, 0, 4), 4, 28, extract32(insn, 5, 1)); + a->vn = deposit32(extract32(insn, 16, 4), 4, 28, extract32(insn, 7, 1)); + a->vd = deposit32(extract32(insn, 12, 4), 4, 28, extract32(insn, 22, 1)); +} + +static void disas_neon_dp_extract_3same_64_rev(DisasContext *ctx, arg_3same *a, uint32_t insn) +{ + a->q = extract32(insn, 6, 1); + a->vm = deposit32(extract32(insn, 16, 4), 4, 28, extract32(insn, 7, 1)); + a->vn = deposit32(extract32(insn, 0, 4), 4, 28, extract32(insn, 5, 1)); + a->vd = deposit32(extract32(insn, 12, 4), 4, 28, extract32(insn, 22, 1)); + a->size = 3; +} + +static void disas_neon_dp_extract_3same_crypto(DisasContext *ctx, arg_3same *a, uint32_t insn) +{ + a->vm = deposit32(extract32(insn, 0, 4), 4, 28, extract32(insn, 5, 1)); + a->vn = deposit32(extract32(insn, 16, 4), 4, 28, extract32(insn, 7, 1)); + a->vd = deposit32(extract32(insn, 12, 4), 4, 28, extract32(insn, 22, 1)); + a->size = 0; + a->q = 1; +} + 
+static void disas_neon_dp_extract_3same_fp(DisasContext *ctx, arg_3same *a, uint32_t insn) +{ + a->size = extract32(insn, 20, 1); + a->q = extract32(insn, 6, 1); + a->vm = deposit32(extract32(insn, 0, 4), 4, 28, extract32(insn, 5, 1)); + a->vn = deposit32(extract32(insn, 16, 4), 4, 28, extract32(insn, 7, 1)); + a->vd = deposit32(extract32(insn, 12, 4), 4, 28, extract32(insn, 22, 1)); +} + +static void disas_neon_dp_extract_3same_fp_q0(DisasContext *ctx, arg_3same *a, uint32_t insn) +{ + a->size = extract32(insn, 20, 1); + a->vm = deposit32(extract32(insn, 0, 4), 4, 28, extract32(insn, 5, 1)); + a->vn = deposit32(extract32(insn, 16, 4), 4, 28, extract32(insn, 7, 1)); + a->vd = deposit32(extract32(insn, 12, 4), 4, 28, extract32(insn, 22, 1)); + a->q = 0; +} + +static void disas_neon_dp_extract_3same_logic(DisasContext *ctx, arg_3same *a, uint32_t insn) +{ + a->q = extract32(insn, 6, 1); + a->vm = deposit32(extract32(insn, 0, 4), 4, 28, extract32(insn, 5, 1)); + a->vn = deposit32(extract32(insn, 16, 4), 4, 28, extract32(insn, 7, 1)); + a->vd = deposit32(extract32(insn, 12, 4), 4, 28, extract32(insn, 22, 1)); + a->size = 0; +} + +static void disas_neon_dp_extract_3same_q0(DisasContext *ctx, arg_3same *a, uint32_t insn) +{ + a->size = extract32(insn, 20, 2); + a->vm = deposit32(extract32(insn, 0, 4), 4, 28, extract32(insn, 5, 1)); + a->vn = deposit32(extract32(insn, 16, 4), 4, 28, extract32(insn, 7, 1)); + a->vd = deposit32(extract32(insn, 12, 4), 4, 28, extract32(insn, 22, 1)); + a->q = 0; +} + +static void disas_neon_dp_extract_3same_rev(DisasContext *ctx, arg_3same *a, uint32_t insn) +{ + a->size = extract32(insn, 20, 2); + a->q = extract32(insn, 6, 1); + a->vn = deposit32(extract32(insn, 0, 4), 4, 28, extract32(insn, 5, 1)); + a->vm = deposit32(extract32(insn, 16, 4), 4, 28, extract32(insn, 7, 1)); + a->vd = deposit32(extract32(insn, 12, 4), 4, 28, extract32(insn, 22, 1)); +} + +static void disas_neon_dp_extract_disas_neon_dp_Fmt_24(DisasContext *ctx, arg_disas_neon_dp3 *a, uint32_t insn) +{ + a->imm = extract32(insn, 8, 4); + a->q = extract32(insn, 6, 1); + a->vm = deposit32(extract32(insn, 0, 4), 4, 28, extract32(insn, 5, 1)); + a->vn = deposit32(extract32(insn, 16, 4), 4, 28, extract32(insn, 7, 1)); + a->vd = deposit32(extract32(insn, 12, 4), 4, 28, extract32(insn, 22, 1)); +} + +static void disas_neon_dp_extract_disas_neon_dp_Fmt_25(DisasContext *ctx, arg_disas_neon_dp4 *a, uint32_t insn) +{ + a->len = extract32(insn, 8, 2); + a->op = extract32(insn, 6, 1); + a->vm = deposit32(extract32(insn, 0, 4), 4, 28, extract32(insn, 5, 1)); + a->vn = deposit32(extract32(insn, 16, 4), 4, 28, extract32(insn, 7, 1)); + a->vd = deposit32(extract32(insn, 12, 4), 4, 28, extract32(insn, 22, 1)); +} + +static void disas_neon_dp_extract_disas_neon_dp_Fmt_26(DisasContext *ctx, arg_disas_neon_dp5 *a, uint32_t insn) +{ + a->index = extract32(insn, 17, 3); + a->q = extract32(insn, 6, 1); + a->vm = deposit32(extract32(insn, 0, 4), 4, 28, extract32(insn, 5, 1)); + a->vd = deposit32(extract32(insn, 12, 4), 4, 28, extract32(insn, 22, 1)); + a->size = 0; +} + +static void disas_neon_dp_extract_disas_neon_dp_Fmt_27(DisasContext *ctx, arg_disas_neon_dp5 *a, uint32_t insn) +{ + a->index = extract32(insn, 18, 2); + a->q = extract32(insn, 6, 1); + a->vm = deposit32(extract32(insn, 0, 4), 4, 28, extract32(insn, 5, 1)); + a->vd = deposit32(extract32(insn, 12, 4), 4, 28, extract32(insn, 22, 1)); + a->size = 1; +} + +static void disas_neon_dp_extract_disas_neon_dp_Fmt_28(DisasContext *ctx, arg_disas_neon_dp5 *a, uint32_t 
insn) +{ + a->index = extract32(insn, 19, 1); + a->q = extract32(insn, 6, 1); + a->vm = deposit32(extract32(insn, 0, 4), 4, 28, extract32(insn, 5, 1)); + a->vd = deposit32(extract32(insn, 12, 4), 4, 28, extract32(insn, 22, 1)); + a->size = 2; +} + +static bool disas_neon_dp(DisasContext *ctx, uint32_t insn) +{ + union { + arg_1reg_imm f_1reg_imm; + arg_2misc f_2misc; + arg_2reg_shift f_2reg_shift; + arg_2scalar f_2scalar; + arg_3diff f_3diff; + arg_3same f_3same; + arg_disas_neon_dp3 f_disas_neon_dp3; + arg_disas_neon_dp4 f_disas_neon_dp4; + arg_disas_neon_dp5 f_disas_neon_dp5; + } u; + + switch (insn & 0xfe800010) { + case 0xf2000000: + /* 1111001. 0....... ........ ...0.... */ + switch (insn & 0x01000f00) { + case 0x00000000: + /* 11110010 0....... ....0000 ...0.... */ + disas_neon_dp_extract_3same(ctx, &u.f_3same, insn); + if (trans_VHADD_S_3s(ctx, &u.f_3same)) return true; + return false; + case 0x00000100: + /* 11110010 0....... ....0001 ...0.... */ + disas_neon_dp_extract_3same(ctx, &u.f_3same, insn); + if (trans_VRHADD_S_3s(ctx, &u.f_3same)) return true; + return false; + case 0x00000200: + /* 11110010 0....... ....0010 ...0.... */ + disas_neon_dp_extract_3same(ctx, &u.f_3same, insn); + if (trans_VHSUB_S_3s(ctx, &u.f_3same)) return true; + return false; + case 0x00000300: + /* 11110010 0....... ....0011 ...0.... */ + disas_neon_dp_extract_3same(ctx, &u.f_3same, insn); + if (trans_VCGT_S_3s(ctx, &u.f_3same)) return true; + return false; + case 0x00000400: + /* 11110010 0....... ....0100 ...0.... */ + disas_neon_dp_extract_3same_rev(ctx, &u.f_3same, insn); + if (trans_VSHL_S_3s(ctx, &u.f_3same)) return true; + return false; + case 0x00000500: + /* 11110010 0....... ....0101 ...0.... */ + if ((insn & 0x00300000) == 0x00300000) { + /* 11110010 0.11.... ....0101 ...0.... */ + disas_neon_dp_extract_3same_64_rev(ctx, &u.f_3same, insn); + if (trans_VRSHL_S64_3s(ctx, &u.f_3same)) return true; + } + disas_neon_dp_extract_3same_rev(ctx, &u.f_3same, insn); + if (trans_VRSHL_S_3s(ctx, &u.f_3same)) return true; + return false; + case 0x00000600: + /* 11110010 0....... ....0110 ...0.... */ + disas_neon_dp_extract_3same(ctx, &u.f_3same, insn); + if (trans_VMAX_S_3s(ctx, &u.f_3same)) return true; + return false; + case 0x00000700: + /* 11110010 0....... ....0111 ...0.... */ + disas_neon_dp_extract_3same(ctx, &u.f_3same, insn); + if (trans_VABD_S_3s(ctx, &u.f_3same)) return true; + return false; + case 0x00000800: + /* 11110010 0....... ....1000 ...0.... */ + disas_neon_dp_extract_3same(ctx, &u.f_3same, insn); + if (trans_VADD_3s(ctx, &u.f_3same)) return true; + return false; + case 0x00000900: + /* 11110010 0....... ....1001 ...0.... */ + disas_neon_dp_extract_3same(ctx, &u.f_3same, insn); + if (trans_VMLA_3s(ctx, &u.f_3same)) return true; + return false; + case 0x00000a00: + /* 11110010 0....... ....1010 ...0.... */ + disas_neon_dp_extract_3same_q0(ctx, &u.f_3same, insn); + switch ((insn >> 6) & 0x1) { + case 0x0: + /* 11110010 0....... ....1010 .0.0.... */ + if (trans_VPMAX_S_3s(ctx, &u.f_3same)) return true; + return false; + } + return false; + case 0x00000b00: + /* 11110010 0....... ....1011 ...0.... */ + disas_neon_dp_extract_3same(ctx, &u.f_3same, insn); + if (trans_VQDMULH_3s(ctx, &u.f_3same)) return true; + return false; + case 0x00000c00: + /* 11110010 0....... ....1100 ...0.... */ + disas_neon_dp_extract_3same_crypto(ctx, &u.f_3same, insn); + switch (insn & 0x00300040) { + case 0x00000040: + /* 11110010 0.00.... ....1100 .1.0.... 
*/ + if (trans_SHA1C_3s(ctx, &u.f_3same)) return true; + return false; + case 0x00100040: + /* 11110010 0.01.... ....1100 .1.0.... */ + if (trans_SHA1P_3s(ctx, &u.f_3same)) return true; + return false; + case 0x00200040: + /* 11110010 0.10.... ....1100 .1.0.... */ + if (trans_SHA1M_3s(ctx, &u.f_3same)) return true; + return false; + case 0x00300040: + /* 11110010 0.11.... ....1100 .1.0.... */ + if (trans_SHA1SU0_3s(ctx, &u.f_3same)) return true; + return false; + } + return false; + case 0x00000d00: + /* 11110010 0....... ....1101 ...0.... */ + disas_neon_dp_extract_3same_fp(ctx, &u.f_3same, insn); + switch ((insn >> 21) & 0x1) { + case 0x0: + /* 11110010 0.0..... ....1101 ...0.... */ + if (trans_VADD_fp_3s(ctx, &u.f_3same)) return true; + return false; + case 0x1: + /* 11110010 0.1..... ....1101 ...0.... */ + if (trans_VSUB_fp_3s(ctx, &u.f_3same)) return true; + return false; + } + return false; + case 0x00000e00: + /* 11110010 0....... ....1110 ...0.... */ + disas_neon_dp_extract_3same_fp(ctx, &u.f_3same, insn); + switch ((insn >> 21) & 0x1) { + case 0x0: + /* 11110010 0.0..... ....1110 ...0.... */ + if (trans_VCEQ_fp_3s(ctx, &u.f_3same)) return true; + return false; + } + return false; + case 0x00000f00: + /* 11110010 0....... ....1111 ...0.... */ + disas_neon_dp_extract_3same_fp(ctx, &u.f_3same, insn); + switch ((insn >> 21) & 0x1) { + case 0x0: + /* 11110010 0.0..... ....1111 ...0.... */ + if (trans_VMAX_fp_3s(ctx, &u.f_3same)) return true; + return false; + case 0x1: + /* 11110010 0.1..... ....1111 ...0.... */ + if (trans_VMIN_fp_3s(ctx, &u.f_3same)) return true; + return false; + } + return false; + case 0x01000000: + /* 11110011 0....... ....0000 ...0.... */ + disas_neon_dp_extract_3same(ctx, &u.f_3same, insn); + if (trans_VHADD_U_3s(ctx, &u.f_3same)) return true; + return false; + case 0x01000100: + /* 11110011 0....... ....0001 ...0.... */ + disas_neon_dp_extract_3same(ctx, &u.f_3same, insn); + if (trans_VRHADD_U_3s(ctx, &u.f_3same)) return true; + return false; + case 0x01000200: + /* 11110011 0....... ....0010 ...0.... */ + disas_neon_dp_extract_3same(ctx, &u.f_3same, insn); + if (trans_VHSUB_U_3s(ctx, &u.f_3same)) return true; + return false; + case 0x01000300: + /* 11110011 0....... ....0011 ...0.... */ + disas_neon_dp_extract_3same(ctx, &u.f_3same, insn); + if (trans_VCGT_U_3s(ctx, &u.f_3same)) return true; + return false; + case 0x01000400: + /* 11110011 0....... ....0100 ...0.... */ + disas_neon_dp_extract_3same_rev(ctx, &u.f_3same, insn); + if (trans_VSHL_U_3s(ctx, &u.f_3same)) return true; + return false; + case 0x01000500: + /* 11110011 0....... ....0101 ...0.... */ + if ((insn & 0x00300000) == 0x00300000) { + /* 11110011 0.11.... ....0101 ...0.... */ + disas_neon_dp_extract_3same_64_rev(ctx, &u.f_3same, insn); + if (trans_VRSHL_U64_3s(ctx, &u.f_3same)) return true; + } + disas_neon_dp_extract_3same_rev(ctx, &u.f_3same, insn); + if (trans_VRSHL_U_3s(ctx, &u.f_3same)) return true; + return false; + case 0x01000600: + /* 11110011 0....... ....0110 ...0.... */ + disas_neon_dp_extract_3same(ctx, &u.f_3same, insn); + if (trans_VMAX_U_3s(ctx, &u.f_3same)) return true; + return false; + case 0x01000700: + /* 11110011 0....... ....0111 ...0.... */ + disas_neon_dp_extract_3same(ctx, &u.f_3same, insn); + if (trans_VABD_U_3s(ctx, &u.f_3same)) return true; + return false; + case 0x01000800: + /* 11110011 0....... ....1000 ...0.... 
*/ + disas_neon_dp_extract_3same(ctx, &u.f_3same, insn); + if (trans_VSUB_3s(ctx, &u.f_3same)) return true; + return false; + case 0x01000900: + /* 11110011 0....... ....1001 ...0.... */ + disas_neon_dp_extract_3same(ctx, &u.f_3same, insn); + if (trans_VMLS_3s(ctx, &u.f_3same)) return true; + return false; + case 0x01000a00: + /* 11110011 0....... ....1010 ...0.... */ + disas_neon_dp_extract_3same_q0(ctx, &u.f_3same, insn); + switch ((insn >> 6) & 0x1) { + case 0x0: + /* 11110011 0....... ....1010 .0.0.... */ + if (trans_VPMAX_U_3s(ctx, &u.f_3same)) return true; + return false; + } + return false; + case 0x01000b00: + /* 11110011 0....... ....1011 ...0.... */ + disas_neon_dp_extract_3same(ctx, &u.f_3same, insn); + if (trans_VQRDMULH_3s(ctx, &u.f_3same)) return true; + return false; + case 0x01000c00: + /* 11110011 0....... ....1100 ...0.... */ + disas_neon_dp_extract_3same_crypto(ctx, &u.f_3same, insn); + switch (insn & 0x00300040) { + case 0x00000040: + /* 11110011 0.00.... ....1100 .1.0.... */ + if (trans_SHA256H_3s(ctx, &u.f_3same)) return true; + return false; + case 0x00100040: + /* 11110011 0.01.... ....1100 .1.0.... */ + if (trans_SHA256H2_3s(ctx, &u.f_3same)) return true; + return false; + case 0x00200040: + /* 11110011 0.10.... ....1100 .1.0.... */ + if (trans_SHA256SU1_3s(ctx, &u.f_3same)) return true; + return false; + } + return false; + case 0x01000d00: + /* 11110011 0....... ....1101 ...0.... */ + switch ((insn >> 21) & 0x1) { + case 0x0: + /* 11110011 0.0..... ....1101 ...0.... */ + disas_neon_dp_extract_3same_fp_q0(ctx, &u.f_3same, insn); + switch ((insn >> 6) & 0x1) { + case 0x0: + /* 11110011 0.0..... ....1101 .0.0.... */ + if (trans_VPADD_fp_3s(ctx, &u.f_3same)) return true; + return false; + } + return false; + case 0x1: + /* 11110011 0.1..... ....1101 ...0.... */ + disas_neon_dp_extract_3same_fp(ctx, &u.f_3same, insn); + if (trans_VABD_fp_3s(ctx, &u.f_3same)) return true; + return false; + } + return false; + case 0x01000e00: + /* 11110011 0....... ....1110 ...0.... */ + disas_neon_dp_extract_3same_fp(ctx, &u.f_3same, insn); + switch ((insn >> 21) & 0x1) { + case 0x0: + /* 11110011 0.0..... ....1110 ...0.... */ + if (trans_VCGE_fp_3s(ctx, &u.f_3same)) return true; + return false; + case 0x1: + /* 11110011 0.1..... ....1110 ...0.... */ + if (trans_VCGT_fp_3s(ctx, &u.f_3same)) return true; + return false; + } + return false; + case 0x01000f00: + /* 11110011 0....... ....1111 ...0.... */ + disas_neon_dp_extract_3same_fp_q0(ctx, &u.f_3same, insn); + switch (insn & 0x00200040) { + case 0x00000000: + /* 11110011 0.0..... ....1111 .0.0.... */ + if (trans_VPMAX_fp_3s(ctx, &u.f_3same)) return true; + return false; + case 0x00200000: + /* 11110011 0.1..... ....1111 .0.0.... */ + if (trans_VPMIN_fp_3s(ctx, &u.f_3same)) return true; + return false; + } + return false; + } + return false; + case 0xf2000010: + /* 1111001. 0....... ........ ...1.... */ + switch (insn & 0x01000f00) { + case 0x00000000: + /* 11110010 0....... ....0000 ...1.... */ + disas_neon_dp_extract_3same(ctx, &u.f_3same, insn); + if (trans_VQADD_S_3s(ctx, &u.f_3same)) return true; + return false; + case 0x00000100: + /* 11110010 0....... ....0001 ...1.... */ + disas_neon_dp_extract_3same_logic(ctx, &u.f_3same, insn); + switch ((insn >> 20) & 0x3) { + case 0x0: + /* 11110010 0.00.... ....0001 ...1.... */ + if (trans_VAND_3s(ctx, &u.f_3same)) return true; + return false; + case 0x1: + /* 11110010 0.01.... ....0001 ...1.... 
*/ + if (trans_VBIC_3s(ctx, &u.f_3same)) return true; + return false; + case 0x2: + /* 11110010 0.10.... ....0001 ...1.... */ + if (trans_VORR_3s(ctx, &u.f_3same)) return true; + return false; + case 0x3: + /* 11110010 0.11.... ....0001 ...1.... */ + if (trans_VORN_3s(ctx, &u.f_3same)) return true; + return false; + } + return false; + case 0x00000200: + /* 11110010 0....... ....0010 ...1.... */ + disas_neon_dp_extract_3same(ctx, &u.f_3same, insn); + if (trans_VQSUB_S_3s(ctx, &u.f_3same)) return true; + return false; + case 0x00000300: + /* 11110010 0....... ....0011 ...1.... */ + disas_neon_dp_extract_3same(ctx, &u.f_3same, insn); + if (trans_VCGE_S_3s(ctx, &u.f_3same)) return true; + return false; + case 0x00000400: + /* 11110010 0....... ....0100 ...1.... */ + if ((insn & 0x00300000) == 0x00300000) { + /* 11110010 0.11.... ....0100 ...1.... */ + disas_neon_dp_extract_3same_64_rev(ctx, &u.f_3same, insn); + if (trans_VQSHL_S64_3s(ctx, &u.f_3same)) return true; + } + disas_neon_dp_extract_3same_rev(ctx, &u.f_3same, insn); + if (trans_VQSHL_S_3s(ctx, &u.f_3same)) return true; + return false; + case 0x00000500: + /* 11110010 0....... ....0101 ...1.... */ + if ((insn & 0x00300000) == 0x00300000) { + /* 11110010 0.11.... ....0101 ...1.... */ + disas_neon_dp_extract_3same_64_rev(ctx, &u.f_3same, insn); + if (trans_VQRSHL_S64_3s(ctx, &u.f_3same)) return true; + } + disas_neon_dp_extract_3same_rev(ctx, &u.f_3same, insn); + if (trans_VQRSHL_S_3s(ctx, &u.f_3same)) return true; + return false; + case 0x00000600: + /* 11110010 0....... ....0110 ...1.... */ + disas_neon_dp_extract_3same(ctx, &u.f_3same, insn); + if (trans_VMIN_S_3s(ctx, &u.f_3same)) return true; + return false; + case 0x00000700: + /* 11110010 0....... ....0111 ...1.... */ + disas_neon_dp_extract_3same(ctx, &u.f_3same, insn); + if (trans_VABA_S_3s(ctx, &u.f_3same)) return true; + return false; + case 0x00000800: + /* 11110010 0....... ....1000 ...1.... */ + disas_neon_dp_extract_3same(ctx, &u.f_3same, insn); + if (trans_VTST_3s(ctx, &u.f_3same)) return true; + return false; + case 0x00000900: + /* 11110010 0....... ....1001 ...1.... */ + disas_neon_dp_extract_3same(ctx, &u.f_3same, insn); + if (trans_VMUL_3s(ctx, &u.f_3same)) return true; + return false; + case 0x00000a00: + /* 11110010 0....... ....1010 ...1.... */ + disas_neon_dp_extract_3same_q0(ctx, &u.f_3same, insn); + switch ((insn >> 6) & 0x1) { + case 0x0: + /* 11110010 0....... ....1010 .0.1.... */ + if (trans_VPMIN_S_3s(ctx, &u.f_3same)) return true; + return false; + } + return false; + case 0x00000b00: + /* 11110010 0....... ....1011 ...1.... */ + disas_neon_dp_extract_3same_q0(ctx, &u.f_3same, insn); + switch ((insn >> 6) & 0x1) { + case 0x0: + /* 11110010 0....... ....1011 .0.1.... */ + if (trans_VPADD_3s(ctx, &u.f_3same)) return true; + return false; + } + return false; + case 0x00000c00: + /* 11110010 0....... ....1100 ...1.... */ + disas_neon_dp_extract_3same_fp(ctx, &u.f_3same, insn); + switch ((insn >> 21) & 0x1) { + case 0x0: + /* 11110010 0.0..... ....1100 ...1.... */ + if (trans_VFMA_fp_3s(ctx, &u.f_3same)) return true; + return false; + case 0x1: + /* 11110010 0.1..... ....1100 ...1.... */ + if (trans_VFMS_fp_3s(ctx, &u.f_3same)) return true; + return false; + } + return false; + case 0x00000d00: + /* 11110010 0....... ....1101 ...1.... */ + disas_neon_dp_extract_3same_fp(ctx, &u.f_3same, insn); + switch ((insn >> 21) & 0x1) { + case 0x0: + /* 11110010 0.0..... ....1101 ...1.... 
*/ + if (trans_VMLA_fp_3s(ctx, &u.f_3same)) return true; + return false; + case 0x1: + /* 11110010 0.1..... ....1101 ...1.... */ + if (trans_VMLS_fp_3s(ctx, &u.f_3same)) return true; + return false; + } + return false; + case 0x00000f00: + /* 11110010 0....... ....1111 ...1.... */ + disas_neon_dp_extract_3same_fp(ctx, &u.f_3same, insn); + switch ((insn >> 21) & 0x1) { + case 0x0: + /* 11110010 0.0..... ....1111 ...1.... */ + if (trans_VRECPS_fp_3s(ctx, &u.f_3same)) return true; + return false; + case 0x1: + /* 11110010 0.1..... ....1111 ...1.... */ + if (trans_VRSQRTS_fp_3s(ctx, &u.f_3same)) return true; + return false; + } + return false; + case 0x01000000: + /* 11110011 0....... ....0000 ...1.... */ + disas_neon_dp_extract_3same(ctx, &u.f_3same, insn); + if (trans_VQADD_U_3s(ctx, &u.f_3same)) return true; + return false; + case 0x01000100: + /* 11110011 0....... ....0001 ...1.... */ + disas_neon_dp_extract_3same_logic(ctx, &u.f_3same, insn); + switch ((insn >> 20) & 0x3) { + case 0x0: + /* 11110011 0.00.... ....0001 ...1.... */ + if (trans_VEOR_3s(ctx, &u.f_3same)) return true; + return false; + case 0x1: + /* 11110011 0.01.... ....0001 ...1.... */ + if (trans_VBSL_3s(ctx, &u.f_3same)) return true; + return false; + case 0x2: + /* 11110011 0.10.... ....0001 ...1.... */ + if (trans_VBIT_3s(ctx, &u.f_3same)) return true; + return false; + case 0x3: + /* 11110011 0.11.... ....0001 ...1.... */ + if (trans_VBIF_3s(ctx, &u.f_3same)) return true; + return false; + } + return false; + case 0x01000200: + /* 11110011 0....... ....0010 ...1.... */ + disas_neon_dp_extract_3same(ctx, &u.f_3same, insn); + if (trans_VQSUB_U_3s(ctx, &u.f_3same)) return true; + return false; + case 0x01000300: + /* 11110011 0....... ....0011 ...1.... */ + disas_neon_dp_extract_3same(ctx, &u.f_3same, insn); + if (trans_VCGE_U_3s(ctx, &u.f_3same)) return true; + return false; + case 0x01000400: + /* 11110011 0....... ....0100 ...1.... */ + if ((insn & 0x00300000) == 0x00300000) { + /* 11110011 0.11.... ....0100 ...1.... */ + disas_neon_dp_extract_3same_64_rev(ctx, &u.f_3same, insn); + if (trans_VQSHL_U64_3s(ctx, &u.f_3same)) return true; + } + disas_neon_dp_extract_3same_rev(ctx, &u.f_3same, insn); + if (trans_VQSHL_U_3s(ctx, &u.f_3same)) return true; + return false; + case 0x01000500: + /* 11110011 0....... ....0101 ...1.... */ + if ((insn & 0x00300000) == 0x00300000) { + /* 11110011 0.11.... ....0101 ...1.... */ + disas_neon_dp_extract_3same_64_rev(ctx, &u.f_3same, insn); + if (trans_VQRSHL_U64_3s(ctx, &u.f_3same)) return true; + } + disas_neon_dp_extract_3same_rev(ctx, &u.f_3same, insn); + if (trans_VQRSHL_U_3s(ctx, &u.f_3same)) return true; + return false; + case 0x01000600: + /* 11110011 0....... ....0110 ...1.... */ + disas_neon_dp_extract_3same(ctx, &u.f_3same, insn); + if (trans_VMIN_U_3s(ctx, &u.f_3same)) return true; + return false; + case 0x01000700: + /* 11110011 0....... ....0111 ...1.... */ + disas_neon_dp_extract_3same(ctx, &u.f_3same, insn); + if (trans_VABA_U_3s(ctx, &u.f_3same)) return true; + return false; + case 0x01000800: + /* 11110011 0....... ....1000 ...1.... */ + disas_neon_dp_extract_3same(ctx, &u.f_3same, insn); + if (trans_VCEQ_3s(ctx, &u.f_3same)) return true; + return false; + case 0x01000900: + /* 11110011 0....... ....1001 ...1.... */ + disas_neon_dp_extract_3same(ctx, &u.f_3same, insn); + if (trans_VMUL_p_3s(ctx, &u.f_3same)) return true; + return false; + case 0x01000a00: + /* 11110011 0....... ....1010 ...1.... 
*/ + disas_neon_dp_extract_3same_q0(ctx, &u.f_3same, insn); + switch ((insn >> 6) & 0x1) { + case 0x0: + /* 11110011 0....... ....1010 .0.1.... */ + if (trans_VPMIN_U_3s(ctx, &u.f_3same)) return true; + return false; + } + return false; + case 0x01000b00: + /* 11110011 0....... ....1011 ...1.... */ + disas_neon_dp_extract_3same(ctx, &u.f_3same, insn); + if (trans_VQRDMLAH_3s(ctx, &u.f_3same)) return true; + return false; + case 0x01000c00: + /* 11110011 0....... ....1100 ...1.... */ + disas_neon_dp_extract_3same(ctx, &u.f_3same, insn); + if (trans_VQRDMLSH_3s(ctx, &u.f_3same)) return true; + return false; + case 0x01000d00: + /* 11110011 0....... ....1101 ...1.... */ + disas_neon_dp_extract_3same_fp(ctx, &u.f_3same, insn); + switch ((insn >> 21) & 0x1) { + case 0x0: + /* 11110011 0.0..... ....1101 ...1.... */ + if (trans_VMUL_fp_3s(ctx, &u.f_3same)) return true; + return false; + } + return false; + case 0x01000e00: + /* 11110011 0....... ....1110 ...1.... */ + disas_neon_dp_extract_3same_fp(ctx, &u.f_3same, insn); + switch ((insn >> 21) & 0x1) { + case 0x0: + /* 11110011 0.0..... ....1110 ...1.... */ + if (trans_VACGE_fp_3s(ctx, &u.f_3same)) return true; + return false; + case 0x1: + /* 11110011 0.1..... ....1110 ...1.... */ + if (trans_VACGT_fp_3s(ctx, &u.f_3same)) return true; + return false; + } + return false; + case 0x01000f00: + /* 11110011 0....... ....1111 ...1.... */ + disas_neon_dp_extract_3same_fp(ctx, &u.f_3same, insn); + switch ((insn >> 21) & 0x1) { + case 0x0: + /* 11110011 0.0..... ....1111 ...1.... */ + if (trans_VMAXNM_fp_3s(ctx, &u.f_3same)) return true; + return false; + case 0x1: + /* 11110011 0.1..... ....1111 ...1.... */ + if (trans_VMINNM_fp_3s(ctx, &u.f_3same)) return true; + return false; + } + return false; + } + return false; + case 0xf2800000: + /* 1111001. 1....... ........ ...0.... */ + if ((insn & 0x00300000) == 0x00300000) { + /* 1111001. 1.11.... ........ ...0.... */ + switch ((insn >> 24) & 0x1) { + case 0x0: + /* 11110010 1.11.... ........ ...0.... */ + disas_neon_dp_extract_disas_neon_dp_Fmt_24(ctx, &u.f_disas_neon_dp3, insn); + if (trans_VEXT(ctx, &u.f_disas_neon_dp3)) return true; + return false; + case 0x1: + /* 11110011 1.11.... ........ ...0.... */ + switch ((insn >> 10) & 0x3) { + case 0x0: + /* 11110011 1.11.... ....00.. ...0.... */ + switch (insn & 0x00030380) { + case 0x00000000: + /* 11110011 1.11..00 ....0000 0..0.... */ + disas_neon_dp_extract_2misc(ctx, &u.f_2misc, insn); + if (trans_VREV64(ctx, &u.f_2misc)) return true; + return false; + case 0x00000080: + /* 11110011 1.11..00 ....0000 1..0.... */ + disas_neon_dp_extract_2misc(ctx, &u.f_2misc, insn); + if (trans_VREV32(ctx, &u.f_2misc)) return true; + return false; + case 0x00000100: + /* 11110011 1.11..00 ....0001 0..0.... */ + disas_neon_dp_extract_2misc(ctx, &u.f_2misc, insn); + if (trans_VREV16(ctx, &u.f_2misc)) return true; + return false; + case 0x00000200: + /* 11110011 1.11..00 ....0010 0..0.... */ + disas_neon_dp_extract_2misc(ctx, &u.f_2misc, insn); + if (trans_VPADDL_S(ctx, &u.f_2misc)) return true; + return false; + case 0x00000280: + /* 11110011 1.11..00 ....0010 1..0.... */ + disas_neon_dp_extract_2misc(ctx, &u.f_2misc, insn); + if (trans_VPADDL_U(ctx, &u.f_2misc)) return true; + return false; + case 0x00000300: + /* 11110011 1.11..00 ....0011 0..0.... */ + switch ((insn >> 6) & 0x1) { + case 0x0: + /* 11110011 1.11..00 ....0011 00.0.... 
*/ + disas_neon_dp_extract_2misc_q1(ctx, &u.f_2misc, insn); + if (trans_AESE(ctx, &u.f_2misc)) return true; + return false; + case 0x1: + /* 11110011 1.11..00 ....0011 01.0.... */ + disas_neon_dp_extract_2misc_q1(ctx, &u.f_2misc, insn); + if (trans_AESD(ctx, &u.f_2misc)) return true; + return false; + } + return false; + case 0x00000380: + /* 11110011 1.11..00 ....0011 1..0.... */ + switch ((insn >> 6) & 0x1) { + case 0x0: + /* 11110011 1.11..00 ....0011 10.0.... */ + disas_neon_dp_extract_2misc_q1(ctx, &u.f_2misc, insn); + if (trans_AESMC(ctx, &u.f_2misc)) return true; + return false; + case 0x1: + /* 11110011 1.11..00 ....0011 11.0.... */ + disas_neon_dp_extract_2misc_q1(ctx, &u.f_2misc, insn); + if (trans_AESIMC(ctx, &u.f_2misc)) return true; + return false; + } + return false; + case 0x00010000: + /* 11110011 1.11..01 ....0000 0..0.... */ + disas_neon_dp_extract_2misc(ctx, &u.f_2misc, insn); + if (trans_VCGT0(ctx, &u.f_2misc)) return true; + return false; + case 0x00010080: + /* 11110011 1.11..01 ....0000 1..0.... */ + disas_neon_dp_extract_2misc(ctx, &u.f_2misc, insn); + if (trans_VCGE0(ctx, &u.f_2misc)) return true; + return false; + case 0x00010100: + /* 11110011 1.11..01 ....0001 0..0.... */ + disas_neon_dp_extract_2misc(ctx, &u.f_2misc, insn); + if (trans_VCEQ0(ctx, &u.f_2misc)) return true; + return false; + case 0x00010180: + /* 11110011 1.11..01 ....0001 1..0.... */ + disas_neon_dp_extract_2misc(ctx, &u.f_2misc, insn); + if (trans_VCLE0(ctx, &u.f_2misc)) return true; + return false; + case 0x00010200: + /* 11110011 1.11..01 ....0010 0..0.... */ + disas_neon_dp_extract_2misc(ctx, &u.f_2misc, insn); + if (trans_VCLT0(ctx, &u.f_2misc)) return true; + return false; + case 0x00010280: + /* 11110011 1.11..01 ....0010 1..0.... */ + switch ((insn >> 6) & 0x1) { + case 0x1: + /* 11110011 1.11..01 ....0010 11.0.... */ + disas_neon_dp_extract_2misc_q1(ctx, &u.f_2misc, insn); + if (trans_SHA1H(ctx, &u.f_2misc)) return true; + return false; + } + return false; + case 0x00010300: + /* 11110011 1.11..01 ....0011 0..0.... */ + disas_neon_dp_extract_2misc(ctx, &u.f_2misc, insn); + if (trans_VABS(ctx, &u.f_2misc)) return true; + return false; + case 0x00010380: + /* 11110011 1.11..01 ....0011 1..0.... */ + disas_neon_dp_extract_2misc(ctx, &u.f_2misc, insn); + if (trans_VNEG(ctx, &u.f_2misc)) return true; + return false; + case 0x00020000: + /* 11110011 1.11..10 ....0000 0..0.... */ + disas_neon_dp_extract_2misc(ctx, &u.f_2misc, insn); + if (trans_VSWP(ctx, &u.f_2misc)) return true; + return false; + case 0x00020080: + /* 11110011 1.11..10 ....0000 1..0.... */ + disas_neon_dp_extract_2misc(ctx, &u.f_2misc, insn); + if (trans_VTRN(ctx, &u.f_2misc)) return true; + return false; + case 0x00020100: + /* 11110011 1.11..10 ....0001 0..0.... */ + disas_neon_dp_extract_2misc(ctx, &u.f_2misc, insn); + if (trans_VUZP(ctx, &u.f_2misc)) return true; + return false; + case 0x00020180: + /* 11110011 1.11..10 ....0001 1..0.... */ + disas_neon_dp_extract_2misc(ctx, &u.f_2misc, insn); + if (trans_VZIP(ctx, &u.f_2misc)) return true; + return false; + case 0x00020200: + /* 11110011 1.11..10 ....0010 0..0.... */ + switch ((insn >> 6) & 0x1) { + case 0x0: + /* 11110011 1.11..10 ....0010 00.0.... */ + disas_neon_dp_extract_2misc_q0(ctx, &u.f_2misc, insn); + if (trans_VMOVN(ctx, &u.f_2misc)) return true; + return false; + case 0x1: + /* 11110011 1.11..10 ....0010 01.0.... 
*/ + disas_neon_dp_extract_2misc_q0(ctx, &u.f_2misc, insn); + if (trans_VQMOVUN(ctx, &u.f_2misc)) return true; + return false; + } + return false; + case 0x00020280: + /* 11110011 1.11..10 ....0010 1..0.... */ + switch ((insn >> 6) & 0x1) { + case 0x0: + /* 11110011 1.11..10 ....0010 10.0.... */ + disas_neon_dp_extract_2misc_q0(ctx, &u.f_2misc, insn); + if (trans_VQMOVN_S(ctx, &u.f_2misc)) return true; + return false; + case 0x1: + /* 11110011 1.11..10 ....0010 11.0.... */ + disas_neon_dp_extract_2misc_q0(ctx, &u.f_2misc, insn); + if (trans_VQMOVN_U(ctx, &u.f_2misc)) return true; + return false; + } + return false; + case 0x00020300: + /* 11110011 1.11..10 ....0011 0..0.... */ + switch ((insn >> 6) & 0x1) { + case 0x0: + /* 11110011 1.11..10 ....0011 00.0.... */ + disas_neon_dp_extract_2misc_q0(ctx, &u.f_2misc, insn); + if (trans_VSHLL(ctx, &u.f_2misc)) return true; + return false; + } + return false; + case 0x00020380: + /* 11110011 1.11..10 ....0011 1..0.... */ + switch ((insn >> 6) & 0x1) { + case 0x0: + /* 11110011 1.11..10 ....0011 10.0.... */ + disas_neon_dp_extract_2misc_q1(ctx, &u.f_2misc, insn); + if (trans_SHA1SU1(ctx, &u.f_2misc)) return true; + return false; + case 0x1: + /* 11110011 1.11..10 ....0011 11.0.... */ + disas_neon_dp_extract_2misc_q1(ctx, &u.f_2misc, insn); + if (trans_SHA256SU0(ctx, &u.f_2misc)) return true; + return false; + } + return false; + case 0x00030000: + /* 11110011 1.11..11 ....0000 0..0.... */ + disas_neon_dp_extract_2misc(ctx, &u.f_2misc, insn); + if (trans_VCVTAS(ctx, &u.f_2misc)) return true; + return false; + case 0x00030080: + /* 11110011 1.11..11 ....0000 1..0.... */ + disas_neon_dp_extract_2misc(ctx, &u.f_2misc, insn); + if (trans_VCVTAU(ctx, &u.f_2misc)) return true; + return false; + case 0x00030100: + /* 11110011 1.11..11 ....0001 0..0.... */ + disas_neon_dp_extract_2misc(ctx, &u.f_2misc, insn); + if (trans_VCVTNS(ctx, &u.f_2misc)) return true; + return false; + case 0x00030180: + /* 11110011 1.11..11 ....0001 1..0.... */ + disas_neon_dp_extract_2misc(ctx, &u.f_2misc, insn); + if (trans_VCVTNU(ctx, &u.f_2misc)) return true; + return false; + case 0x00030200: + /* 11110011 1.11..11 ....0010 0..0.... */ + disas_neon_dp_extract_2misc(ctx, &u.f_2misc, insn); + if (trans_VCVTPS(ctx, &u.f_2misc)) return true; + return false; + case 0x00030280: + /* 11110011 1.11..11 ....0010 1..0.... */ + disas_neon_dp_extract_2misc(ctx, &u.f_2misc, insn); + if (trans_VCVTPU(ctx, &u.f_2misc)) return true; + return false; + case 0x00030300: + /* 11110011 1.11..11 ....0011 0..0.... */ + disas_neon_dp_extract_2misc(ctx, &u.f_2misc, insn); + if (trans_VCVTMS(ctx, &u.f_2misc)) return true; + return false; + case 0x00030380: + /* 11110011 1.11..11 ....0011 1..0.... */ + disas_neon_dp_extract_2misc(ctx, &u.f_2misc, insn); + if (trans_VCVTMU(ctx, &u.f_2misc)) return true; + return false; + } + return false; + case 0x1: + /* 11110011 1.11.... ....01.. ...0.... */ + switch (insn & 0x00030380) { + case 0x00000000: + /* 11110011 1.11..00 ....0100 0..0.... */ + disas_neon_dp_extract_2misc(ctx, &u.f_2misc, insn); + if (trans_VCLS(ctx, &u.f_2misc)) return true; + return false; + case 0x00000080: + /* 11110011 1.11..00 ....0100 1..0.... */ + disas_neon_dp_extract_2misc(ctx, &u.f_2misc, insn); + if (trans_VCLZ(ctx, &u.f_2misc)) return true; + return false; + case 0x00000100: + /* 11110011 1.11..00 ....0101 0..0.... 
*/ + disas_neon_dp_extract_2misc(ctx, &u.f_2misc, insn); + if (trans_VCNT(ctx, &u.f_2misc)) return true; + return false; + case 0x00000180: + /* 11110011 1.11..00 ....0101 1..0.... */ + disas_neon_dp_extract_2misc(ctx, &u.f_2misc, insn); + if (trans_VMVN(ctx, &u.f_2misc)) return true; + return false; + case 0x00000200: + /* 11110011 1.11..00 ....0110 0..0.... */ + disas_neon_dp_extract_2misc(ctx, &u.f_2misc, insn); + if (trans_VPADAL_S(ctx, &u.f_2misc)) return true; + return false; + case 0x00000280: + /* 11110011 1.11..00 ....0110 1..0.... */ + disas_neon_dp_extract_2misc(ctx, &u.f_2misc, insn); + if (trans_VPADAL_U(ctx, &u.f_2misc)) return true; + return false; + case 0x00000300: + /* 11110011 1.11..00 ....0111 0..0.... */ + disas_neon_dp_extract_2misc(ctx, &u.f_2misc, insn); + if (trans_VQABS(ctx, &u.f_2misc)) return true; + return false; + case 0x00000380: + /* 11110011 1.11..00 ....0111 1..0.... */ + disas_neon_dp_extract_2misc(ctx, &u.f_2misc, insn); + if (trans_VQNEG(ctx, &u.f_2misc)) return true; + return false; + case 0x00010000: + /* 11110011 1.11..01 ....0100 0..0.... */ + disas_neon_dp_extract_2misc(ctx, &u.f_2misc, insn); + if (trans_VCGT0_F(ctx, &u.f_2misc)) return true; + return false; + case 0x00010080: + /* 11110011 1.11..01 ....0100 1..0.... */ + disas_neon_dp_extract_2misc(ctx, &u.f_2misc, insn); + if (trans_VCGE0_F(ctx, &u.f_2misc)) return true; + return false; + case 0x00010100: + /* 11110011 1.11..01 ....0101 0..0.... */ + disas_neon_dp_extract_2misc(ctx, &u.f_2misc, insn); + if (trans_VCEQ0_F(ctx, &u.f_2misc)) return true; + return false; + case 0x00010180: + /* 11110011 1.11..01 ....0101 1..0.... */ + disas_neon_dp_extract_2misc(ctx, &u.f_2misc, insn); + if (trans_VCLE0_F(ctx, &u.f_2misc)) return true; + return false; + case 0x00010200: + /* 11110011 1.11..01 ....0110 0..0.... */ + disas_neon_dp_extract_2misc(ctx, &u.f_2misc, insn); + if (trans_VCLT0_F(ctx, &u.f_2misc)) return true; + return false; + case 0x00010300: + /* 11110011 1.11..01 ....0111 0..0.... */ + disas_neon_dp_extract_2misc(ctx, &u.f_2misc, insn); + if (trans_VABS_F(ctx, &u.f_2misc)) return true; + return false; + case 0x00010380: + /* 11110011 1.11..01 ....0111 1..0.... */ + disas_neon_dp_extract_2misc(ctx, &u.f_2misc, insn); + if (trans_VNEG_F(ctx, &u.f_2misc)) return true; + return false; + case 0x00020000: + /* 11110011 1.11..10 ....0100 0..0.... */ + disas_neon_dp_extract_2misc(ctx, &u.f_2misc, insn); + if (trans_VRINTN(ctx, &u.f_2misc)) return true; + return false; + case 0x00020080: + /* 11110011 1.11..10 ....0100 1..0.... */ + disas_neon_dp_extract_2misc(ctx, &u.f_2misc, insn); + if (trans_VRINTX(ctx, &u.f_2misc)) return true; + return false; + case 0x00020100: + /* 11110011 1.11..10 ....0101 0..0.... */ + disas_neon_dp_extract_2misc(ctx, &u.f_2misc, insn); + if (trans_VRINTA(ctx, &u.f_2misc)) return true; + return false; + case 0x00020180: + /* 11110011 1.11..10 ....0101 1..0.... */ + disas_neon_dp_extract_2misc(ctx, &u.f_2misc, insn); + if (trans_VRINTZ(ctx, &u.f_2misc)) return true; + return false; + case 0x00020200: + /* 11110011 1.11..10 ....0110 0..0.... */ + switch ((insn >> 6) & 0x1) { + case 0x0: + /* 11110011 1.11..10 ....0110 00.0.... */ + disas_neon_dp_extract_2misc_q0(ctx, &u.f_2misc, insn); + if (trans_VCVT_F16_F32(ctx, &u.f_2misc)) return true; + return false; + } + return false; + case 0x00020280: + /* 11110011 1.11..10 ....0110 1..0.... 
*/ + disas_neon_dp_extract_2misc(ctx, &u.f_2misc, insn); + if (trans_VRINTM(ctx, &u.f_2misc)) return true; + return false; + case 0x00020300: + /* 11110011 1.11..10 ....0111 0..0.... */ + switch ((insn >> 6) & 0x1) { + case 0x0: + /* 11110011 1.11..10 ....0111 00.0.... */ + disas_neon_dp_extract_2misc_q0(ctx, &u.f_2misc, insn); + if (trans_VCVT_F32_F16(ctx, &u.f_2misc)) return true; + return false; + } + return false; + case 0x00020380: + /* 11110011 1.11..10 ....0111 1..0.... */ + disas_neon_dp_extract_2misc(ctx, &u.f_2misc, insn); + if (trans_VRINTP(ctx, &u.f_2misc)) return true; + return false; + case 0x00030000: + /* 11110011 1.11..11 ....0100 0..0.... */ + disas_neon_dp_extract_2misc(ctx, &u.f_2misc, insn); + if (trans_VRECPE(ctx, &u.f_2misc)) return true; + return false; + case 0x00030080: + /* 11110011 1.11..11 ....0100 1..0.... */ + disas_neon_dp_extract_2misc(ctx, &u.f_2misc, insn); + if (trans_VRSQRTE(ctx, &u.f_2misc)) return true; + return false; + case 0x00030100: + /* 11110011 1.11..11 ....0101 0..0.... */ + disas_neon_dp_extract_2misc(ctx, &u.f_2misc, insn); + if (trans_VRECPE_F(ctx, &u.f_2misc)) return true; + return false; + case 0x00030180: + /* 11110011 1.11..11 ....0101 1..0.... */ + disas_neon_dp_extract_2misc(ctx, &u.f_2misc, insn); + if (trans_VRSQRTE_F(ctx, &u.f_2misc)) return true; + return false; + case 0x00030200: + /* 11110011 1.11..11 ....0110 0..0.... */ + disas_neon_dp_extract_2misc(ctx, &u.f_2misc, insn); + if (trans_VCVT_FS(ctx, &u.f_2misc)) return true; + return false; + case 0x00030280: + /* 11110011 1.11..11 ....0110 1..0.... */ + disas_neon_dp_extract_2misc(ctx, &u.f_2misc, insn); + if (trans_VCVT_FU(ctx, &u.f_2misc)) return true; + return false; + case 0x00030300: + /* 11110011 1.11..11 ....0111 0..0.... */ + disas_neon_dp_extract_2misc(ctx, &u.f_2misc, insn); + if (trans_VCVT_SF(ctx, &u.f_2misc)) return true; + return false; + case 0x00030380: + /* 11110011 1.11..11 ....0111 1..0.... */ + disas_neon_dp_extract_2misc(ctx, &u.f_2misc, insn); + if (trans_VCVT_UF(ctx, &u.f_2misc)) return true; + return false; + } + return false; + case 0x2: + /* 11110011 1.11.... ....10.. ...0.... */ + disas_neon_dp_extract_disas_neon_dp_Fmt_25(ctx, &u.f_disas_neon_dp4, insn); + if (trans_VTBL(ctx, &u.f_disas_neon_dp4)) return true; + return false; + case 0x3: + /* 11110011 1.11.... ....11.. ...0.... */ + switch (insn & 0x00010380) { + case 0x00000000: + /* 11110011 1.11...0 ....1100 0..0.... */ + switch ((insn >> 17) & 0x1) { + case 0x0: + /* 11110011 1.11..00 ....1100 0..0.... */ + switch ((insn >> 18) & 0x1) { + case 0x1: + /* 11110011 1.11.100 ....1100 0..0.... */ + disas_neon_dp_extract_disas_neon_dp_Fmt_28(ctx, &u.f_disas_neon_dp5, insn); + if (trans_VDUP_scalar(ctx, &u.f_disas_neon_dp5)) return true; + return false; + } + return false; + case 0x1: + /* 11110011 1.11..10 ....1100 0..0.... */ + disas_neon_dp_extract_disas_neon_dp_Fmt_27(ctx, &u.f_disas_neon_dp5, insn); + if (trans_VDUP_scalar(ctx, &u.f_disas_neon_dp5)) return true; + return false; + } + return false; + case 0x00010000: + /* 11110011 1.11...1 ....1100 0..0.... */ + disas_neon_dp_extract_disas_neon_dp_Fmt_26(ctx, &u.f_disas_neon_dp5, insn); + if (trans_VDUP_scalar(ctx, &u.f_disas_neon_dp5)) return true; + return false; + } + return false; + } + return false; + } + } + switch (insn & 0x00000f40) { + case 0x00000000: + /* 1111001. 1....... ....0000 .0.0.... */ + switch ((insn >> 24) & 0x1) { + case 0x0: + /* 11110010 1....... ....0000 .0.0.... 
*/ + disas_neon_dp_extract_3diff(ctx, &u.f_3diff, insn); + if (trans_VADDL_S_3d(ctx, &u.f_3diff)) return true; + return false; + case 0x1: + /* 11110011 1....... ....0000 .0.0.... */ + disas_neon_dp_extract_3diff(ctx, &u.f_3diff, insn); + if (trans_VADDL_U_3d(ctx, &u.f_3diff)) return true; + return false; + } + return false; + case 0x00000040: + /* 1111001. 1....... ....0000 .1.0.... */ + disas_neon_dp_extract_2scalar(ctx, &u.f_2scalar, insn); + if (trans_VMLA_2sc(ctx, &u.f_2scalar)) return true; + return false; + case 0x00000100: + /* 1111001. 1....... ....0001 .0.0.... */ + switch ((insn >> 24) & 0x1) { + case 0x0: + /* 11110010 1....... ....0001 .0.0.... */ + disas_neon_dp_extract_3diff(ctx, &u.f_3diff, insn); + if (trans_VADDW_S_3d(ctx, &u.f_3diff)) return true; + return false; + case 0x1: + /* 11110011 1....... ....0001 .0.0.... */ + disas_neon_dp_extract_3diff(ctx, &u.f_3diff, insn); + if (trans_VADDW_U_3d(ctx, &u.f_3diff)) return true; + return false; + } + return false; + case 0x00000140: + /* 1111001. 1....... ....0001 .1.0.... */ + disas_neon_dp_extract_2scalar(ctx, &u.f_2scalar, insn); + if (trans_VMLA_F_2sc(ctx, &u.f_2scalar)) return true; + return false; + case 0x00000200: + /* 1111001. 1....... ....0010 .0.0.... */ + switch ((insn >> 24) & 0x1) { + case 0x0: + /* 11110010 1....... ....0010 .0.0.... */ + disas_neon_dp_extract_3diff(ctx, &u.f_3diff, insn); + if (trans_VSUBL_S_3d(ctx, &u.f_3diff)) return true; + return false; + case 0x1: + /* 11110011 1....... ....0010 .0.0.... */ + disas_neon_dp_extract_3diff(ctx, &u.f_3diff, insn); + if (trans_VSUBL_U_3d(ctx, &u.f_3diff)) return true; + return false; + } + return false; + case 0x00000240: + /* 1111001. 1....... ....0010 .1.0.... */ + switch ((insn >> 24) & 0x1) { + case 0x0: + /* 11110010 1....... ....0010 .1.0.... */ + disas_neon_dp_extract_2scalar_q0(ctx, &u.f_2scalar, insn); + if (trans_VMLAL_S_2sc(ctx, &u.f_2scalar)) return true; + return false; + case 0x1: + /* 11110011 1....... ....0010 .1.0.... */ + disas_neon_dp_extract_2scalar_q0(ctx, &u.f_2scalar, insn); + if (trans_VMLAL_U_2sc(ctx, &u.f_2scalar)) return true; + return false; + } + return false; + case 0x00000300: + /* 1111001. 1....... ....0011 .0.0.... */ + switch ((insn >> 24) & 0x1) { + case 0x0: + /* 11110010 1....... ....0011 .0.0.... */ + disas_neon_dp_extract_3diff(ctx, &u.f_3diff, insn); + if (trans_VSUBW_S_3d(ctx, &u.f_3diff)) return true; + return false; + case 0x1: + /* 11110011 1....... ....0011 .0.0.... */ + disas_neon_dp_extract_3diff(ctx, &u.f_3diff, insn); + if (trans_VSUBW_U_3d(ctx, &u.f_3diff)) return true; + return false; + } + return false; + case 0x00000340: + /* 1111001. 1....... ....0011 .1.0.... */ + switch ((insn >> 24) & 0x1) { + case 0x0: + /* 11110010 1....... ....0011 .1.0.... */ + disas_neon_dp_extract_2scalar_q0(ctx, &u.f_2scalar, insn); + if (trans_VQDMLAL_2sc(ctx, &u.f_2scalar)) return true; + return false; + } + return false; + case 0x00000400: + /* 1111001. 1....... ....0100 .0.0.... */ + switch ((insn >> 24) & 0x1) { + case 0x0: + /* 11110010 1....... ....0100 .0.0.... */ + disas_neon_dp_extract_3diff(ctx, &u.f_3diff, insn); + if (trans_VADDHN_3d(ctx, &u.f_3diff)) return true; + return false; + case 0x1: + /* 11110011 1....... ....0100 .0.0.... */ + disas_neon_dp_extract_3diff(ctx, &u.f_3diff, insn); + if (trans_VRADDHN_3d(ctx, &u.f_3diff)) return true; + return false; + } + return false; + case 0x00000440: + /* 1111001. 1....... ....0100 .1.0.... 
*/ + disas_neon_dp_extract_2scalar(ctx, &u.f_2scalar, insn); + if (trans_VMLS_2sc(ctx, &u.f_2scalar)) return true; + return false; + case 0x00000500: + /* 1111001. 1....... ....0101 .0.0.... */ + switch ((insn >> 24) & 0x1) { + case 0x0: + /* 11110010 1....... ....0101 .0.0.... */ + disas_neon_dp_extract_3diff(ctx, &u.f_3diff, insn); + if (trans_VABAL_S_3d(ctx, &u.f_3diff)) return true; + return false; + case 0x1: + /* 11110011 1....... ....0101 .0.0.... */ + disas_neon_dp_extract_3diff(ctx, &u.f_3diff, insn); + if (trans_VABAL_U_3d(ctx, &u.f_3diff)) return true; + return false; + } + return false; + case 0x00000540: + /* 1111001. 1....... ....0101 .1.0.... */ + disas_neon_dp_extract_2scalar(ctx, &u.f_2scalar, insn); + if (trans_VMLS_F_2sc(ctx, &u.f_2scalar)) return true; + return false; + case 0x00000600: + /* 1111001. 1....... ....0110 .0.0.... */ + switch ((insn >> 24) & 0x1) { + case 0x0: + /* 11110010 1....... ....0110 .0.0.... */ + disas_neon_dp_extract_3diff(ctx, &u.f_3diff, insn); + if (trans_VSUBHN_3d(ctx, &u.f_3diff)) return true; + return false; + case 0x1: + /* 11110011 1....... ....0110 .0.0.... */ + disas_neon_dp_extract_3diff(ctx, &u.f_3diff, insn); + if (trans_VRSUBHN_3d(ctx, &u.f_3diff)) return true; + return false; + } + return false; + case 0x00000640: + /* 1111001. 1....... ....0110 .1.0.... */ + switch ((insn >> 24) & 0x1) { + case 0x0: + /* 11110010 1....... ....0110 .1.0.... */ + disas_neon_dp_extract_2scalar_q0(ctx, &u.f_2scalar, insn); + if (trans_VMLSL_S_2sc(ctx, &u.f_2scalar)) return true; + return false; + case 0x1: + /* 11110011 1....... ....0110 .1.0.... */ + disas_neon_dp_extract_2scalar_q0(ctx, &u.f_2scalar, insn); + if (trans_VMLSL_U_2sc(ctx, &u.f_2scalar)) return true; + return false; + } + return false; + case 0x00000700: + /* 1111001. 1....... ....0111 .0.0.... */ + switch ((insn >> 24) & 0x1) { + case 0x0: + /* 11110010 1....... ....0111 .0.0.... */ + disas_neon_dp_extract_3diff(ctx, &u.f_3diff, insn); + if (trans_VABDL_S_3d(ctx, &u.f_3diff)) return true; + return false; + case 0x1: + /* 11110011 1....... ....0111 .0.0.... */ + disas_neon_dp_extract_3diff(ctx, &u.f_3diff, insn); + if (trans_VABDL_U_3d(ctx, &u.f_3diff)) return true; + return false; + } + return false; + case 0x00000740: + /* 1111001. 1....... ....0111 .1.0.... */ + switch ((insn >> 24) & 0x1) { + case 0x0: + /* 11110010 1....... ....0111 .1.0.... */ + disas_neon_dp_extract_2scalar_q0(ctx, &u.f_2scalar, insn); + if (trans_VQDMLSL_2sc(ctx, &u.f_2scalar)) return true; + return false; + } + return false; + case 0x00000800: + /* 1111001. 1....... ....1000 .0.0.... */ + switch ((insn >> 24) & 0x1) { + case 0x0: + /* 11110010 1....... ....1000 .0.0.... */ + disas_neon_dp_extract_3diff(ctx, &u.f_3diff, insn); + if (trans_VMLAL_S_3d(ctx, &u.f_3diff)) return true; + return false; + case 0x1: + /* 11110011 1....... ....1000 .0.0.... */ + disas_neon_dp_extract_3diff(ctx, &u.f_3diff, insn); + if (trans_VMLAL_U_3d(ctx, &u.f_3diff)) return true; + return false; + } + return false; + case 0x00000840: + /* 1111001. 1....... ....1000 .1.0.... */ + disas_neon_dp_extract_2scalar(ctx, &u.f_2scalar, insn); + if (trans_VMUL_2sc(ctx, &u.f_2scalar)) return true; + return false; + case 0x00000900: + /* 1111001. 1....... ....1001 .0.0.... */ + switch ((insn >> 24) & 0x1) { + case 0x0: + /* 11110010 1....... ....1001 .0.0.... */ + disas_neon_dp_extract_3diff(ctx, &u.f_3diff, insn); + if (trans_VQDMLAL_3d(ctx, &u.f_3diff)) return true; + return false; + } + return false; + case 0x00000940: + /* 1111001. 1....... 
....1001 .1.0.... */ + disas_neon_dp_extract_2scalar(ctx, &u.f_2scalar, insn); + if (trans_VMUL_F_2sc(ctx, &u.f_2scalar)) return true; + return false; + case 0x00000a00: + /* 1111001. 1....... ....1010 .0.0.... */ + switch ((insn >> 24) & 0x1) { + case 0x0: + /* 11110010 1....... ....1010 .0.0.... */ + disas_neon_dp_extract_3diff(ctx, &u.f_3diff, insn); + if (trans_VMLSL_S_3d(ctx, &u.f_3diff)) return true; + return false; + case 0x1: + /* 11110011 1....... ....1010 .0.0.... */ + disas_neon_dp_extract_3diff(ctx, &u.f_3diff, insn); + if (trans_VMLSL_U_3d(ctx, &u.f_3diff)) return true; + return false; + } + return false; + case 0x00000a40: + /* 1111001. 1....... ....1010 .1.0.... */ + switch ((insn >> 24) & 0x1) { + case 0x0: + /* 11110010 1....... ....1010 .1.0.... */ + disas_neon_dp_extract_2scalar_q0(ctx, &u.f_2scalar, insn); + if (trans_VMULL_S_2sc(ctx, &u.f_2scalar)) return true; + return false; + case 0x1: + /* 11110011 1....... ....1010 .1.0.... */ + disas_neon_dp_extract_2scalar_q0(ctx, &u.f_2scalar, insn); + if (trans_VMULL_U_2sc(ctx, &u.f_2scalar)) return true; + return false; + } + return false; + case 0x00000b00: + /* 1111001. 1....... ....1011 .0.0.... */ + switch ((insn >> 24) & 0x1) { + case 0x0: + /* 11110010 1....... ....1011 .0.0.... */ + disas_neon_dp_extract_3diff(ctx, &u.f_3diff, insn); + if (trans_VQDMLSL_3d(ctx, &u.f_3diff)) return true; + return false; + } + return false; + case 0x00000b40: + /* 1111001. 1....... ....1011 .1.0.... */ + switch ((insn >> 24) & 0x1) { + case 0x0: + /* 11110010 1....... ....1011 .1.0.... */ + disas_neon_dp_extract_2scalar_q0(ctx, &u.f_2scalar, insn); + if (trans_VQDMULL_2sc(ctx, &u.f_2scalar)) return true; + return false; + } + return false; + case 0x00000c00: + /* 1111001. 1....... ....1100 .0.0.... */ + switch ((insn >> 24) & 0x1) { + case 0x0: + /* 11110010 1....... ....1100 .0.0.... */ + disas_neon_dp_extract_3diff(ctx, &u.f_3diff, insn); + if (trans_VMULL_S_3d(ctx, &u.f_3diff)) return true; + return false; + case 0x1: + /* 11110011 1....... ....1100 .0.0.... */ + disas_neon_dp_extract_3diff(ctx, &u.f_3diff, insn); + if (trans_VMULL_U_3d(ctx, &u.f_3diff)) return true; + return false; + } + return false; + case 0x00000c40: + /* 1111001. 1....... ....1100 .1.0.... */ + disas_neon_dp_extract_2scalar(ctx, &u.f_2scalar, insn); + if (trans_VQDMULH_2sc(ctx, &u.f_2scalar)) return true; + return false; + case 0x00000d00: + /* 1111001. 1....... ....1101 .0.0.... */ + switch ((insn >> 24) & 0x1) { + case 0x0: + /* 11110010 1....... ....1101 .0.0.... */ + disas_neon_dp_extract_3diff(ctx, &u.f_3diff, insn); + if (trans_VQDMULL_3d(ctx, &u.f_3diff)) return true; + return false; + } + return false; + case 0x00000d40: + /* 1111001. 1....... ....1101 .1.0.... */ + disas_neon_dp_extract_2scalar(ctx, &u.f_2scalar, insn); + if (trans_VQRDMULH_2sc(ctx, &u.f_2scalar)) return true; + return false; + case 0x00000e00: + /* 1111001. 1....... ....1110 .0.0.... */ + switch ((insn >> 24) & 0x1) { + case 0x0: + /* 11110010 1....... ....1110 .0.0.... */ + disas_neon_dp_extract_3diff(ctx, &u.f_3diff, insn); + if (trans_VMULL_P_3d(ctx, &u.f_3diff)) return true; + return false; + } + return false; + case 0x00000e40: + /* 1111001. 1....... ....1110 .1.0.... */ + disas_neon_dp_extract_2scalar(ctx, &u.f_2scalar, insn); + if (trans_VQRDMLAH_2sc(ctx, &u.f_2scalar)) return true; + return false; + case 0x00000f40: + /* 1111001. 1....... ....1111 .1.0.... 
*/ + disas_neon_dp_extract_2scalar(ctx, &u.f_2scalar, insn); + if (trans_VQRDMLSH_2sc(ctx, &u.f_2scalar)) return true; + return false; + } + return false; + case 0xf2800010: + /* 1111001. 1....... ........ ...1.... */ + switch ((insn >> 7) & 0x1) { + case 0x0: + /* 1111001. 1....... ........ 0..1.... */ + switch ((insn >> 21) & 0x1) { + case 0x0: + /* 1111001. 1.0..... ........ 0..1.... */ + switch ((insn >> 20) & 0x1) { + case 0x0: + /* 1111001. 1.00.... ........ 0..1.... */ + switch ((insn >> 19) & 0x1) { + case 0x0: + /* 1111001. 1.000... ........ 0..1.... */ + disas_neon_dp_extract_1reg_imm(ctx, &u.f_1reg_imm, insn); + u.f_1reg_imm.cmode = extract32(insn, 8, 4); + u.f_1reg_imm.op = extract32(insn, 5, 1); + if (trans_Vimm_1r(ctx, &u.f_1reg_imm)) return true; + return false; + case 0x1: + /* 1111001. 1.001... ........ 0..1.... */ + switch (insn & 0x01000f00) { + case 0x00000000: + /* 11110010 1.001... ....0000 0..1.... */ + disas_neon_dp_extract_2reg_shr_b(ctx, &u.f_2reg_shift, insn); + if (trans_VSHR_S_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + case 0x00000100: + /* 11110010 1.001... ....0001 0..1.... */ + disas_neon_dp_extract_2reg_shr_b(ctx, &u.f_2reg_shift, insn); + if (trans_VSRA_S_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + case 0x00000200: + /* 11110010 1.001... ....0010 0..1.... */ + disas_neon_dp_extract_2reg_shr_b(ctx, &u.f_2reg_shift, insn); + if (trans_VRSHR_S_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + case 0x00000300: + /* 11110010 1.001... ....0011 0..1.... */ + disas_neon_dp_extract_2reg_shr_b(ctx, &u.f_2reg_shift, insn); + if (trans_VRSRA_S_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + case 0x00000500: + /* 11110010 1.001... ....0101 0..1.... */ + disas_neon_dp_extract_2reg_shl_b(ctx, &u.f_2reg_shift, insn); + if (trans_VSHL_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + case 0x00000700: + /* 11110010 1.001... ....0111 0..1.... */ + disas_neon_dp_extract_2reg_shl_b(ctx, &u.f_2reg_shift, insn); + if (trans_VQSHL_S_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + case 0x00000800: + /* 11110010 1.001... ....1000 0..1.... */ + disas_neon_dp_extract_2reg_shrn_h(ctx, &u.f_2reg_shift, insn); + switch ((insn >> 6) & 0x1) { + case 0x0: + /* 11110010 1.001... ....1000 00.1.... */ + if (trans_VSHRN_16_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + case 0x1: + /* 11110010 1.001... ....1000 01.1.... */ + if (trans_VRSHRN_16_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + } + return false; + case 0x00000900: + /* 11110010 1.001... ....1001 0..1.... */ + disas_neon_dp_extract_2reg_shrn_h(ctx, &u.f_2reg_shift, insn); + switch ((insn >> 6) & 0x1) { + case 0x0: + /* 11110010 1.001... ....1001 00.1.... */ + if (trans_VQSHRN_S16_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + case 0x1: + /* 11110010 1.001... ....1001 01.1.... */ + if (trans_VQRSHRN_S16_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + } + return false; + case 0x00000a00: + /* 11110010 1.001... ....1010 0..1.... */ + disas_neon_dp_extract_2reg_shll_b(ctx, &u.f_2reg_shift, insn); + switch ((insn >> 6) & 0x1) { + case 0x0: + /* 11110010 1.001... ....1010 00.1.... */ + if (trans_VSHLL_S_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + } + return false; + case 0x01000000: + /* 11110011 1.001... ....0000 0..1.... */ + disas_neon_dp_extract_2reg_shr_b(ctx, &u.f_2reg_shift, insn); + if (trans_VSHR_U_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + case 0x01000100: + /* 11110011 1.001... 
....0001 0..1.... */ + disas_neon_dp_extract_2reg_shr_b(ctx, &u.f_2reg_shift, insn); + if (trans_VSRA_U_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + case 0x01000200: + /* 11110011 1.001... ....0010 0..1.... */ + disas_neon_dp_extract_2reg_shr_b(ctx, &u.f_2reg_shift, insn); + if (trans_VRSHR_U_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + case 0x01000300: + /* 11110011 1.001... ....0011 0..1.... */ + disas_neon_dp_extract_2reg_shr_b(ctx, &u.f_2reg_shift, insn); + if (trans_VRSRA_U_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + case 0x01000400: + /* 11110011 1.001... ....0100 0..1.... */ + disas_neon_dp_extract_2reg_shr_b(ctx, &u.f_2reg_shift, insn); + if (trans_VSRI_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + case 0x01000500: + /* 11110011 1.001... ....0101 0..1.... */ + disas_neon_dp_extract_2reg_shl_b(ctx, &u.f_2reg_shift, insn); + if (trans_VSLI_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + case 0x01000600: + /* 11110011 1.001... ....0110 0..1.... */ + disas_neon_dp_extract_2reg_shl_b(ctx, &u.f_2reg_shift, insn); + if (trans_VQSHLU_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + case 0x01000700: + /* 11110011 1.001... ....0111 0..1.... */ + disas_neon_dp_extract_2reg_shl_b(ctx, &u.f_2reg_shift, insn); + if (trans_VQSHL_U_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + case 0x01000800: + /* 11110011 1.001... ....1000 0..1.... */ + disas_neon_dp_extract_2reg_shrn_h(ctx, &u.f_2reg_shift, insn); + switch ((insn >> 6) & 0x1) { + case 0x0: + /* 11110011 1.001... ....1000 00.1.... */ + if (trans_VQSHRUN_16_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + case 0x1: + /* 11110011 1.001... ....1000 01.1.... */ + if (trans_VQRSHRUN_16_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + } + return false; + case 0x01000900: + /* 11110011 1.001... ....1001 0..1.... */ + disas_neon_dp_extract_2reg_shrn_h(ctx, &u.f_2reg_shift, insn); + switch ((insn >> 6) & 0x1) { + case 0x0: + /* 11110011 1.001... ....1001 00.1.... */ + if (trans_VQSHRN_U16_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + case 0x1: + /* 11110011 1.001... ....1001 01.1.... */ + if (trans_VQRSHRN_U16_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + } + return false; + case 0x01000a00: + /* 11110011 1.001... ....1010 0..1.... */ + disas_neon_dp_extract_2reg_shll_b(ctx, &u.f_2reg_shift, insn); + switch ((insn >> 6) & 0x1) { + case 0x0: + /* 11110011 1.001... ....1010 00.1.... */ + if (trans_VSHLL_U_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + } + return false; + } + return false; + } + return false; + case 0x1: + /* 1111001. 1.01.... ........ 0..1.... */ + switch (insn & 0x01000f00) { + case 0x00000000: + /* 11110010 1.01.... ....0000 0..1.... */ + disas_neon_dp_extract_2reg_shr_h(ctx, &u.f_2reg_shift, insn); + if (trans_VSHR_S_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + case 0x00000100: + /* 11110010 1.01.... ....0001 0..1.... */ + disas_neon_dp_extract_2reg_shr_h(ctx, &u.f_2reg_shift, insn); + if (trans_VSRA_S_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + case 0x00000200: + /* 11110010 1.01.... ....0010 0..1.... */ + disas_neon_dp_extract_2reg_shr_h(ctx, &u.f_2reg_shift, insn); + if (trans_VRSHR_S_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + case 0x00000300: + /* 11110010 1.01.... ....0011 0..1.... 
*/ + disas_neon_dp_extract_2reg_shr_h(ctx, &u.f_2reg_shift, insn); + if (trans_VRSRA_S_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + case 0x00000500: + /* 11110010 1.01.... ....0101 0..1.... */ + disas_neon_dp_extract_2reg_shl_h(ctx, &u.f_2reg_shift, insn); + if (trans_VSHL_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + case 0x00000700: + /* 11110010 1.01.... ....0111 0..1.... */ + disas_neon_dp_extract_2reg_shl_h(ctx, &u.f_2reg_shift, insn); + if (trans_VQSHL_S_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + case 0x00000800: + /* 11110010 1.01.... ....1000 0..1.... */ + disas_neon_dp_extract_2reg_shrn_s(ctx, &u.f_2reg_shift, insn); + switch ((insn >> 6) & 0x1) { + case 0x0: + /* 11110010 1.01.... ....1000 00.1.... */ + if (trans_VSHRN_32_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + case 0x1: + /* 11110010 1.01.... ....1000 01.1.... */ + if (trans_VRSHRN_32_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + } + return false; + case 0x00000900: + /* 11110010 1.01.... ....1001 0..1.... */ + disas_neon_dp_extract_2reg_shrn_s(ctx, &u.f_2reg_shift, insn); + switch ((insn >> 6) & 0x1) { + case 0x0: + /* 11110010 1.01.... ....1001 00.1.... */ + if (trans_VQSHRN_S32_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + case 0x1: + /* 11110010 1.01.... ....1001 01.1.... */ + if (trans_VQRSHRN_S32_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + } + return false; + case 0x00000a00: + /* 11110010 1.01.... ....1010 0..1.... */ + disas_neon_dp_extract_2reg_shll_h(ctx, &u.f_2reg_shift, insn); + switch ((insn >> 6) & 0x1) { + case 0x0: + /* 11110010 1.01.... ....1010 00.1.... */ + if (trans_VSHLL_S_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + } + return false; + case 0x01000000: + /* 11110011 1.01.... ....0000 0..1.... */ + disas_neon_dp_extract_2reg_shr_h(ctx, &u.f_2reg_shift, insn); + if (trans_VSHR_U_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + case 0x01000100: + /* 11110011 1.01.... ....0001 0..1.... */ + disas_neon_dp_extract_2reg_shr_h(ctx, &u.f_2reg_shift, insn); + if (trans_VSRA_U_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + case 0x01000200: + /* 11110011 1.01.... ....0010 0..1.... */ + disas_neon_dp_extract_2reg_shr_h(ctx, &u.f_2reg_shift, insn); + if (trans_VRSHR_U_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + case 0x01000300: + /* 11110011 1.01.... ....0011 0..1.... */ + disas_neon_dp_extract_2reg_shr_h(ctx, &u.f_2reg_shift, insn); + if (trans_VRSRA_U_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + case 0x01000400: + /* 11110011 1.01.... ....0100 0..1.... */ + disas_neon_dp_extract_2reg_shr_h(ctx, &u.f_2reg_shift, insn); + if (trans_VSRI_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + case 0x01000500: + /* 11110011 1.01.... ....0101 0..1.... */ + disas_neon_dp_extract_2reg_shl_h(ctx, &u.f_2reg_shift, insn); + if (trans_VSLI_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + case 0x01000600: + /* 11110011 1.01.... ....0110 0..1.... */ + disas_neon_dp_extract_2reg_shl_h(ctx, &u.f_2reg_shift, insn); + if (trans_VQSHLU_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + case 0x01000700: + /* 11110011 1.01.... ....0111 0..1.... */ + disas_neon_dp_extract_2reg_shl_h(ctx, &u.f_2reg_shift, insn); + if (trans_VQSHL_U_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + case 0x01000800: + /* 11110011 1.01.... ....1000 0..1.... 
*/ + disas_neon_dp_extract_2reg_shrn_s(ctx, &u.f_2reg_shift, insn); + switch ((insn >> 6) & 0x1) { + case 0x0: + /* 11110011 1.01.... ....1000 00.1.... */ + if (trans_VQSHRUN_32_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + case 0x1: + /* 11110011 1.01.... ....1000 01.1.... */ + if (trans_VQRSHRUN_32_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + } + return false; + case 0x01000900: + /* 11110011 1.01.... ....1001 0..1.... */ + disas_neon_dp_extract_2reg_shrn_s(ctx, &u.f_2reg_shift, insn); + switch ((insn >> 6) & 0x1) { + case 0x0: + /* 11110011 1.01.... ....1001 00.1.... */ + if (trans_VQSHRN_U32_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + case 0x1: + /* 11110011 1.01.... ....1001 01.1.... */ + if (trans_VQRSHRN_U32_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + } + return false; + case 0x01000a00: + /* 11110011 1.01.... ....1010 0..1.... */ + disas_neon_dp_extract_2reg_shll_h(ctx, &u.f_2reg_shift, insn); + switch ((insn >> 6) & 0x1) { + case 0x0: + /* 11110011 1.01.... ....1010 00.1.... */ + if (trans_VSHLL_U_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + } + return false; + } + return false; + } + return false; + case 0x1: + /* 1111001. 1.1..... ........ 0..1.... */ + switch (insn & 0x01000f00) { + case 0x00000000: + /* 11110010 1.1..... ....0000 0..1.... */ + disas_neon_dp_extract_2reg_shr_s(ctx, &u.f_2reg_shift, insn); + if (trans_VSHR_S_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + case 0x00000100: + /* 11110010 1.1..... ....0001 0..1.... */ + disas_neon_dp_extract_2reg_shr_s(ctx, &u.f_2reg_shift, insn); + if (trans_VSRA_S_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + case 0x00000200: + /* 11110010 1.1..... ....0010 0..1.... */ + disas_neon_dp_extract_2reg_shr_s(ctx, &u.f_2reg_shift, insn); + if (trans_VRSHR_S_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + case 0x00000300: + /* 11110010 1.1..... ....0011 0..1.... */ + disas_neon_dp_extract_2reg_shr_s(ctx, &u.f_2reg_shift, insn); + if (trans_VRSRA_S_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + case 0x00000500: + /* 11110010 1.1..... ....0101 0..1.... */ + disas_neon_dp_extract_2reg_shl_s(ctx, &u.f_2reg_shift, insn); + if (trans_VSHL_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + case 0x00000700: + /* 11110010 1.1..... ....0111 0..1.... */ + disas_neon_dp_extract_2reg_shl_s(ctx, &u.f_2reg_shift, insn); + if (trans_VQSHL_S_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + case 0x00000800: + /* 11110010 1.1..... ....1000 0..1.... */ + disas_neon_dp_extract_2reg_shrn_d(ctx, &u.f_2reg_shift, insn); + switch ((insn >> 6) & 0x1) { + case 0x0: + /* 11110010 1.1..... ....1000 00.1.... */ + if (trans_VSHRN_64_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + case 0x1: + /* 11110010 1.1..... ....1000 01.1.... */ + if (trans_VRSHRN_64_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + } + return false; + case 0x00000900: + /* 11110010 1.1..... ....1001 0..1.... */ + disas_neon_dp_extract_2reg_shrn_d(ctx, &u.f_2reg_shift, insn); + switch ((insn >> 6) & 0x1) { + case 0x0: + /* 11110010 1.1..... ....1001 00.1.... */ + if (trans_VQSHRN_S64_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + case 0x1: + /* 11110010 1.1..... ....1001 01.1.... */ + if (trans_VQRSHRN_S64_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + } + return false; + case 0x00000a00: + /* 11110010 1.1..... ....1010 0..1.... 
*/ + disas_neon_dp_extract_2reg_shll_s(ctx, &u.f_2reg_shift, insn); + switch ((insn >> 6) & 0x1) { + case 0x0: + /* 11110010 1.1..... ....1010 00.1.... */ + if (trans_VSHLL_S_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + } + return false; + case 0x00000e00: + /* 11110010 1.1..... ....1110 0..1.... */ + disas_neon_dp_extract_2reg_vcvt(ctx, &u.f_2reg_shift, insn); + if (trans_VCVT_SF_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + case 0x00000f00: + /* 11110010 1.1..... ....1111 0..1.... */ + disas_neon_dp_extract_2reg_vcvt(ctx, &u.f_2reg_shift, insn); + if (trans_VCVT_FS_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + case 0x01000000: + /* 11110011 1.1..... ....0000 0..1.... */ + disas_neon_dp_extract_2reg_shr_s(ctx, &u.f_2reg_shift, insn); + if (trans_VSHR_U_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + case 0x01000100: + /* 11110011 1.1..... ....0001 0..1.... */ + disas_neon_dp_extract_2reg_shr_s(ctx, &u.f_2reg_shift, insn); + if (trans_VSRA_U_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + case 0x01000200: + /* 11110011 1.1..... ....0010 0..1.... */ + disas_neon_dp_extract_2reg_shr_s(ctx, &u.f_2reg_shift, insn); + if (trans_VRSHR_U_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + case 0x01000300: + /* 11110011 1.1..... ....0011 0..1.... */ + disas_neon_dp_extract_2reg_shr_s(ctx, &u.f_2reg_shift, insn); + if (trans_VRSRA_U_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + case 0x01000400: + /* 11110011 1.1..... ....0100 0..1.... */ + disas_neon_dp_extract_2reg_shr_s(ctx, &u.f_2reg_shift, insn); + if (trans_VSRI_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + case 0x01000500: + /* 11110011 1.1..... ....0101 0..1.... */ + disas_neon_dp_extract_2reg_shl_s(ctx, &u.f_2reg_shift, insn); + if (trans_VSLI_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + case 0x01000600: + /* 11110011 1.1..... ....0110 0..1.... */ + disas_neon_dp_extract_2reg_shl_s(ctx, &u.f_2reg_shift, insn); + if (trans_VQSHLU_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + case 0x01000700: + /* 11110011 1.1..... ....0111 0..1.... */ + disas_neon_dp_extract_2reg_shl_s(ctx, &u.f_2reg_shift, insn); + if (trans_VQSHL_U_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + case 0x01000800: + /* 11110011 1.1..... ....1000 0..1.... */ + disas_neon_dp_extract_2reg_shrn_d(ctx, &u.f_2reg_shift, insn); + switch ((insn >> 6) & 0x1) { + case 0x0: + /* 11110011 1.1..... ....1000 00.1.... */ + if (trans_VQSHRUN_64_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + case 0x1: + /* 11110011 1.1..... ....1000 01.1.... */ + if (trans_VQRSHRUN_64_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + } + return false; + case 0x01000900: + /* 11110011 1.1..... ....1001 0..1.... */ + disas_neon_dp_extract_2reg_shrn_d(ctx, &u.f_2reg_shift, insn); + switch ((insn >> 6) & 0x1) { + case 0x0: + /* 11110011 1.1..... ....1001 00.1.... */ + if (trans_VQSHRN_U64_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + case 0x1: + /* 11110011 1.1..... ....1001 01.1.... */ + if (trans_VQRSHRN_U64_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + } + return false; + case 0x01000a00: + /* 11110011 1.1..... ....1010 0..1.... */ + disas_neon_dp_extract_2reg_shll_s(ctx, &u.f_2reg_shift, insn); + switch ((insn >> 6) & 0x1) { + case 0x0: + /* 11110011 1.1..... ....1010 00.1.... */ + if (trans_VSHLL_U_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + } + return false; + case 0x01000e00: + /* 11110011 1.1..... ....1110 0..1.... 
*/ + disas_neon_dp_extract_2reg_vcvt(ctx, &u.f_2reg_shift, insn); + if (trans_VCVT_UF_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + case 0x01000f00: + /* 11110011 1.1..... ....1111 0..1.... */ + disas_neon_dp_extract_2reg_vcvt(ctx, &u.f_2reg_shift, insn); + if (trans_VCVT_FU_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + } + return false; + } + return false; + case 0x1: + /* 1111001. 1....... ........ 1..1.... */ + switch (insn & 0x01000f00) { + case 0x00000000: + /* 11110010 1....... ....0000 1..1.... */ + disas_neon_dp_extract_2reg_shr_d(ctx, &u.f_2reg_shift, insn); + if (trans_VSHR_S_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + case 0x00000100: + /* 11110010 1....... ....0001 1..1.... */ + disas_neon_dp_extract_2reg_shr_d(ctx, &u.f_2reg_shift, insn); + if (trans_VSRA_S_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + case 0x00000200: + /* 11110010 1....... ....0010 1..1.... */ + disas_neon_dp_extract_2reg_shr_d(ctx, &u.f_2reg_shift, insn); + if (trans_VRSHR_S_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + case 0x00000300: + /* 11110010 1....... ....0011 1..1.... */ + disas_neon_dp_extract_2reg_shr_d(ctx, &u.f_2reg_shift, insn); + if (trans_VRSRA_S_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + case 0x00000500: + /* 11110010 1....... ....0101 1..1.... */ + disas_neon_dp_extract_2reg_shl_d(ctx, &u.f_2reg_shift, insn); + if (trans_VSHL_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + case 0x00000700: + /* 11110010 1....... ....0111 1..1.... */ + disas_neon_dp_extract_2reg_shl_d(ctx, &u.f_2reg_shift, insn); + if (trans_VQSHL_S_64_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + case 0x01000000: + /* 11110011 1....... ....0000 1..1.... */ + disas_neon_dp_extract_2reg_shr_d(ctx, &u.f_2reg_shift, insn); + if (trans_VSHR_U_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + case 0x01000100: + /* 11110011 1....... ....0001 1..1.... */ + disas_neon_dp_extract_2reg_shr_d(ctx, &u.f_2reg_shift, insn); + if (trans_VSRA_U_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + case 0x01000200: + /* 11110011 1....... ....0010 1..1.... */ + disas_neon_dp_extract_2reg_shr_d(ctx, &u.f_2reg_shift, insn); + if (trans_VRSHR_U_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + case 0x01000300: + /* 11110011 1....... ....0011 1..1.... */ + disas_neon_dp_extract_2reg_shr_d(ctx, &u.f_2reg_shift, insn); + if (trans_VRSRA_U_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + case 0x01000400: + /* 11110011 1....... ....0100 1..1.... */ + disas_neon_dp_extract_2reg_shr_d(ctx, &u.f_2reg_shift, insn); + if (trans_VSRI_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + case 0x01000500: + /* 11110011 1....... ....0101 1..1.... */ + disas_neon_dp_extract_2reg_shl_d(ctx, &u.f_2reg_shift, insn); + if (trans_VSLI_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + case 0x01000600: + /* 11110011 1....... ....0110 1..1.... */ + disas_neon_dp_extract_2reg_shl_d(ctx, &u.f_2reg_shift, insn); + if (trans_VQSHLU_64_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + case 0x01000700: + /* 11110011 1....... ....0111 1..1.... 
*/ + disas_neon_dp_extract_2reg_shl_d(ctx, &u.f_2reg_shift, insn); + if (trans_VQSHL_U_64_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + } + return false; + } + return false; + } + return false; +} diff --git a/qemu/target/arm/decode-neon-ls.inc.c b/qemu/target/arm/decode-neon-ls.inc.c new file mode 100644 index 0000000000..aaf998abb2 --- /dev/null +++ b/qemu/target/arm/decode-neon-ls.inc.c @@ -0,0 +1,149 @@ +/* This file is autogenerated by scripts/decodetree.py. */ + +typedef struct { + int align; + int itype; + int l; + int rm; + int rn; + int size; + int vd; +} arg_disas_neon_ls0; + +typedef struct { + int a; + int n; + int rm; + int rn; + int size; + int t; + int vd; +} arg_disas_neon_ls1; + +typedef struct { + int align; + int l; + int n; + int reg_idx; + int rm; + int rn; + int size; + int stride; + int vd; +} arg_disas_neon_ls2; + +typedef arg_disas_neon_ls0 arg_VLDST_multiple; +static bool trans_VLDST_multiple(DisasContext *ctx, arg_VLDST_multiple *a); +typedef arg_disas_neon_ls1 arg_VLD_all_lanes; +static bool trans_VLD_all_lanes(DisasContext *ctx, arg_VLD_all_lanes *a); +typedef arg_disas_neon_ls2 arg_VLDST_single; +static bool trans_VLDST_single(DisasContext *ctx, arg_VLDST_single *a); + +static void disas_neon_ls_extract_disas_neon_ls_Fmt_0(DisasContext *ctx, arg_disas_neon_ls0 *a, uint32_t insn) +{ + a->l = extract32(insn, 21, 1); + a->rn = extract32(insn, 16, 4); + a->itype = extract32(insn, 8, 4); + a->size = extract32(insn, 6, 2); + a->align = extract32(insn, 4, 2); + a->rm = extract32(insn, 0, 4); + a->vd = deposit32(extract32(insn, 12, 4), 4, 28, extract32(insn, 22, 1)); +} + +static void disas_neon_ls_extract_disas_neon_ls_Fmt_1(DisasContext *ctx, arg_disas_neon_ls1 *a, uint32_t insn) +{ + a->rn = extract32(insn, 16, 4); + a->n = extract32(insn, 8, 2); + a->size = extract32(insn, 6, 2); + a->t = extract32(insn, 5, 1); + a->a = extract32(insn, 4, 1); + a->rm = extract32(insn, 0, 4); + a->vd = deposit32(extract32(insn, 12, 4), 4, 28, extract32(insn, 22, 1)); +} + +static void disas_neon_ls_extract_disas_neon_ls_Fmt_2(DisasContext *ctx, arg_disas_neon_ls2 *a, uint32_t insn) +{ + a->l = extract32(insn, 21, 1); + a->rn = extract32(insn, 16, 4); + a->n = extract32(insn, 8, 2); + a->reg_idx = extract32(insn, 5, 3); + a->align = extract32(insn, 4, 1); + a->rm = extract32(insn, 0, 4); + a->vd = deposit32(extract32(insn, 12, 4), 4, 28, extract32(insn, 22, 1)); + a->size = 0; + a->stride = 1; +} + +static void disas_neon_ls_extract_disas_neon_ls_Fmt_3(DisasContext *ctx, arg_disas_neon_ls2 *a, uint32_t insn) +{ + a->l = extract32(insn, 21, 1); + a->rn = extract32(insn, 16, 4); + a->n = extract32(insn, 8, 2); + a->reg_idx = extract32(insn, 6, 2); + a->align = extract32(insn, 4, 2); + a->rm = extract32(insn, 0, 4); + a->vd = deposit32(extract32(insn, 12, 4), 4, 28, extract32(insn, 22, 1)); + a->size = 1; + a->stride = plus1(ctx, extract32(insn, 5, 1)); +} + +static void disas_neon_ls_extract_disas_neon_ls_Fmt_4(DisasContext *ctx, arg_disas_neon_ls2 *a, uint32_t insn) +{ + a->l = extract32(insn, 21, 1); + a->rn = extract32(insn, 16, 4); + a->n = extract32(insn, 8, 2); + a->reg_idx = extract32(insn, 7, 1); + a->align = extract32(insn, 4, 3); + a->rm = extract32(insn, 0, 4); + a->vd = deposit32(extract32(insn, 12, 4), 4, 28, extract32(insn, 22, 1)); + a->size = 2; + a->stride = plus1(ctx, extract32(insn, 6, 1)); +} + +static bool disas_neon_ls(DisasContext *ctx, uint32_t insn) +{ + union { + arg_disas_neon_ls0 f_disas_neon_ls0; + arg_disas_neon_ls1 f_disas_neon_ls1; + 
arg_disas_neon_ls2 f_disas_neon_ls2; + } u; + + switch (insn & 0xff900000) { + case 0xf4000000: + /* 11110100 0..0.... ........ ........ */ + disas_neon_ls_extract_disas_neon_ls_Fmt_0(ctx, &u.f_disas_neon_ls0, insn); + if (trans_VLDST_multiple(ctx, &u.f_disas_neon_ls0)) return true; + return false; + case 0xf4800000: + /* 11110100 1..0.... ........ ........ */ + switch ((insn >> 10) & 0x3) { + case 0x0: + /* 11110100 1..0.... ....00.. ........ */ + disas_neon_ls_extract_disas_neon_ls_Fmt_2(ctx, &u.f_disas_neon_ls2, insn); + if (trans_VLDST_single(ctx, &u.f_disas_neon_ls2)) return true; + return false; + case 0x1: + /* 11110100 1..0.... ....01.. ........ */ + disas_neon_ls_extract_disas_neon_ls_Fmt_3(ctx, &u.f_disas_neon_ls2, insn); + if (trans_VLDST_single(ctx, &u.f_disas_neon_ls2)) return true; + return false; + case 0x2: + /* 11110100 1..0.... ....10.. ........ */ + disas_neon_ls_extract_disas_neon_ls_Fmt_4(ctx, &u.f_disas_neon_ls2, insn); + if (trans_VLDST_single(ctx, &u.f_disas_neon_ls2)) return true; + return false; + case 0x3: + /* 11110100 1..0.... ....11.. ........ */ + disas_neon_ls_extract_disas_neon_ls_Fmt_1(ctx, &u.f_disas_neon_ls1, insn); + switch ((insn >> 21) & 0x1) { + case 0x1: + /* 11110100 1.10.... ....11.. ........ */ + if (trans_VLD_all_lanes(ctx, &u.f_disas_neon_ls1)) return true; + return false; + } + return false; + } + return false; + } + return false; +} diff --git a/qemu/target/arm/decode-neon-shared.inc.c b/qemu/target/arm/decode-neon-shared.inc.c new file mode 100644 index 0000000000..58913b4365 --- /dev/null +++ b/qemu/target/arm/decode-neon-shared.inc.c @@ -0,0 +1,271 @@ +/* This file is autogenerated by scripts/decodetree.py. */ + +typedef struct { + int q; + int rot; + int size; + int vd; + int vm; + int vn; +} arg_disas_neon_shared0; + +typedef struct { + int q; + int u; + int vd; + int vm; + int vn; +} arg_disas_neon_shared1; + +typedef struct { + int q; + int s; + int vd; + int vm; + int vn; +} arg_disas_neon_shared2; + +typedef struct { + int index; + int q; + int rot; + int size; + int vd; + int vm; + int vn; +} arg_disas_neon_shared3; + +typedef struct { + int index; + int q; + int rm; + int u; + int vd; + int vm; + int vn; +} arg_disas_neon_shared4; + +typedef struct { + int index; + int q; + int rm; + int s; + int vd; + int vn; +} arg_disas_neon_shared5; + +typedef arg_disas_neon_shared0 arg_VCMLA; +static bool trans_VCMLA(DisasContext *ctx, arg_VCMLA *a); +typedef arg_disas_neon_shared0 arg_VCADD; +static bool trans_VCADD(DisasContext *ctx, arg_VCADD *a); +typedef arg_disas_neon_shared1 arg_VDOT; +static bool trans_VDOT(DisasContext *ctx, arg_VDOT *a); +typedef arg_disas_neon_shared2 arg_VFML; +static bool trans_VFML(DisasContext *ctx, arg_VFML *a); +typedef arg_disas_neon_shared3 arg_VCMLA_scalar; +static bool trans_VCMLA_scalar(DisasContext *ctx, arg_VCMLA_scalar *a); +typedef arg_disas_neon_shared4 arg_VDOT_scalar; +static bool trans_VDOT_scalar(DisasContext *ctx, arg_VDOT_scalar *a); +typedef arg_disas_neon_shared5 arg_VFML_scalar; +static bool trans_VFML_scalar(DisasContext *ctx, arg_VFML_scalar *a); + +static void disas_neon_shared_extract_disas_neon_shared_Fmt_0(DisasContext *ctx, arg_disas_neon_shared0 *a, uint32_t insn) +{ + a->rot = extract32(insn, 23, 2); + a->size = extract32(insn, 20, 1); + a->q = extract32(insn, 6, 1); + a->vm = deposit32(extract32(insn, 0, 4), 4, 28, extract32(insn, 5, 1)); + a->vn = deposit32(extract32(insn, 16, 4), 4, 28, extract32(insn, 7, 1)); + a->vd = deposit32(extract32(insn, 12, 4), 4, 28, extract32(insn, 22, 
1)); +} + +static void disas_neon_shared_extract_disas_neon_shared_Fmt_1(DisasContext *ctx, arg_disas_neon_shared0 *a, uint32_t insn) +{ + a->rot = extract32(insn, 24, 1); + a->size = extract32(insn, 20, 1); + a->q = extract32(insn, 6, 1); + a->vm = deposit32(extract32(insn, 0, 4), 4, 28, extract32(insn, 5, 1)); + a->vn = deposit32(extract32(insn, 16, 4), 4, 28, extract32(insn, 7, 1)); + a->vd = deposit32(extract32(insn, 12, 4), 4, 28, extract32(insn, 22, 1)); +} + +static void disas_neon_shared_extract_disas_neon_shared_Fmt_2(DisasContext *ctx, arg_disas_neon_shared1 *a, uint32_t insn) +{ + a->q = extract32(insn, 6, 1); + a->u = extract32(insn, 4, 1); + a->vm = deposit32(extract32(insn, 0, 4), 4, 28, extract32(insn, 5, 1)); + a->vn = deposit32(extract32(insn, 16, 4), 4, 28, extract32(insn, 7, 1)); + a->vd = deposit32(extract32(insn, 12, 4), 4, 28, extract32(insn, 22, 1)); +} + +static void disas_neon_shared_extract_disas_neon_shared_Fmt_3(DisasContext *ctx, arg_disas_neon_shared2 *a, uint32_t insn) +{ + a->s = extract32(insn, 23, 1); + a->vm = deposit32(extract32(insn, 5, 1), 1, 31, extract32(insn, 0, 4)); + a->vn = deposit32(extract32(insn, 7, 1), 1, 31, extract32(insn, 16, 4)); + a->vd = deposit32(extract32(insn, 12, 4), 4, 28, extract32(insn, 22, 1)); + a->q = 0; +} + +static void disas_neon_shared_extract_disas_neon_shared_Fmt_4(DisasContext *ctx, arg_disas_neon_shared2 *a, uint32_t insn) +{ + a->s = extract32(insn, 23, 1); + a->vm = deposit32(extract32(insn, 0, 4), 4, 28, extract32(insn, 5, 1)); + a->vn = deposit32(extract32(insn, 16, 4), 4, 28, extract32(insn, 7, 1)); + a->vd = deposit32(extract32(insn, 12, 4), 4, 28, extract32(insn, 22, 1)); + a->q = 1; +} + +static void disas_neon_shared_extract_disas_neon_shared_Fmt_5(DisasContext *ctx, arg_disas_neon_shared3 *a, uint32_t insn) +{ + a->rot = extract32(insn, 20, 2); + a->q = extract32(insn, 6, 1); + a->index = extract32(insn, 5, 1); + a->vm = extract32(insn, 0, 4); + a->vn = deposit32(extract32(insn, 16, 4), 4, 28, extract32(insn, 7, 1)); + a->vd = deposit32(extract32(insn, 12, 4), 4, 28, extract32(insn, 22, 1)); + a->size = 0; +} + +static void disas_neon_shared_extract_disas_neon_shared_Fmt_6(DisasContext *ctx, arg_disas_neon_shared3 *a, uint32_t insn) +{ + a->rot = extract32(insn, 20, 2); + a->q = extract32(insn, 6, 1); + a->vm = deposit32(extract32(insn, 0, 4), 4, 28, extract32(insn, 5, 1)); + a->vn = deposit32(extract32(insn, 16, 4), 4, 28, extract32(insn, 7, 1)); + a->vd = deposit32(extract32(insn, 12, 4), 4, 28, extract32(insn, 22, 1)); + a->size = 1; + a->index = 0; +} + +static void disas_neon_shared_extract_disas_neon_shared_Fmt_7(DisasContext *ctx, arg_disas_neon_shared4 *a, uint32_t insn) +{ + a->q = extract32(insn, 6, 1); + a->index = extract32(insn, 5, 1); + a->u = extract32(insn, 4, 1); + a->rm = extract32(insn, 0, 4); + a->vm = deposit32(extract32(insn, 0, 4), 4, 28, extract32(insn, 5, 1)); + a->vn = deposit32(extract32(insn, 16, 4), 4, 28, extract32(insn, 7, 1)); + a->vd = deposit32(extract32(insn, 12, 4), 4, 28, extract32(insn, 22, 1)); +} + +static void disas_neon_shared_extract_disas_neon_shared_Fmt_8(DisasContext *ctx, arg_disas_neon_shared5 *a, uint32_t insn) +{ + a->s = extract32(insn, 20, 1); + a->index = extract32(insn, 3, 1); + a->rm = deposit32(extract32(insn, 5, 1), 1, 31, extract32(insn, 0, 3)); + a->vn = deposit32(extract32(insn, 7, 1), 1, 31, extract32(insn, 16, 4)); + a->vd = deposit32(extract32(insn, 12, 4), 4, 28, extract32(insn, 22, 1)); + a->q = 0; +} + +static void 
disas_neon_shared_extract_disas_neon_shared_Fmt_9(DisasContext *ctx, arg_disas_neon_shared5 *a, uint32_t insn) +{ + a->s = extract32(insn, 20, 1); + a->rm = extract32(insn, 0, 3); + a->index = deposit32(extract32(insn, 3, 1), 1, 31, extract32(insn, 5, 1)); + a->vn = deposit32(extract32(insn, 16, 4), 4, 28, extract32(insn, 7, 1)); + a->vd = deposit32(extract32(insn, 12, 4), 4, 28, extract32(insn, 22, 1)); + a->q = 1; +} + +static bool disas_neon_shared(DisasContext *ctx, uint32_t insn) +{ + union { + arg_disas_neon_shared0 f_disas_neon_shared0; + arg_disas_neon_shared1 f_disas_neon_shared1; + arg_disas_neon_shared2 f_disas_neon_shared2; + arg_disas_neon_shared3 f_disas_neon_shared3; + arg_disas_neon_shared4 f_disas_neon_shared4; + arg_disas_neon_shared5 f_disas_neon_shared5; + } u; + + switch (insn & 0xfe000f00) { + case 0xfc000800: + /* 1111110. ........ ....1000 ........ */ + switch (insn & 0x00200010) { + case 0x00000000: + /* 1111110. ..0..... ....1000 ...0.... */ + disas_neon_shared_extract_disas_neon_shared_Fmt_1(ctx, &u.f_disas_neon_shared0, insn); + switch ((insn >> 23) & 0x1) { + case 0x1: + /* 1111110. 1.0..... ....1000 ...0.... */ + if (trans_VCADD(ctx, &u.f_disas_neon_shared0)) return true; + return false; + } + return false; + case 0x00200000: + /* 1111110. ..1..... ....1000 ...0.... */ + disas_neon_shared_extract_disas_neon_shared_Fmt_0(ctx, &u.f_disas_neon_shared0, insn); + if (trans_VCMLA(ctx, &u.f_disas_neon_shared0)) return true; + return false; + case 0x00200010: + /* 1111110. ..1..... ....1000 ...1.... */ + switch (insn & 0x01100040) { + case 0x00000000: + /* 11111100 ..10.... ....1000 .0.1.... */ + disas_neon_shared_extract_disas_neon_shared_Fmt_3(ctx, &u.f_disas_neon_shared2, insn); + if (trans_VFML(ctx, &u.f_disas_neon_shared2)) return true; + return false; + case 0x00000040: + /* 11111100 ..10.... ....1000 .1.1.... */ + disas_neon_shared_extract_disas_neon_shared_Fmt_4(ctx, &u.f_disas_neon_shared2, insn); + if (trans_VFML(ctx, &u.f_disas_neon_shared2)) return true; + return false; + } + return false; + } + return false; + case 0xfc000d00: + /* 1111110. ........ ....1101 ........ */ + disas_neon_shared_extract_disas_neon_shared_Fmt_2(ctx, &u.f_disas_neon_shared1, insn); + switch (insn & 0x01b00000) { + case 0x00200000: + /* 11111100 0.10.... ....1101 ........ */ + if (trans_VDOT(ctx, &u.f_disas_neon_shared1)) return true; + return false; + } + return false; + case 0xfe000800: + /* 1111111. ........ ....1000 ........ */ + switch (insn & 0x01800010) { + case 0x00000000: + /* 11111110 0....... ....1000 ...0.... */ + disas_neon_shared_extract_disas_neon_shared_Fmt_5(ctx, &u.f_disas_neon_shared3, insn); + if (trans_VCMLA_scalar(ctx, &u.f_disas_neon_shared3)) return true; + return false; + case 0x00000010: + /* 11111110 0....... ....1000 ...1.... */ + switch (insn & 0x00200040) { + case 0x00000000: + /* 11111110 0.0..... ....1000 .0.1.... */ + disas_neon_shared_extract_disas_neon_shared_Fmt_8(ctx, &u.f_disas_neon_shared5, insn); + if (trans_VFML_scalar(ctx, &u.f_disas_neon_shared5)) return true; + return false; + case 0x00000040: + /* 11111110 0.0..... ....1000 .1.1.... */ + disas_neon_shared_extract_disas_neon_shared_Fmt_9(ctx, &u.f_disas_neon_shared5, insn); + if (trans_VFML_scalar(ctx, &u.f_disas_neon_shared5)) return true; + return false; + } + return false; + case 0x00800000: + /* 11111110 1....... ....1000 ...0.... 
*/ + disas_neon_shared_extract_disas_neon_shared_Fmt_6(ctx, &u.f_disas_neon_shared3, insn); + if (trans_VCMLA_scalar(ctx, &u.f_disas_neon_shared3)) return true; + return false; + } + return false; + case 0xfe000d00: + /* 1111111. ........ ....1101 ........ */ + disas_neon_shared_extract_disas_neon_shared_Fmt_7(ctx, &u.f_disas_neon_shared4, insn); + switch (insn & 0x01b00000) { + case 0x00200000: + /* 11111110 0.10.... ....1101 ........ */ + if (trans_VDOT_scalar(ctx, &u.f_disas_neon_shared4)) return true; + return false; + } + return false; + } + return false; +} diff --git a/qemu/target/arm/decode-sve.inc.c b/qemu/target/arm/decode-sve.inc.c index 9740f1aa80..d04c24c0ee 100644 --- a/qemu/target/arm/decode-sve.inc.c +++ b/qemu/target/arm/decode-sve.inc.c @@ -43,9 +43,7 @@ typedef struct { } arg_disas_sve31; typedef struct { -#ifdef _MSC_VER - int dummy; -#endif + int : 0; } arg_disas_sve32; typedef struct { diff --git a/qemu/target/arm/helper-a64.c b/qemu/target/arm/helper-a64.c index 12da114039..df30f11c47 100644 --- a/qemu/target/arm/helper-a64.c +++ b/qemu/target/arm/helper-a64.c @@ -1096,78 +1096,40 @@ void HELPER(dc_zva)(CPUARMState *env, uint64_t vaddr_in) * alignment faults or any memory attribute handling). */ - struct uc_struct *uc = env->uc; - ARMCPU *cpu = env_archcpu(env); - uint64_t blocklen = 4 << cpu->dcz_blocksize; + UNICORN_UNUSED struct uc_struct *uc = env->uc; + int blocklen = 4 << env_archcpu(env)->dcz_blocksize; uint64_t vaddr = vaddr_in & ~(blocklen - 1); + int mmu_idx = cpu_mmu_index(env, false); + void *mem; + /* - * Slightly awkwardly, QEMU's TARGET_PAGE_SIZE may be less than - * the block size so we might have to do more than one TLB lookup. - * We know that in fact for any v8 CPU the page size is at least 4K - * and the block size must be 2K or less, but TARGET_PAGE_SIZE is only - * 1K as an artefact of legacy v5 subpage support being present in the - * same QEMU executable. So in practice the hostaddr[] array has - * two entries, given the current setting of TARGET_PAGE_BITS_MIN. + * Trapless lookup. In addition to actual invalid page, may + * return NULL for I/O, watchpoints, clean pages, etc. */ - int maxidx = DIV_ROUND_UP(blocklen, TARGET_PAGE_SIZE); - void *hostaddr[DIV_ROUND_UP(2 * KiB, 1 << TARGET_PAGE_BITS_MIN)]; - int try, i; - unsigned mmu_idx = cpu_mmu_index(env, false); - TCGMemOpIdx oi = make_memop_idx(MO_UB, mmu_idx); - - assert(maxidx <= ARRAY_SIZE(hostaddr)); - - for (try = 0; try < 2; try++) { - - for (i = 0; i < maxidx; i++) { - hostaddr[i] = tlb_vaddr_to_host(env, - vaddr + TARGET_PAGE_SIZE * i, - 1, mmu_idx); - if (!hostaddr[i]) { - break; - } - } - if (i == maxidx) { + mem = tlb_vaddr_to_host(env, vaddr, MMU_DATA_STORE, mmu_idx); + + if (unlikely(!mem)) { + uintptr_t ra = GETPC(); + + /* + * Trap if accessing an invalid page. DC_ZVA requires that we supply + * the original pointer for an invalid page. But watchpoints require + * that we probe the actual space. So do both. + */ + (void) probe_write(env, vaddr_in, 1, mmu_idx, ra); + mem = probe_write(env, vaddr, blocklen, mmu_idx, ra); + + if (unlikely(!mem)) { /* - * If it's all in the TLB it's fair game for just writing to; - * we know we don't need to update dirty status, etc. + * The only remaining reason for mem == NULL is I/O. + * Just do a series of byte writes as the architecture demands. 
*/ - for (i = 0; i < maxidx - 1; i++) { - memset(hostaddr[i], 0, TARGET_PAGE_SIZE); + for (int i = 0; i < blocklen; i++) { + cpu_stb_mmuidx_ra(env, vaddr + i, 0, mmu_idx, ra); } - memset(hostaddr[i], 0, blocklen - (i * TARGET_PAGE_SIZE)); return; } - /* - * OK, try a store and see if we can populate the tlb. This - * might cause an exception if the memory isn't writable, - * in which case we will longjmp out of here. We must for - * this purpose use the actual register value passed to us - * so that we get the fault address right. - */ - helper_ret_stb_mmu(env, vaddr_in, 0, oi, GETPC()); - /* Now we can populate the other TLB entries, if any */ - for (i = 0; i < maxidx; i++) { - uint64_t va = vaddr + TARGET_PAGE_SIZE * i; - if (va != (vaddr_in & TARGET_PAGE_MASK)) { - helper_ret_stb_mmu(env, va, 0, oi, GETPC()); - } - } } - /* - * Slow path (probably attempt to do this to an I/O device or - * similar, or clearing of a block of code we have translations - * cached for). Just do a series of byte writes as the architecture - * demands. It's not worth trying to use a cpu_physical_memory_map(), - * memset(), unmap() sequence here because: - * + we'd need to account for the blocksize being larger than a page - * + the direct-RAM access case is almost always going to be dealt - * with in the fastpath code above, so there's no speed benefit - * + we would have to deal with the map returning NULL because the - * bounce buffer was in use - */ - for (i = 0; i < blocklen; i++) { - helper_ret_stb_mmu(env, vaddr + i, 0, oi, GETPC()); - } + memset(mem, 0, blocklen); } diff --git a/qemu/target/arm/helper-a64.h b/qemu/target/arm/helper-a64.h index 3df7c185aa..5b0b699a50 100644 --- a/qemu/target/arm/helper-a64.h +++ b/qemu/target/arm/helper-a64.h @@ -103,3 +103,19 @@ DEF_HELPER_FLAGS_3(autda, TCG_CALL_NO_WG, i64, env, i64, i64) DEF_HELPER_FLAGS_3(autdb, TCG_CALL_NO_WG, i64, env, i64, i64) DEF_HELPER_FLAGS_2(xpaci, TCG_CALL_NO_RWG_SE, i64, env, i64) DEF_HELPER_FLAGS_2(xpacd, TCG_CALL_NO_RWG_SE, i64, env, i64) + +DEF_HELPER_FLAGS_3(mte_check1, TCG_CALL_NO_WG, i64, env, i32, i64) +DEF_HELPER_FLAGS_3(mte_checkN, TCG_CALL_NO_WG, i64, env, i32, i64) +DEF_HELPER_FLAGS_3(mte_check_zva, TCG_CALL_NO_WG, i64, env, i32, i64) +DEF_HELPER_FLAGS_3(irg, TCG_CALL_NO_RWG, i64, env, i64, i64) +DEF_HELPER_FLAGS_4(addsubg, TCG_CALL_NO_RWG_SE, i64, env, i64, s32, i32) +DEF_HELPER_FLAGS_3(ldg, TCG_CALL_NO_WG, i64, env, i64, i64) +DEF_HELPER_FLAGS_3(stg, TCG_CALL_NO_WG, void, env, i64, i64) +DEF_HELPER_FLAGS_3(stg_parallel, TCG_CALL_NO_WG, void, env, i64, i64) +DEF_HELPER_FLAGS_2(stg_stub, TCG_CALL_NO_WG, void, env, i64) +DEF_HELPER_FLAGS_3(st2g, TCG_CALL_NO_WG, void, env, i64, i64) +DEF_HELPER_FLAGS_3(st2g_parallel, TCG_CALL_NO_WG, void, env, i64, i64) +DEF_HELPER_FLAGS_2(st2g_stub, TCG_CALL_NO_WG, void, env, i64) +DEF_HELPER_FLAGS_2(ldgm, TCG_CALL_NO_WG, i64, env, i64) +DEF_HELPER_FLAGS_3(stgm, TCG_CALL_NO_WG, void, env, i64, i64) +DEF_HELPER_FLAGS_3(stzgm_tags, TCG_CALL_NO_WG, void, env, i64, i64) diff --git a/qemu/target/arm/helper-sve.h b/qemu/target/arm/helper-sve.h index 2f47279155..199ffee9cc 100644 --- a/qemu/target/arm/helper-sve.h +++ b/qemu/target/arm/helper-sve.h @@ -1099,25 +1099,40 @@ DEF_HELPER_FLAGS_6(sve_fcadd_s, TCG_CALL_NO_RWG, DEF_HELPER_FLAGS_6(sve_fcadd_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_3(sve_fmla_zpzzz_h, TCG_CALL_NO_RWG, void, env, ptr, i32) -DEF_HELPER_FLAGS_3(sve_fmla_zpzzz_s, TCG_CALL_NO_RWG, void, env, ptr, i32) -DEF_HELPER_FLAGS_3(sve_fmla_zpzzz_d, 
TCG_CALL_NO_RWG, void, env, ptr, i32) - -DEF_HELPER_FLAGS_3(sve_fmls_zpzzz_h, TCG_CALL_NO_RWG, void, env, ptr, i32) -DEF_HELPER_FLAGS_3(sve_fmls_zpzzz_s, TCG_CALL_NO_RWG, void, env, ptr, i32) -DEF_HELPER_FLAGS_3(sve_fmls_zpzzz_d, TCG_CALL_NO_RWG, void, env, ptr, i32) - -DEF_HELPER_FLAGS_3(sve_fnmla_zpzzz_h, TCG_CALL_NO_RWG, void, env, ptr, i32) -DEF_HELPER_FLAGS_3(sve_fnmla_zpzzz_s, TCG_CALL_NO_RWG, void, env, ptr, i32) -DEF_HELPER_FLAGS_3(sve_fnmla_zpzzz_d, TCG_CALL_NO_RWG, void, env, ptr, i32) - -DEF_HELPER_FLAGS_3(sve_fnmls_zpzzz_h, TCG_CALL_NO_RWG, void, env, ptr, i32) -DEF_HELPER_FLAGS_3(sve_fnmls_zpzzz_s, TCG_CALL_NO_RWG, void, env, ptr, i32) -DEF_HELPER_FLAGS_3(sve_fnmls_zpzzz_d, TCG_CALL_NO_RWG, void, env, ptr, i32) - -DEF_HELPER_FLAGS_3(sve_fcmla_zpzzz_h, TCG_CALL_NO_RWG, void, env, ptr, i32) -DEF_HELPER_FLAGS_3(sve_fcmla_zpzzz_s, TCG_CALL_NO_RWG, void, env, ptr, i32) -DEF_HELPER_FLAGS_3(sve_fcmla_zpzzz_d, TCG_CALL_NO_RWG, void, env, ptr, i32) +DEF_HELPER_FLAGS_7(sve_fmla_zpzzz_h, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_7(sve_fmla_zpzzz_s, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_7(sve_fmla_zpzzz_d, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_7(sve_fmls_zpzzz_h, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_7(sve_fmls_zpzzz_s, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_7(sve_fmls_zpzzz_d, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_7(sve_fnmla_zpzzz_h, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_7(sve_fnmla_zpzzz_s, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_7(sve_fnmla_zpzzz_d, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_7(sve_fnmls_zpzzz_h, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_7(sve_fnmls_zpzzz_s, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_7(sve_fnmls_zpzzz_d, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_7(sve_fcmla_zpzzz_h, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_7(sve_fcmla_zpzzz_s, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_7(sve_fcmla_zpzzz_d, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_5(sve_ftmad_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_5(sve_ftmad_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) @@ -1181,6 +1196,64 @@ DEF_HELPER_FLAGS_4(sve_ld1sds_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) DEF_HELPER_FLAGS_4(sve_ld1sdu_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) DEF_HELPER_FLAGS_4(sve_ld1sds_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ld1bb_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ld2bb_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ld3bb_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ld4bb_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) + +DEF_HELPER_FLAGS_4(sve_ld1hh_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ld2hh_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ld3hh_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ld4hh_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) + +DEF_HELPER_FLAGS_4(sve_ld1hh_be_r_mte, 
TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ld2hh_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ld3hh_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ld4hh_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) + +DEF_HELPER_FLAGS_4(sve_ld1ss_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ld2ss_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ld3ss_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ld4ss_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) + +DEF_HELPER_FLAGS_4(sve_ld1ss_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ld2ss_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ld3ss_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ld4ss_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) + +DEF_HELPER_FLAGS_4(sve_ld1dd_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ld2dd_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ld3dd_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ld4dd_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) + +DEF_HELPER_FLAGS_4(sve_ld1dd_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ld2dd_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ld3dd_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ld4dd_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) + +DEF_HELPER_FLAGS_4(sve_ld1bhu_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ld1bsu_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ld1bdu_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ld1bhs_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ld1bss_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ld1bds_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) + +DEF_HELPER_FLAGS_4(sve_ld1hsu_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ld1hdu_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ld1hss_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ld1hds_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) + +DEF_HELPER_FLAGS_4(sve_ld1hsu_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ld1hdu_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ld1hss_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ld1hds_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) + +DEF_HELPER_FLAGS_4(sve_ld1sdu_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ld1sds_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) + +DEF_HELPER_FLAGS_4(sve_ld1sdu_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ld1sds_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) + DEF_HELPER_FLAGS_4(sve_ldff1bb_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) DEF_HELPER_FLAGS_4(sve_ldff1bhu_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) DEF_HELPER_FLAGS_4(sve_ldff1bsu_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) @@ -1212,6 +1285,55 @@ DEF_HELPER_FLAGS_4(sve_ldff1sds_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) DEF_HELPER_FLAGS_4(sve_ldff1dd_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) DEF_HELPER_FLAGS_4(sve_ldff1dd_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) 
+DEF_HELPER_FLAGS_4(sve_ldff1bb_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ldff1bhu_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ldff1bsu_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ldff1bdu_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ldff1bhs_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ldff1bss_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ldff1bds_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) + +DEF_HELPER_FLAGS_4(sve_ldff1hh_le_r_mte, TCG_CALL_NO_WG, + void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ldff1hsu_le_r_mte, TCG_CALL_NO_WG, + void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ldff1hdu_le_r_mte, TCG_CALL_NO_WG, + void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ldff1hss_le_r_mte, TCG_CALL_NO_WG, + void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ldff1hds_le_r_mte, TCG_CALL_NO_WG, + void, env, ptr, tl, i32) + +DEF_HELPER_FLAGS_4(sve_ldff1hh_be_r_mte, TCG_CALL_NO_WG, + void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ldff1hsu_be_r_mte, TCG_CALL_NO_WG, + void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ldff1hdu_be_r_mte, TCG_CALL_NO_WG, + void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ldff1hss_be_r_mte, TCG_CALL_NO_WG, + void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ldff1hds_be_r_mte, TCG_CALL_NO_WG, + void, env, ptr, tl, i32) + +DEF_HELPER_FLAGS_4(sve_ldff1ss_le_r_mte, TCG_CALL_NO_WG, + void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ldff1sdu_le_r_mte, TCG_CALL_NO_WG, + void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ldff1sds_le_r_mte, TCG_CALL_NO_WG, + void, env, ptr, tl, i32) + +DEF_HELPER_FLAGS_4(sve_ldff1ss_be_r_mte, TCG_CALL_NO_WG, + void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ldff1sdu_be_r_mte, TCG_CALL_NO_WG, + void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ldff1sds_be_r_mte, TCG_CALL_NO_WG, + void, env, ptr, tl, i32) + +DEF_HELPER_FLAGS_4(sve_ldff1dd_le_r_mte, TCG_CALL_NO_WG, + void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ldff1dd_be_r_mte, TCG_CALL_NO_WG, + void, env, ptr, tl, i32) + DEF_HELPER_FLAGS_4(sve_ldnf1bb_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) DEF_HELPER_FLAGS_4(sve_ldnf1bhu_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) DEF_HELPER_FLAGS_4(sve_ldnf1bsu_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) @@ -1243,6 +1365,55 @@ DEF_HELPER_FLAGS_4(sve_ldnf1sds_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) DEF_HELPER_FLAGS_4(sve_ldnf1dd_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) DEF_HELPER_FLAGS_4(sve_ldnf1dd_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ldnf1bb_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ldnf1bhu_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ldnf1bsu_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ldnf1bdu_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ldnf1bhs_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ldnf1bss_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ldnf1bds_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) + +DEF_HELPER_FLAGS_4(sve_ldnf1hh_le_r_mte, TCG_CALL_NO_WG, + void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ldnf1hsu_le_r_mte, TCG_CALL_NO_WG, + void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ldnf1hdu_le_r_mte, TCG_CALL_NO_WG, + void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ldnf1hss_le_r_mte, TCG_CALL_NO_WG, + void, env, ptr, tl, i32) 
+DEF_HELPER_FLAGS_4(sve_ldnf1hds_le_r_mte, TCG_CALL_NO_WG, + void, env, ptr, tl, i32) + +DEF_HELPER_FLAGS_4(sve_ldnf1hh_be_r_mte, TCG_CALL_NO_WG, + void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ldnf1hsu_be_r_mte, TCG_CALL_NO_WG, + void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ldnf1hdu_be_r_mte, TCG_CALL_NO_WG, + void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ldnf1hss_be_r_mte, TCG_CALL_NO_WG, + void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ldnf1hds_be_r_mte, TCG_CALL_NO_WG, + void, env, ptr, tl, i32) + +DEF_HELPER_FLAGS_4(sve_ldnf1ss_le_r_mte, TCG_CALL_NO_WG, + void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ldnf1sdu_le_r_mte, TCG_CALL_NO_WG, + void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ldnf1sds_le_r_mte, TCG_CALL_NO_WG, + void, env, ptr, tl, i32) + +DEF_HELPER_FLAGS_4(sve_ldnf1ss_be_r_mte, TCG_CALL_NO_WG, + void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ldnf1sdu_be_r_mte, TCG_CALL_NO_WG, + void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ldnf1sds_be_r_mte, TCG_CALL_NO_WG, + void, env, ptr, tl, i32) + +DEF_HELPER_FLAGS_4(sve_ldnf1dd_le_r_mte, TCG_CALL_NO_WG, + void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ldnf1dd_be_r_mte, TCG_CALL_NO_WG, + void, env, ptr, tl, i32) + DEF_HELPER_FLAGS_4(sve_st1bb_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) DEF_HELPER_FLAGS_4(sve_st2bb_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) DEF_HELPER_FLAGS_4(sve_st3bb_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) @@ -1290,6 +1461,53 @@ DEF_HELPER_FLAGS_4(sve_st1hd_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) DEF_HELPER_FLAGS_4(sve_st1sd_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) DEF_HELPER_FLAGS_4(sve_st1sd_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_st1bb_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_st2bb_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_st3bb_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_st4bb_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) + +DEF_HELPER_FLAGS_4(sve_st1hh_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_st2hh_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_st3hh_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_st4hh_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) + +DEF_HELPER_FLAGS_4(sve_st1hh_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_st2hh_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_st3hh_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_st4hh_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) + +DEF_HELPER_FLAGS_4(sve_st1ss_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_st2ss_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_st3ss_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_st4ss_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) + +DEF_HELPER_FLAGS_4(sve_st1ss_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_st2ss_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_st3ss_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_st4ss_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) + +DEF_HELPER_FLAGS_4(sve_st1dd_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_st2dd_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_st3dd_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) 
+DEF_HELPER_FLAGS_4(sve_st4dd_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) + +DEF_HELPER_FLAGS_4(sve_st1dd_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_st2dd_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_st3dd_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_st4dd_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) + +DEF_HELPER_FLAGS_4(sve_st1bh_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_st1bs_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_st1bd_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) + +DEF_HELPER_FLAGS_4(sve_st1hs_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_st1hd_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_st1hs_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_st1hd_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) + +DEF_HELPER_FLAGS_4(sve_st1sd_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_st1sd_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) + DEF_HELPER_FLAGS_6(sve_ldbsu_zsu, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr, tl, i32) DEF_HELPER_FLAGS_6(sve_ldhsu_le_zsu, TCG_CALL_NO_WG, @@ -1399,6 +1617,116 @@ DEF_HELPER_FLAGS_6(sve_ldsds_le_zd, TCG_CALL_NO_WG, DEF_HELPER_FLAGS_6(sve_ldsds_be_zd, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldbsu_zsu_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldhsu_le_zsu_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldhsu_be_zsu_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldss_le_zsu_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldss_be_zsu_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldbss_zsu_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldhss_le_zsu_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldhss_be_zsu_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) + +DEF_HELPER_FLAGS_6(sve_ldbsu_zss_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldhsu_le_zss_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldhsu_be_zss_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldss_le_zss_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldss_be_zss_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldbss_zss_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldhss_le_zss_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldhss_be_zss_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) + +DEF_HELPER_FLAGS_6(sve_ldbdu_zsu_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldhdu_le_zsu_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldhdu_be_zsu_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldsdu_le_zsu_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldsdu_be_zsu_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_lddd_le_zsu_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_lddd_be_zsu_mte, 
TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldbds_zsu_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldhds_le_zsu_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldhds_be_zsu_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldsds_le_zsu_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldsds_be_zsu_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) + +DEF_HELPER_FLAGS_6(sve_ldbdu_zss_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldhdu_le_zss_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldhdu_be_zss_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldsdu_le_zss_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldsdu_be_zss_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_lddd_le_zss_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_lddd_be_zss_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldbds_zss_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldhds_le_zss_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldhds_be_zss_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldsds_le_zss_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldsds_be_zss_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) + +DEF_HELPER_FLAGS_6(sve_ldbdu_zd_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldhdu_le_zd_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldhdu_be_zd_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldsdu_le_zd_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldsdu_be_zd_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_lddd_le_zd_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_lddd_be_zd_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldbds_zd_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldhds_le_zd_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldhds_be_zd_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldsds_le_zd_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldsds_be_zd_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) + + DEF_HELPER_FLAGS_6(sve_ldffbsu_zsu, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr, tl, i32) DEF_HELPER_FLAGS_6(sve_ldffhsu_le_zsu, TCG_CALL_NO_WG, @@ -1508,6 +1836,115 @@ DEF_HELPER_FLAGS_6(sve_ldffsds_le_zd, TCG_CALL_NO_WG, DEF_HELPER_FLAGS_6(sve_ldffsds_be_zd, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldffbsu_zsu_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldffhsu_le_zsu_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldffhsu_be_zsu_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldffss_le_zsu_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldffss_be_zsu_mte, TCG_CALL_NO_WG, + void, env, ptr, 
ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldffbss_zsu_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldffhss_le_zsu_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldffhss_be_zsu_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) + +DEF_HELPER_FLAGS_6(sve_ldffbsu_zss_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldffhsu_le_zss_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldffhsu_be_zss_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldffss_le_zss_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldffss_be_zss_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldffbss_zss_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldffhss_le_zss_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldffhss_be_zss_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) + +DEF_HELPER_FLAGS_6(sve_ldffbdu_zsu_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldffhdu_le_zsu_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldffhdu_be_zsu_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldffsdu_le_zsu_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldffsdu_be_zsu_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldffdd_le_zsu_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldffdd_be_zsu_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldffbds_zsu_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldffhds_le_zsu_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldffhds_be_zsu_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldffsds_le_zsu_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldffsds_be_zsu_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) + +DEF_HELPER_FLAGS_6(sve_ldffbdu_zss_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldffhdu_le_zss_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldffhdu_be_zss_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldffsdu_le_zss_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldffsdu_be_zss_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldffdd_le_zss_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldffdd_be_zss_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldffbds_zss_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldffhds_le_zss_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldffhds_be_zss_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldffsds_le_zss_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldffsds_be_zss_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) + +DEF_HELPER_FLAGS_6(sve_ldffbdu_zd_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldffhdu_le_zd_mte, TCG_CALL_NO_WG, + void, 
env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldffhdu_be_zd_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldffsdu_le_zd_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldffsdu_be_zd_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldffdd_le_zd_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldffdd_be_zd_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldffbds_zd_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldffhds_le_zd_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldffhds_be_zd_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldffsds_le_zd_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldffsds_be_zd_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) + DEF_HELPER_FLAGS_6(sve_stbs_zsu, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr, tl, i32) DEF_HELPER_FLAGS_6(sve_sths_le_zsu, TCG_CALL_NO_WG, @@ -1575,4 +2012,71 @@ DEF_HELPER_FLAGS_6(sve_stdd_le_zd, TCG_CALL_NO_WG, DEF_HELPER_FLAGS_6(sve_stdd_be_zd, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_stbs_zsu_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_sths_le_zsu_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_sths_be_zsu_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_stss_le_zsu_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_stss_be_zsu_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) + +DEF_HELPER_FLAGS_6(sve_stbs_zss_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_sths_le_zss_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_sths_be_zss_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_stss_le_zss_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_stss_be_zss_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) + +DEF_HELPER_FLAGS_6(sve_stbd_zsu_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_sthd_le_zsu_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_sthd_be_zsu_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_stsd_le_zsu_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_stsd_be_zsu_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_stdd_le_zsu_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_stdd_be_zsu_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) + +DEF_HELPER_FLAGS_6(sve_stbd_zss_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_sthd_le_zss_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_sthd_be_zss_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_stsd_le_zss_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_stsd_be_zss_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_stdd_le_zss_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_stdd_be_zss_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) + 
+DEF_HELPER_FLAGS_6(sve_stbd_zd_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_sthd_le_zd_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_sthd_be_zd_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_stsd_le_zd_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_stsd_be_zd_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_stdd_le_zd_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_stdd_be_zd_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) + DEF_HELPER_FLAGS_4(sve2_pmull_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) diff --git a/qemu/target/arm/helper.c b/qemu/target/arm/helper.c index 60c9db9e3e..6e28646ad2 100644 --- a/qemu/target/arm/helper.c +++ b/qemu/target/arm/helper.c @@ -31,9 +31,11 @@ static bool get_phys_addr_lpae(CPUARMState *env, target_ulong address, MMUAccessType access_type, ARMMMUIdx mmu_idx, + bool s1_is_el0, hwaddr *phys_ptr, MemTxAttrs *txattrs, int *prot, target_ulong *page_size_ptr, - ARMMMUFaultInfo *fi, ARMCacheAttrs *cacheattrs); + ARMMMUFaultInfo *fi, ARMCacheAttrs *cacheattrs) + UNICORN_NONNULL; static void switch_mode(CPUARMState *env, int mode); @@ -78,35 +80,19 @@ uint64_t read_raw_cp_reg(CPUARMState *env, const ARMCPRegInfo *ri) } /* - * Some registers are not accessible if EL3.NS=0 and EL3 is using AArch32 but - * they are accessible when EL3 is using AArch64 regardless of EL3.NS. - * - * access_el3_aa32ns: Used to check AArch32 register views. - * access_el3_aa32ns_aa64any: Used to check both AArch32/64 register views. + * Some registers are not accessible from AArch32 EL3 if SCR.NS == 0. */ static CPAccessResult access_el3_aa32ns(CPUARMState *env, const ARMCPRegInfo *ri, bool isread) { - bool secure = arm_is_secure_below_el3(env); - - assert(!arm_el_is_aa64(env, 3)); - if (secure) { + if (!is_a64(env) && arm_current_el(env) == 3 && + arm_is_secure_below_el3(env)) { return CP_ACCESS_TRAP_UNCATEGORIZED; } return CP_ACCESS_OK; } -static CPAccessResult access_el3_aa32ns_aa64any(CPUARMState *env, - const ARMCPRegInfo *ri, - bool isread) -{ - if (!arm_el_is_aa64(env, 3)) { - return access_el3_aa32ns(env, ri, isread); - } - return CP_ACCESS_OK; -} - /* Some secure-only AArch32 registers trap to EL3 if used from * Secure EL1 (but are just ordinary UNDEF in other non-EL3 contexts). * Note that an access from Secure EL1 can only happen if EL3 is AArch64. @@ -394,8 +380,7 @@ static void tlbiall_nsnh_write(CPUARMState *env, const ARMCPRegInfo *ri, tlb_flush_by_mmuidx(cs, ARMMMUIdxBit_E10_1 | ARMMMUIdxBit_E10_1_PAN | - ARMMMUIdxBit_E10_0 | - ARMMMUIdxBit_Stage2); + ARMMMUIdxBit_E10_0); } static void tlbiall_nsnh_is_write(CPUARMState *env, const ARMCPRegInfo *ri, @@ -406,45 +391,7 @@ static void tlbiall_nsnh_is_write(CPUARMState *env, const ARMCPRegInfo *ri, tlb_flush_by_mmuidx_all_cpus_synced(cs, ARMMMUIdxBit_E10_1 | ARMMMUIdxBit_E10_1_PAN | - ARMMMUIdxBit_E10_0 | - ARMMMUIdxBit_Stage2); -} - -static void tlbiipas2_write(CPUARMState *env, const ARMCPRegInfo *ri, - uint64_t value) -{ - /* Invalidate by IPA. This has to invalidate any structures that - * contain only stage 2 translation information, but does not need - * to apply to structures that contain combined stage 1 and stage 2 - * translation information. - * This must NOP if EL2 isn't implemented or SCR_EL3.NS is zero. 
- */ - CPUState *cs = env_cpu(env); - uint64_t pageaddr; - - if (!arm_feature(env, ARM_FEATURE_EL2) || !(env->cp15.scr_el3 & SCR_NS)) { - return; - } - - pageaddr = sextract64(value << 12, 0, 40); - - tlb_flush_page_by_mmuidx(cs, pageaddr, ARMMMUIdxBit_Stage2); -} - -static void tlbiipas2_is_write(CPUARMState *env, const ARMCPRegInfo *ri, - uint64_t value) -{ - CPUState *cs = env_cpu(env); - uint64_t pageaddr; - - if (!arm_feature(env, ARM_FEATURE_EL2) || !(env->cp15.scr_el3 & SCR_NS)) { - return; - } - - pageaddr = sextract64(value << 12, 0, 40); - - tlb_flush_page_by_mmuidx_all_cpus_synced(cs, pageaddr, - ARMMMUIdxBit_Stage2); + ARMMMUIdxBit_E10_0); } static void tlbiall_hyp_write(CPUARMState *env, const ARMCPRegInfo *ri, @@ -1622,9 +1569,19 @@ static void scr_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value) uint32_t valid_mask = 0x3fff; ARMCPU *cpu = env_archcpu(env); - if (arm_el_is_aa64(env, 3)) { + if (ri->state == ARM_CP_STATE_AA64) { value |= SCR_FW | SCR_AW; /* these two bits are RES1. */ valid_mask &= ~SCR_NET; + + if (cpu_isar_feature(aa64_lor, cpu)) { + valid_mask |= SCR_TLOR; + } + if (cpu_isar_feature(aa64_pauth, cpu)) { + valid_mask |= SCR_API | SCR_APK; + } + if (cpu_isar_feature(aa64_mte, cpu)) { + valid_mask |= SCR_ATA; + } } else { valid_mask &= ~(SCR_RW | SCR_ST); } @@ -1643,12 +1600,6 @@ static void scr_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value) valid_mask &= ~SCR_SMD; } } - if (cpu_isar_feature(aa64_lor, cpu)) { - valid_mask |= SCR_TLOR; - } - if (cpu_isar_feature(aa64_pauth, cpu)) { - valid_mask |= SCR_API | SCR_APK; - } /* Clear all-context RES0 bits. */ value &= valid_mask; @@ -1875,13 +1826,13 @@ static const ARMCPRegInfo v7_cp_reginfo[] = { .resetvalue = 0x0 }, { .name = "PMINTENCLR", .cp = 15, .crn = 9, .crm = 14, .opc1 = 0, .opc2 = 2, .access = PL1_RW, .accessfn = access_tpm, - .type = ARM_CP_ALIAS | ARM_CP_IO, + .type = ARM_CP_ALIAS | ARM_CP_IO | ARM_CP_NO_RAW, .fieldoffset = offsetof(CPUARMState, cp15.c9_pminten), .writefn = pmintenclr_write, }, { .name = "PMINTENCLR_EL1", .state = ARM_CP_STATE_AA64, .opc0 = 3, .opc1 = 0, .crn = 9, .crm = 14, .opc2 = 2, .access = PL1_RW, .accessfn = access_tpm, - .type = ARM_CP_ALIAS | ARM_CP_IO, + .type = ARM_CP_ALIAS | ARM_CP_IO | ARM_CP_NO_RAW, .fieldoffset = offsetof(CPUARMState, cp15.c9_pminten), .writefn = pmintenclr_write }, { .name = "CCSIDR", .state = ARM_CP_STATE_BOTH, @@ -3044,7 +2995,7 @@ static uint64_t do_ats_write(CPUARMState *env, uint64_t value, * Report exception with ESR indicating a fault due to a * translation table walk for a cache maintenance instruction. 
*/ - syn = syn_data_abort_no_iss(current_el == target_el, + syn = syn_data_abort_no_iss(current_el == target_el, 0, fi.ea, 1, fi.s1ptw, 1, fsc); env->exception.vaddress = value; env->exception.fsr = fsr; @@ -3567,8 +3518,7 @@ static void vttbr_write(CPUARMState *env, const ARMCPRegInfo *ri, tlb_flush_by_mmuidx(cs, ARMMMUIdxBit_E10_1 | ARMMMUIdxBit_E10_1_PAN | - ARMMMUIdxBit_E10_0 | - ARMMMUIdxBit_Stage2); + ARMMMUIdxBit_E10_0); raw_write(env, ri, value); } } @@ -4050,11 +4000,6 @@ static int alle1_tlbmask(CPUARMState *env) return ARMMMUIdxBit_SE10_1 | ARMMMUIdxBit_SE10_1_PAN | ARMMMUIdxBit_SE10_0; - } else if (arm_feature(env, ARM_FEATURE_EL2)) { - return ARMMMUIdxBit_E10_1 | - ARMMMUIdxBit_E10_1_PAN | - ARMMMUIdxBit_E10_0 | - ARMMMUIdxBit_Stage2; } else { return ARMMMUIdxBit_E10_1 | ARMMMUIdxBit_E10_1_PAN | @@ -4201,44 +4146,6 @@ static void tlbi_aa64_vae3is_write(CPUARMState *env, const ARMCPRegInfo *ri, ARMMMUIdxBit_SE3); } -static void tlbi_aa64_ipas2e1_write(CPUARMState *env, const ARMCPRegInfo *ri, - uint64_t value) -{ - /* Invalidate by IPA. This has to invalidate any structures that - * contain only stage 2 translation information, but does not need - * to apply to structures that contain combined stage 1 and stage 2 - * translation information. - * This must NOP if EL2 isn't implemented or SCR_EL3.NS is zero. - */ - ARMCPU *cpu = env_archcpu(env); - CPUState *cs = CPU(cpu); - uint64_t pageaddr; - - if (!arm_feature(env, ARM_FEATURE_EL2) || !(env->cp15.scr_el3 & SCR_NS)) { - return; - } - - pageaddr = sextract64(value << 12, 0, 48); - - tlb_flush_page_by_mmuidx(cs, pageaddr, ARMMMUIdxBit_Stage2); -} - -static void tlbi_aa64_ipas2e1is_write(CPUARMState *env, const ARMCPRegInfo *ri, - uint64_t value) -{ - CPUState *cs = env_cpu(env); - uint64_t pageaddr; - - if (!arm_feature(env, ARM_FEATURE_EL2) || !(env->cp15.scr_el3 & SCR_NS)) { - return; - } - - pageaddr = sextract64(value << 12, 0, 48); - - tlb_flush_page_by_mmuidx_all_cpus_synced(cs, pageaddr, - ARMMMUIdxBit_Stage2); -} - static CPAccessResult aa64_zva_access(CPUARMState *env, const ARMCPRegInfo *ri, bool isread) { @@ -4306,6 +4213,15 @@ static void sctlr_write(CPUARMState *env, const ARMCPRegInfo *ri, { ARMCPU *cpu = env_archcpu(env); + if (ri->state == ARM_CP_STATE_AA64 && !cpu_isar_feature(aa64_mte, cpu)) { + if (ri->opc1 == 6) { /* SCTLR_EL3 */ + value &= ~(SCTLR_ITFSB | SCTLR_TCF | SCTLR_ATA); + } else { + value &= ~(SCTLR_ITFSB | SCTLR_TCF0 | SCTLR_TCF | + SCTLR_ATA0 | SCTLR_ATA); + } + } + if (raw_read(env, ri) == value) { /* Skip the TLB flush if nothing actually changed; Linux likes * to do a lot of pointless SCTLR writes. @@ -4320,6 +4236,7 @@ static void sctlr_write(CPUARMState *env, const ARMCPRegInfo *ri, raw_write(env, ri, value); /* ??? Lots of these bits are not implemented. */ + /* This may enable/disable the MMU, so do a TLB flush. 
*/ tlb_flush(CPU(cpu)); @@ -4475,12 +4392,10 @@ static const ARMCPRegInfo v8_cp_reginfo[] = { .writefn = tlbi_aa64_vae1_write }, { .name = "TLBI_IPAS2E1IS", .state = ARM_CP_STATE_AA64, .opc0 = 1, .opc1 = 4, .crn = 8, .crm = 0, .opc2 = 1, - .access = PL2_W, .type = ARM_CP_NO_RAW, - .writefn = tlbi_aa64_ipas2e1is_write }, + .access = PL2_W, .type = ARM_CP_NOP }, { .name = "TLBI_IPAS2LE1IS", .state = ARM_CP_STATE_AA64, .opc0 = 1, .opc1 = 4, .crn = 8, .crm = 0, .opc2 = 5, - .access = PL2_W, .type = ARM_CP_NO_RAW, - .writefn = tlbi_aa64_ipas2e1is_write }, + .access = PL2_W, .type = ARM_CP_NOP }, { .name = "TLBI_ALLE1IS", .state = ARM_CP_STATE_AA64, .opc0 = 1, .opc1 = 4, .crn = 8, .crm = 3, .opc2 = 4, .access = PL2_W, .type = ARM_CP_NO_RAW, @@ -4491,12 +4406,10 @@ static const ARMCPRegInfo v8_cp_reginfo[] = { .writefn = tlbi_aa64_alle1is_write }, { .name = "TLBI_IPAS2E1", .state = ARM_CP_STATE_AA64, .opc0 = 1, .opc1 = 4, .crn = 8, .crm = 4, .opc2 = 1, - .access = PL2_W, .type = ARM_CP_NO_RAW, - .writefn = tlbi_aa64_ipas2e1_write }, + .access = PL2_W, .type = ARM_CP_NOP }, { .name = "TLBI_IPAS2LE1", .state = ARM_CP_STATE_AA64, .opc0 = 1, .opc1 = 4, .crn = 8, .crm = 4, .opc2 = 5, - .access = PL2_W, .type = ARM_CP_NO_RAW, - .writefn = tlbi_aa64_ipas2e1_write }, + .access = PL2_W, .type = ARM_CP_NOP }, { .name = "TLBI_ALLE1", .state = ARM_CP_STATE_AA64, .opc0 = 1, .opc1 = 4, .crn = 8, .crm = 7, .opc2 = 4, .access = PL2_W, .type = ARM_CP_NO_RAW, @@ -4575,20 +4488,16 @@ static const ARMCPRegInfo v8_cp_reginfo[] = { .writefn = tlbimva_hyp_is_write }, { .name = "TLBIIPAS2", .cp = 15, .opc1 = 4, .crn = 8, .crm = 4, .opc2 = 1, - .type = ARM_CP_NO_RAW, .access = PL2_W, - .writefn = tlbiipas2_write }, + .type = ARM_CP_NOP, .access = PL2_W }, { .name = "TLBIIPAS2IS", .cp = 15, .opc1 = 4, .crn = 8, .crm = 0, .opc2 = 1, - .type = ARM_CP_NO_RAW, .access = PL2_W, - .writefn = tlbiipas2_is_write }, + .type = ARM_CP_NOP, .access = PL2_W }, { .name = "TLBIIPAS2L", .cp = 15, .opc1 = 4, .crn = 8, .crm = 4, .opc2 = 5, - .type = ARM_CP_NO_RAW, .access = PL2_W, - .writefn = tlbiipas2_write }, + .type = ARM_CP_NOP, .access = PL2_W }, { .name = "TLBIIPAS2LIS", .cp = 15, .opc1 = 4, .crn = 8, .crm = 0, .opc2 = 5, - .type = ARM_CP_NO_RAW, .access = PL2_W, - .writefn = tlbiipas2_is_write }, + .type = ARM_CP_NOP, .access = PL2_W }, /* 32 bit cache operations */ { .name = "ICIALLUIS", .cp = 15, .opc1 = 0, .crn = 7, .crm = 1, .opc2 = 0, .type = ARM_CP_NOP, .access = PL1_W, .accessfn = aa64_cacheop_pou_access }, @@ -4702,7 +4611,6 @@ static const ARMCPRegInfo el3_no_el2_cp_reginfo[] = { .access = PL2_RW, .readfn = arm_cp_read_zero, .writefn = arm_cp_write_ignore }, { .name = "HCR_EL2", .state = ARM_CP_STATE_BOTH, - .type = ARM_CP_NO_RAW, .opc0 = 3, .opc1 = 4, .crn = 1, .crm = 1, .opc2 = 0, .access = PL2_RW, .type = ARM_CP_CONST, .resetvalue = 0 }, @@ -4744,7 +4652,7 @@ static const ARMCPRegInfo el3_no_el2_cp_reginfo[] = { .access = PL2_RW, .type = ARM_CP_CONST, .resetvalue = 0 }, { .name = "VTCR_EL2", .state = ARM_CP_STATE_BOTH, .opc0 = 3, .opc1 = 4, .crn = 2, .crm = 1, .opc2 = 2, - .access = PL2_RW, .accessfn = access_el3_aa32ns_aa64any, + .access = PL2_RW, .accessfn = access_el3_aa32ns, .type = ARM_CP_CONST, .resetvalue = 0 }, { .name = "VTTBR", .state = ARM_CP_STATE_AA32, .cp = 15, .opc1 = 6, .crm = 2, @@ -4792,7 +4700,7 @@ static const ARMCPRegInfo el3_no_el2_cp_reginfo[] = { .type = ARM_CP_CONST, .resetvalue = 0 }, { .name = "HPFAR_EL2", .state = ARM_CP_STATE_BOTH, .opc0 = 3, .opc1 = 4, .crn = 6, .crm = 0, .opc2 = 4, - 
.access = PL2_RW, .accessfn = access_el3_aa32ns_aa64any, + .access = PL2_RW, .accessfn = access_el3_aa32ns, .type = ARM_CP_CONST, .resetvalue = 0 }, { .name = "HSTR_EL2", .state = ARM_CP_STATE_BOTH, .opc0 = 3, .opc1 = 4, .crn = 1, .crm = 1, .opc2 = 3, @@ -4849,15 +4757,19 @@ static void do_hcr_write(CPUARMState *env, uint64_t value, uint64_t valid_mask) if (cpu_isar_feature(aa64_pauth, cpu)) { valid_mask |= HCR_API | HCR_APK; } + if (cpu_isar_feature(aa64_mte, cpu)) { + valid_mask |= HCR_ATA | HCR_DCT | HCR_TID5; + } } /* Clear RES0 bits. */ value &= valid_mask; - /* These bits change the MMU setup: + /* + * These bits change the MMU setup: * HCR_VM enables stage 2 translation * HCR_PTW forbids certain page-table setups - * HCR_DC Disables stage1 and enables stage2 translation + * HCR_DC disables stage1 and enables stage2 translation */ if ((env->cp15.hcr_el2 ^ value) & (HCR_VM | HCR_PTW | HCR_DC)) { tlb_flush(CPU(cpu)); @@ -5430,6 +5342,9 @@ static void define_arm_vh_e2h_redirects_aliases(ARMCPU *cpu) { K(3, 0, 1, 2, 0), K(3, 4, 1, 2, 0), K(3, 5, 1, 2, 0), "ZCR_EL1", "ZCR_EL2", "ZCR_EL12", isar_feature_aa64_sve }, + { K(3, 0, 5, 6, 0), K(3, 4, 5, 6, 0), K(3, 5, 5, 6, 0), + "TFSR_EL1", "TFSR_EL2", "TFSR_EL12", isar_feature_aa64_mte }, + /* TODO: ARMv8.2-SPE -- PMSCR_EL2 */ /* TODO: ARMv8.4-Trace -- TRFCR_EL2 */ }; @@ -6382,7 +6297,7 @@ static void dccvap_writefn(CPUARMState *env, const ARMCPRegInfo *opaque, /* RCU lock is already being held */ mr = memory_region_from_host(uc, haddr, &offset); if (mr) { - // memory_region_do_writeback(mr, offset, dline_size); FIXME + // memory_region_writeback(mr, offset, dline_size); FIXME } } } @@ -6405,6 +6320,159 @@ static const ARMCPRegInfo dcpodp_reg[] = { #endif +static CPAccessResult access_aa64_tid5(CPUARMState *env, const ARMCPRegInfo *ri, + bool isread) +{ + if ((arm_current_el(env) < 2) && (arm_hcr_el2_eff(env) & HCR_TID5)) { + return CP_ACCESS_TRAP_EL2; + } + + return CP_ACCESS_OK; +} + +static CPAccessResult access_mte(CPUARMState *env, const ARMCPRegInfo *ri, + bool isread) +{ + int el = arm_current_el(env); + + if (el < 2 && + arm_feature(env, ARM_FEATURE_EL2) && + !(arm_hcr_el2_eff(env) & HCR_ATA)) { + return CP_ACCESS_TRAP_EL2; + } + if (el < 3 && + arm_feature(env, ARM_FEATURE_EL3) && + !(env->cp15.scr_el3 & SCR_ATA)) { + return CP_ACCESS_TRAP_EL3; + } + return CP_ACCESS_OK; +} + +static uint64_t tco_read(CPUARMState *env, const ARMCPRegInfo *ri) +{ + return env->pstate & PSTATE_TCO; +} + +static void tco_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t val) +{ + env->pstate = (env->pstate & ~PSTATE_TCO) | (val & PSTATE_TCO); +} + +static const ARMCPRegInfo mte_reginfo[] = { + { .name = "TFSRE0_EL1", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 0, .crn = 5, .crm = 6, .opc2 = 1, + .access = PL1_RW, .accessfn = access_mte, + .fieldoffset = offsetof(CPUARMState, cp15.tfsr_el[0]) }, + { .name = "TFSR_EL1", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 0, .crn = 5, .crm = 6, .opc2 = 0, + .access = PL1_RW, .accessfn = access_mte, + .fieldoffset = offsetof(CPUARMState, cp15.tfsr_el[1]) }, + { .name = "TFSR_EL2", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 4, .crn = 5, .crm = 6, .opc2 = 0, + .access = PL2_RW, .accessfn = access_mte, + .fieldoffset = offsetof(CPUARMState, cp15.tfsr_el[2]) }, + { .name = "TFSR_EL3", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 6, .crn = 5, .crm = 6, .opc2 = 0, + .access = PL3_RW, + .fieldoffset = offsetof(CPUARMState, cp15.tfsr_el[3]) }, + { .name = "RGSR_EL1", .state = ARM_CP_STATE_AA64, + 
.opc0 = 3, .opc1 = 0, .crn = 1, .crm = 0, .opc2 = 5, + .access = PL1_RW, .accessfn = access_mte, + .fieldoffset = offsetof(CPUARMState, cp15.rgsr_el1) }, + { .name = "GCR_EL1", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 0, .crn = 1, .crm = 0, .opc2 = 6, + .access = PL1_RW, .accessfn = access_mte, + .fieldoffset = offsetof(CPUARMState, cp15.gcr_el1) }, + { .name = "GMID_EL1", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 1, .crn = 0, .crm = 0, .opc2 = 4, + .access = PL1_R, .accessfn = access_aa64_tid5, + .type = ARM_CP_CONST, .resetvalue = GMID_EL1_BS }, + { .name = "TCO", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 3, .crn = 4, .crm = 2, .opc2 = 7, + .type = ARM_CP_NO_RAW, + .access = PL0_RW, .readfn = tco_read, .writefn = tco_write }, + { .name = "DC_IGVAC", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 0, .crn = 7, .crm = 6, .opc2 = 3, + .type = ARM_CP_NOP, .access = PL1_W, + .accessfn = aa64_cacheop_poc_access }, + { .name = "DC_IGSW", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 0, .crn = 7, .crm = 6, .opc2 = 4, + .type = ARM_CP_NOP, .access = PL1_W, .accessfn = access_tsw }, + { .name = "DC_IGDVAC", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 0, .crn = 7, .crm = 6, .opc2 = 5, + .type = ARM_CP_NOP, .access = PL1_W, + .accessfn = aa64_cacheop_poc_access }, + { .name = "DC_IGDSW", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 0, .crn = 7, .crm = 6, .opc2 = 6, + .type = ARM_CP_NOP, .access = PL1_W, .accessfn = access_tsw }, + { .name = "DC_CGSW", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 0, .crn = 7, .crm = 10, .opc2 = 4, + .type = ARM_CP_NOP, .access = PL1_W, .accessfn = access_tsw }, + { .name = "DC_CGDSW", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 0, .crn = 7, .crm = 10, .opc2 = 6, + .type = ARM_CP_NOP, .access = PL1_W, .accessfn = access_tsw }, + { .name = "DC_CIGSW", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 0, .crn = 7, .crm = 14, .opc2 = 4, + .type = ARM_CP_NOP, .access = PL1_W, .accessfn = access_tsw }, + { .name = "DC_CIGDSW", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 0, .crn = 7, .crm = 14, .opc2 = 6, + .type = ARM_CP_NOP, .access = PL1_W, .accessfn = access_tsw }, + REGINFO_SENTINEL +}; + +static const ARMCPRegInfo mte_tco_ro_reginfo[] = { + { .name = "TCO", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 3, .crn = 4, .crm = 2, .opc2 = 7, + .type = ARM_CP_CONST, .access = PL0_RW, }, + REGINFO_SENTINEL +}; + +static const ARMCPRegInfo mte_el0_cacheop_reginfo[] = { + { .name = "DC_CGVAC", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 3, .crn = 7, .crm = 10, .opc2 = 3, + .type = ARM_CP_NOP, .access = PL0_W, + .accessfn = aa64_cacheop_poc_access }, + { .name = "DC_CGDVAC", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 3, .crn = 7, .crm = 10, .opc2 = 5, + .type = ARM_CP_NOP, .access = PL0_W, + .accessfn = aa64_cacheop_poc_access }, + { .name = "DC_CGVAP", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 3, .crn = 7, .crm = 12, .opc2 = 3, + .type = ARM_CP_NOP, .access = PL0_W, + .accessfn = aa64_cacheop_poc_access }, + { .name = "DC_CGDVAP", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 3, .crn = 7, .crm = 12, .opc2 = 5, + .type = ARM_CP_NOP, .access = PL0_W, + .accessfn = aa64_cacheop_poc_access }, + { .name = "DC_CGVADP", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 3, .crn = 7, .crm = 13, .opc2 = 3, + .type = ARM_CP_NOP, .access = PL0_W, + .accessfn = aa64_cacheop_poc_access }, + { .name = "DC_CGDVADP", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 3, .crn = 7, .crm = 13, .opc2 = 5, + .type = ARM_CP_NOP, 
.access = PL0_W, + .accessfn = aa64_cacheop_poc_access }, + { .name = "DC_CIGVAC", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 3, .crn = 7, .crm = 14, .opc2 = 3, + .type = ARM_CP_NOP, .access = PL0_W, + .accessfn = aa64_cacheop_poc_access }, + { .name = "DC_CIGDVAC", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 3, .crn = 7, .crm = 14, .opc2 = 5, + .type = ARM_CP_NOP, .access = PL0_W, + .accessfn = aa64_cacheop_poc_access }, + { .name = "DC_GVA", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 3, .crn = 7, .crm = 4, .opc2 = 3, + .access = PL0_W, .type = ARM_CP_DC_GVA, + .accessfn = aa64_zva_access, + }, + { .name = "DC_GZVA", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 3, .crn = 7, .crm = 4, .opc2 = 4, + .access = PL0_W, .type = ARM_CP_DC_GZVA, + .accessfn = aa64_zva_access, + }, + REGINFO_SENTINEL +}; + static CPAccessResult access_predinv(CPUARMState *env, const ARMCPRegInfo *ri, bool isread) { @@ -7048,12 +7116,12 @@ void register_cp_regs_for_features(ARMCPU *cpu) ARMCPRegInfo vpidr_regs[] = { { .name = "VPIDR_EL2", .state = ARM_CP_STATE_BOTH, .opc0 = 3, .opc1 = 4, .crn = 0, .crm = 0, .opc2 = 0, - .access = PL2_RW, .accessfn = access_el3_aa32ns_aa64any, + .access = PL2_RW, .accessfn = access_el3_aa32ns, .type = ARM_CP_CONST, .resetvalue = cpu->midr, .fieldoffset = offsetof(CPUARMState, cp15.vpidr_el2) }, { .name = "VMPIDR_EL2", .state = ARM_CP_STATE_BOTH, .opc0 = 3, .opc1 = 4, .crn = 0, .crm = 0, .opc2 = 5, - .access = PL2_RW, .accessfn = access_el3_aa32ns_aa64any, + .access = PL2_RW, .accessfn = access_el3_aa32ns, .type = ARM_CP_NO_RAW, .writefn = arm_cp_write_ignore, .readfn = mpidr_read }, REGINFO_SENTINEL @@ -7466,6 +7534,19 @@ void register_cp_regs_for_features(ARMCPU *cpu) define_one_arm_cp_reg(cpu, dcpodp_reg); } } + + /* + * If full MTE is enabled, add all of the system registers. + * If only "instructions available at EL0" are enabled, + * then define only a RAZ/WI version of PSTATE.TCO. + */ + if (cpu_isar_feature(aa64_mte, cpu)) { + define_arm_cp_regs(cpu, mte_reginfo); + define_arm_cp_regs(cpu, mte_el0_cacheop_reginfo); + } else if (cpu_isar_feature(aa64_mte_insn_reg, cpu)) { + define_arm_cp_regs(cpu, mte_tco_ro_reginfo); + define_arm_cp_regs(cpu, mte_el0_cacheop_reginfo); + } #endif if (cpu_isar_feature(any_predinv, cpu)) { @@ -8725,6 +8806,77 @@ static void arm_cpu_do_interrupt_aarch32_qemu(CPUState *cs) take_aarch32_exception(env, new_mode, mask, offset, addr); } +static int aarch64_regnum(CPUARMState *env, int aarch32_reg) +{ + /* + * Return the register number of the AArch64 view of the AArch32 + * register @aarch32_reg. The CPUARMState CPSR is assumed to still + * be that of the AArch32 mode the exception came from. + */ + int mode = env->uncached_cpsr & CPSR_M; + + switch (aarch32_reg) { + case 0: + case 1: + case 2: + case 3: + case 4: + case 5: + case 6: + case 7: + return aarch32_reg; + case 8: + case 9: + case 10: + case 11: + case 12: + return mode == ARM_CPU_MODE_FIQ ? 
aarch32_reg + 16 : aarch32_reg; + case 13: + switch (mode) { + case ARM_CPU_MODE_USR: + case ARM_CPU_MODE_SYS: + return 13; + case ARM_CPU_MODE_HYP: + return 15; + case ARM_CPU_MODE_IRQ: + return 17; + case ARM_CPU_MODE_SVC: + return 19; + case ARM_CPU_MODE_ABT: + return 21; + case ARM_CPU_MODE_UND: + return 23; + case ARM_CPU_MODE_FIQ: + return 29; + default: + g_assert_not_reached(); + } + case 14: + switch (mode) { + case ARM_CPU_MODE_USR: + case ARM_CPU_MODE_SYS: + case ARM_CPU_MODE_HYP: + return 14; + case ARM_CPU_MODE_IRQ: + return 16; + case ARM_CPU_MODE_SVC: + return 18; + case ARM_CPU_MODE_ABT: + return 20; + case ARM_CPU_MODE_UND: + return 22; + case ARM_CPU_MODE_FIQ: + return 30; + default: + g_assert_not_reached(); + } + case 15: + return 31; + default: + g_assert_not_reached(); + } +} + /* Handle exception entry to a target EL which is using AArch64 */ static void arm_cpu_do_interrupt_aarch64_qemu(CPUState *cs) { @@ -8735,6 +8887,7 @@ static void arm_cpu_do_interrupt_aarch64_qemu(CPUState *cs) unsigned int new_mode = aarch64_pstate_mode(new_el, true); unsigned int old_mode; unsigned int cur_el = arm_current_el(env); + int rt; /* * Note that new_el can never be 0. If cur_el is 0, then @@ -8790,7 +8943,8 @@ static void arm_cpu_do_interrupt_aarch64_qemu(CPUState *cs) case EXCP_HVC: case EXCP_HYP_TRAP: case EXCP_SMC: - if (syn_get_ec(env->exception.syndrome) == EC_ADVSIMDFPACCESSTRAP) { + switch (syn_get_ec(env->exception.syndrome)) { + case EC_ADVSIMDFPACCESSTRAP: /* * QEMU internal FP/SIMD syndromes from AArch32 include the * TA and coproc fields which are only exposed if the exception @@ -8798,6 +8952,34 @@ static void arm_cpu_do_interrupt_aarch64_qemu(CPUState *cs) * AArch64 format syndrome. */ env->exception.syndrome &= ~MAKE_64BIT_MASK(0, 20); + break; + case EC_CP14RTTRAP: + case EC_CP15RTTRAP: + case EC_CP14DTTRAP: + /* + * For a trap on AArch32 MRC/MCR/LDC/STC the Rt field is currently + * the raw register field from the insn; when taking this to + * AArch64 we must convert it to the AArch64 view of the register + * number. Notice that we read a 4-bit AArch32 register number and + * write back a 5-bit AArch64 one. 
+ */ + rt = extract32(env->exception.syndrome, 5, 4); + rt = aarch64_regnum(env, rt); + env->exception.syndrome = deposit32(env->exception.syndrome, + 5, 5, rt); + break; + case EC_CP15RRTTRAP: + case EC_CP14RRTTRAP: + /* Similarly for MRRC/MCRR traps for Rt and Rt2 fields */ + rt = extract32(env->exception.syndrome, 5, 4); + rt = aarch64_regnum(env, rt); + env->exception.syndrome = deposit32(env->exception.syndrome, + 5, 5, rt); + rt = extract32(env->exception.syndrome, 10, 4); + rt = aarch64_regnum(env, rt); + env->exception.syndrome = deposit32(env->exception.syndrome, + 10, 5, rt); + break; } env->cp15.esr_el[new_el] = env->exception.syndrome; break; @@ -8850,6 +9032,9 @@ static void arm_cpu_do_interrupt_aarch64_qemu(CPUState *cs) break; } } + if (cpu_isar_feature(aa64_mte, cpu)) { + new_mode |= PSTATE_TCO; + } pstate_write(env, PSTATE_DAIF | new_mode); env->aarch64 = 1; @@ -8908,44 +9093,6 @@ void arm_cpu_do_interrupt(CPUState *cs) cs->interrupt_request |= CPU_INTERRUPT_EXITTB; } -/* Return the exception level which controls this address translation regime */ -static uint32_t regime_el(CPUARMState *env, ARMMMUIdx mmu_idx) -{ - switch (mmu_idx) { - case ARMMMUIdx_E20_0: - case ARMMMUIdx_E20_2: - case ARMMMUIdx_E20_2_PAN: - case ARMMMUIdx_Stage2: - case ARMMMUIdx_E2: - return 2; - case ARMMMUIdx_SE3: - return 3; - case ARMMMUIdx_SE10_0: - return arm_el_is_aa64(env, 3) ? 1 : 3; - case ARMMMUIdx_SE10_1: - case ARMMMUIdx_SE10_1_PAN: - case ARMMMUIdx_Stage1_E0: - case ARMMMUIdx_Stage1_E1: - case ARMMMUIdx_Stage1_E1_PAN: - case ARMMMUIdx_E10_0: - case ARMMMUIdx_E10_1: - case ARMMMUIdx_E10_1_PAN: - case ARMMMUIdx_MPrivNegPri: - case ARMMMUIdx_MUserNegPri: - case ARMMMUIdx_MPriv: - case ARMMMUIdx_MUser: - case ARMMMUIdx_MSPrivNegPri: - case ARMMMUIdx_MSUserNegPri: - case ARMMMUIdx_MSPriv: - case ARMMMUIdx_MSUser: - return 1; - default: - g_assert_not_reached(); - // never reach here - return 1; - } -} - uint64_t arm_sctlr(CPUARMState *env, int el) { /* Only EL0 needs to be adjusted for EL1&0 or EL2&0. 
*/ @@ -9024,15 +9171,6 @@ static inline uint64_t regime_ttbr(CPUARMState *env, ARMMMUIdx mmu_idx, } } -/* Return the TCR controlling this translation regime */ -static inline TCR *regime_tcr(CPUARMState *env, ARMMMUIdx mmu_idx) -{ - if (mmu_idx == ARMMMUIdx_Stage2) { - return &env->cp15.vtcr_el2; - } - return &env->cp15.tcr_el[regime_el(env, mmu_idx)]; -} - /* Convert a possible stage1+2 MMU index into the appropriate * stage 1 MMU index */ @@ -9189,9 +9327,10 @@ simple_ap_to_rw_prot(CPUARMState *env, ARMMMUIdx mmu_idx, int ap) * * @env: CPUARMState * @s2ap: The 2-bit stage2 access permissions (S2AP) - * @xn: XN (execute-never) bit + * @xn: XN (execute-never) bits + * @s1_is_el0: true if this is S2 of an S1+2 walk for EL0 */ -static int get_S2prot(CPUARMState *env, int s2ap, int xn) +static int get_S2prot(CPUARMState *env, int s2ap, int xn, bool s1_is_el0) { int prot = 0; @@ -9201,8 +9340,32 @@ static int get_S2prot(CPUARMState *env, int s2ap, int xn) if (s2ap & 2) { prot |= PAGE_WRITE; } - if (!xn) { - if (arm_el_is_aa64(env, 2) || prot & PAGE_READ) { + + if (cpu_isar_feature(any_tts2uxn, env_archcpu(env))) { + switch (xn) { + case 0: + prot |= PAGE_EXEC; + break; + case 1: + if (s1_is_el0) { + prot |= PAGE_EXEC; + } + break; + case 2: + break; + case 3: + if (!s1_is_el0) { + prot |= PAGE_EXEC; + } + break; + default: + g_assert_not_reached(); + } + } else { + if (!extract32(xn, 1, 1)) { + if (arm_el_is_aa64(env, 2) || prot & PAGE_READ) { + prot |= PAGE_EXEC; + } prot |= PAGE_EXEC; } } @@ -9323,19 +9486,11 @@ static hwaddr S1_ptw_translate(CPUARMState *env, ARMMMUIdx mmu_idx, int s2prot; int ret; ARMCacheAttrs cacheattrs = { 0 }; - ARMCacheAttrs *pcacheattrs = NULL; - - if (env->cp15.hcr_el2 & HCR_PTW) { - /* - * PTW means we must fault if this S1 walk touches S2 Device - * memory; otherwise we don't care about the attributes and can - * save the S2 translation the effort of computing them. - */ - pcacheattrs = &cacheattrs; - } - ret = get_phys_addr_lpae(env, addr, 0, ARMMMUIdx_Stage2, &s2pa, - &txattrs, &s2prot, &s2size, fi, pcacheattrs); + ret = get_phys_addr_lpae(env, addr, MMU_DATA_LOAD, ARMMMUIdx_Stage2, + false, + &s2pa, &txattrs, &s2prot, &s2size, fi, + &cacheattrs); if (ret) { assert(fi->type != ARMFault_None); fi->s2addr = addr; @@ -9343,8 +9498,11 @@ static hwaddr S1_ptw_translate(CPUARMState *env, ARMMMUIdx mmu_idx, fi->s1ptw = true; return ~0; } - if (pcacheattrs && (pcacheattrs->attrs & 0xf0) == 0) { - /* Access was to Device memory: generate Permission fault */ + if ((env->cp15.hcr_el2 & HCR_PTW) && (cacheattrs.attrs & 0xf0) == 0) { + /* + * PTW set and S1 walk touched S2 Device memory: + * generate Permission fault. + */ fi->type = ARMFault_Permission; fi->s2addr = addr; fi->stage2 = true; @@ -9829,6 +9987,16 @@ static int aa64_va_parameter_tbid(uint64_t tcr, ARMMMUIdx mmu_idx) } } +static int aa64_va_parameter_tcma(uint64_t tcr, ARMMMUIdx mmu_idx) +{ + if (regime_has_2_ranges(mmu_idx)) { + return extract64(tcr, 57, 2); + } else { + /* Replicate the single TCMA bit so we always have 2 bits. */ + return extract32(tcr, 30, 1) * 3; + } +} + ARMVAParameters aa64_va_parameters(CPUARMState *env, uint64_t va, ARMMMUIdx mmu_idx, bool data) { @@ -9952,8 +10120,32 @@ static ARMVAParameters aa32_va_parameters(CPUARMState *env, uint32_t va, }; } +/** + * get_phys_addr_lpae: perform one stage of page table walk, LPAE format + * + * Returns false if the translation was successful. 
Otherwise, phys_ptr, attrs, + * prot and page_size may not be filled in, and the populated fsr value provides + * information on why the translation aborted, in the format of a long-format + * DFSR/IFSR fault register, with the following caveats: + * * the WnR bit is never set (the caller must do this). + * + * @env: CPUARMState + * @address: virtual address to get physical address for + * @access_type: MMU_DATA_LOAD, MMU_DATA_STORE or MMU_INST_FETCH + * @mmu_idx: MMU index indicating required translation regime + * @s1_is_el0: if @mmu_idx is ARMMMUIdx_Stage2 (so this is a stage 2 page table + * walk), must be true if this is stage 2 of a stage 1+2 walk for an + * EL0 access). If @mmu_idx is anything else, @s1_is_el0 is ignored. + * @phys_ptr: set to the physical address corresponding to the virtual address + * @attrs: set to the memory transaction attributes to use + * @prot: set to the permissions for the page containing phys_ptr + * @page_size_ptr: set to the size of the page containing phys_ptr + * @fi: set to fault info if the translation fails + * @cacheattrs: (if non-NULL) set to the cacheability/shareability attributes + */ static bool get_phys_addr_lpae(CPUARMState *env, target_ulong address, MMUAccessType access_type, ARMMMUIdx mmu_idx, + bool s1_is_el0, hwaddr *phys_ptr, MemTxAttrs *txattrs, int *prot, target_ulong *page_size_ptr, ARMMMUFaultInfo *fi, ARMCacheAttrs *cacheattrs) @@ -10176,13 +10368,14 @@ static bool get_phys_addr_lpae(CPUARMState *env, target_ulong address, } ap = extract32(attrs, 4, 2); - xn = extract32(attrs, 12, 1); if (mmu_idx == ARMMMUIdx_Stage2) { ns = true; - *prot = get_S2prot(env, ap, xn); + xn = extract32(attrs, 11, 2); + *prot = get_S2prot(env, ap, xn, s1_is_el0); } else { ns = extract32(attrs, 3, 1); + xn = extract32(attrs, 12, 1); pxn = extract32(attrs, 11, 1); *prot = get_S1prot(env, mmu_idx, aarch64, ap, ns, xn, pxn); } @@ -10201,22 +10394,19 @@ static bool get_phys_addr_lpae(CPUARMState *env, target_ulong address, } /* When in aarch64 mode, and BTI is enabled, remember GP in the IOTLB. 
*/ if (aarch64 && guarded && cpu_isar_feature(aa64_bti, cpu)) { - txattrs->target_tlb_bit0 = true; + arm_tlb_bti_gp(txattrs) = true; } - if (cacheattrs != NULL) { - if (mmu_idx == ARMMMUIdx_Stage2) { - cacheattrs->attrs = convert_stage2_attrs(env, - extract32(attrs, 0, 4)); - } else { - /* Index into MAIR registers for cache attributes */ - uint8_t attrindx = extract32(attrs, 0, 3); - uint64_t mair = env->cp15.mair_el[regime_el(env, mmu_idx)]; - assert(attrindx <= 7); - cacheattrs->attrs = extract64(mair, attrindx * 8, 8); - } - cacheattrs->shareability = extract32(attrs, 6, 2); + if (mmu_idx == ARMMMUIdx_Stage2) { + cacheattrs->attrs = convert_stage2_attrs(env, extract32(attrs, 0, 4)); + } else { + /* Index into MAIR registers for cache attributes */ + uint8_t attrindx = extract32(attrs, 0, 3); + uint64_t mair = env->cp15.mair_el[regime_el(env, mmu_idx)]; + assert(attrindx <= 7); + cacheattrs->attrs = extract64(mair, attrindx * 8, 8); } + cacheattrs->shareability = extract32(attrs, 6, 2); *phys_ptr = descaddr; *page_size_ptr = page_size; @@ -10923,9 +11113,19 @@ static uint8_t combine_cacheattr_nibble(uint8_t s1, uint8_t s2) */ static ARMCacheAttrs combine_cacheattrs(ARMCacheAttrs s1, ARMCacheAttrs s2) { - uint8_t s1lo = extract32(s1.attrs, 0, 4), s2lo = extract32(s2.attrs, 0, 4); - uint8_t s1hi = extract32(s1.attrs, 4, 4), s2hi = extract32(s2.attrs, 4, 4); + uint8_t s1lo, s2lo, s1hi, s2hi; ARMCacheAttrs ret; + bool tagged = false; + + if (s1.attrs == 0xf0) { + tagged = true; + s1.attrs = 0xff; + } + + s1lo = extract32(s1.attrs, 0, 4); + s2lo = extract32(s2.attrs, 0, 4); + s1hi = extract32(s1.attrs, 4, 4); + s2hi = extract32(s2.attrs, 4, 4); /* Combine shareability attributes (table D4-43) */ if (s1.shareability == 2 || s2.shareability == 2) { @@ -10973,6 +11173,11 @@ static ARMCacheAttrs combine_cacheattrs(ARMCacheAttrs s1, ARMCacheAttrs s2) } } + /* TODO: CombineS1S2Desc does not consider transient, only WB, RWA. */ + if (tagged && ret.attrs == 0xff) { + ret.attrs = 0xf0; + } + return ret; } @@ -11034,29 +11239,35 @@ bool get_phys_addr(CPUARMState *env, target_ulong address, /* S1 is done. Now do S2 translation. */ ret = get_phys_addr_lpae(env, ipa, access_type, ARMMMUIdx_Stage2, + mmu_idx == ARMMMUIdx_E10_0, phys_ptr, attrs, &s2_prot, - page_size, fi, - cacheattrs != NULL ? &cacheattrs2 : NULL); + page_size, fi, &cacheattrs2); fi->s2addr = ipa; /* Combine the S1 and S2 perms. */ *prot &= s2_prot; - /* Combine the S1 and S2 cache attributes, if needed */ - if (!ret && cacheattrs != NULL) { - if (env->cp15.hcr_el2 & HCR_DC) { - /* - * HCR.DC forces the first stage attributes to - * Normal Non-Shareable, - * Inner Write-Back Read-Allocate Write-Allocate, - * Outer Write-Back Read-Allocate Write-Allocate. - */ + /* If S2 fails, return early. */ + if (ret) { + return ret; + } + + /* Combine the S1 and S2 cache attributes. */ + if (env->cp15.hcr_el2 & HCR_DC) { + /* + * HCR.DC forces the first stage attributes to + * Normal Non-Shareable, + * Inner Write-Back Read-Allocate Write-Allocate, + * Outer Write-Back Read-Allocate Write-Allocate. + * Do not overwrite Tagged within attrs. + */ + if (cacheattrs->attrs != 0xf0) { cacheattrs->attrs = 0xff; - cacheattrs->shareability = 0; } - *cacheattrs = combine_cacheattrs(*cacheattrs, cacheattrs2); + cacheattrs->shareability = 0; } - return ret; + *cacheattrs = combine_cacheattrs(*cacheattrs, cacheattrs2); + return 0; } else { /* * For non-EL2 CPUs a stage1+stage2 translation is just stage 1. 
@@ -11117,6 +11328,9 @@ bool get_phys_addr(CPUARMState *env, target_ulong address, /* Definitely a real MMU, not an MPU */ if (regime_translation_disabled(env, mmu_idx)) { + uint64_t hcr; + uint8_t memattr; + /* * MMU disabled. S1 addresses within aa64 translation regimes are * still checked for bounds -- see AArch64.TranslateAddressS1Off. @@ -11154,11 +11368,32 @@ bool get_phys_addr(CPUARMState *env, target_ulong address, *phys_ptr = address; *prot = PAGE_READ | PAGE_WRITE | PAGE_EXEC; *page_size = TARGET_PAGE_SIZE; + + /* Fill in cacheattr a-la AArch64.TranslateAddressS1Off. */ + hcr = arm_hcr_el2_eff(env); + cacheattrs->shareability = 0; + if (hcr & HCR_DC) { + if (hcr & HCR_DCT) { + memattr = 0xf0; /* Tagged, Normal, WB, RWA */ + } else { + memattr = 0xff; /* Normal, WB, RWA */ + } + } else if (access_type == MMU_INST_FETCH) { + if (regime_sctlr(env, mmu_idx) & SCTLR_I) { + memattr = 0xee; /* Normal, WT, RA, NT */ + } else { + memattr = 0x44; /* Normal, NC, No */ + } + cacheattrs->shareability = 2; /* outer sharable */ + } else { + memattr = 0x00; /* Device, nGnRnE */ + } + cacheattrs->attrs = memattr; return 0; } if (regime_using_lpae_format(env, mmu_idx)) { - return get_phys_addr_lpae(env, address, access_type, mmu_idx, + return get_phys_addr_lpae(env, address, access_type, mmu_idx, false, phys_ptr, attrs, prot, page_size, fi, cacheattrs); } else if (regime_sctlr(env, mmu_idx) & SCTLR_XP) { @@ -11181,11 +11416,12 @@ hwaddr arm_cpu_get_phys_page_attrs_debug(CPUState *cs, vaddr addr, bool ret; ARMMMUFaultInfo fi = { 0 }; ARMMMUIdx mmu_idx = arm_mmu_idx(env); + ARMCacheAttrs cacheattrs = {}; *attrs = (MemTxAttrs) { 0 }; ret = get_phys_addr(env, addr, 0, mmu_idx, &phys_addr, - attrs, &prot, &page_size, &fi, NULL); + attrs, &prot, &page_size, &fi, &cacheattrs); if (ret) { return -1; @@ -11719,6 +11955,35 @@ static uint32_t rebuild_hflags_a64(CPUARMState *env, int el, int fp_el, } } + if (cpu_isar_feature(aa64_mte, env_archcpu(env))) { + /* + * Set MTE_ACTIVE if any access may be Checked, and leave clear + * if all accesses must be Unchecked: + * 1) If no TBI, then there are no tags in the address to check, + * 2) If Tag Check Override, then all accesses are Unchecked, + * 3) If Tag Check Fail == 0, then Checked access have no effect, + * 4) If no Allocation Tag Access, then all accesses are Unchecked. + */ + if (allocation_tag_access_enabled(env, el, sctlr)) { + FIELD_DP32(flags, TBFLAG_A64, ATA, 1, flags); + if (tbid + && !(env->pstate & PSTATE_TCO) + && (sctlr & (el == 0 ? SCTLR_TCF0 : SCTLR_TCF))) { + FIELD_DP32(flags, TBFLAG_A64, MTE_ACTIVE, 1, flags); + } + } + /* And again for unprivileged accesses, if required. */ + if (FIELD_EX32(flags, TBFLAG_A64, UNPRIV) + && tbid + && !(env->pstate & PSTATE_TCO) + && (sctlr & SCTLR_TCF0) + && allocation_tag_access_enabled(env, 0, sctlr)) { + FIELD_DP32(flags, TBFLAG_A64, MTE0_ACTIVE, 1, flags); + } + /* Cache TCMA as well as TBI. 
*/ + FIELD_DP32(flags, TBFLAG_A64, TCMA, aa64_va_parameter_tcma(tcr, mmu_idx), flags); + } + return rebuild_hflags_common(env, fp_el, mmu_idx, flags); } diff --git a/qemu/target/arm/helper.h b/qemu/target/arm/helper.h index 616d032c84..b48d6eb94e 100644 --- a/qemu/target/arm/helper.h +++ b/qemu/target/arm/helper.h @@ -100,6 +100,8 @@ DEF_HELPER_FLAGS_1(rebuild_hflags_a32_newel, TCG_CALL_NO_RWG, void, env) DEF_HELPER_FLAGS_2(rebuild_hflags_a32, TCG_CALL_NO_RWG, void, env, int) DEF_HELPER_FLAGS_2(rebuild_hflags_a64, TCG_CALL_NO_RWG, void, env, int) +DEF_HELPER_FLAGS_5(probe_access, TCG_CALL_NO_WG, void, env, tl, i32, i32, i32) + DEF_HELPER_1(vfp_get_fpscr, i32, env) DEF_HELPER_2(vfp_set_fpscr, void, env, i32) @@ -207,16 +209,16 @@ DEF_HELPER_FLAGS_3(vfp_fcvt_f64_to_f16, TCG_CALL_NO_RWG, f16, f64, ptr, i32) DEF_HELPER_4(vfp_muladdd, f64, f64, f64, f64, ptr) DEF_HELPER_4(vfp_muladds, f32, f32, f32, f32, ptr) -DEF_HELPER_3(recps_f32, f32, f32, f32, env) -DEF_HELPER_3(rsqrts_f32, f32, f32, f32, env) +DEF_HELPER_3(recps_f32, f32, env, f32, f32) +DEF_HELPER_3(rsqrts_f32, f32, env, f32, f32) DEF_HELPER_FLAGS_2(recpe_f16, TCG_CALL_NO_RWG, f16, f16, ptr) DEF_HELPER_FLAGS_2(recpe_f32, TCG_CALL_NO_RWG, f32, f32, ptr) DEF_HELPER_FLAGS_2(recpe_f64, TCG_CALL_NO_RWG, f64, f64, ptr) DEF_HELPER_FLAGS_2(rsqrte_f16, TCG_CALL_NO_RWG, f16, f16, ptr) DEF_HELPER_FLAGS_2(rsqrte_f32, TCG_CALL_NO_RWG, f32, f32, ptr) DEF_HELPER_FLAGS_2(rsqrte_f64, TCG_CALL_NO_RWG, f64, f64, ptr) -DEF_HELPER_2(recpe_u32, i32, i32, ptr) -DEF_HELPER_FLAGS_2(rsqrte_u32, TCG_CALL_NO_RWG, i32, i32, ptr) +DEF_HELPER_FLAGS_1(recpe_u32, TCG_CALL_NO_RWG, i32, i32) +DEF_HELPER_FLAGS_1(rsqrte_u32, TCG_CALL_NO_RWG, i32, i32) DEF_HELPER_FLAGS_4(neon_tbl, TCG_CALL_NO_RWG, i32, i32, i32, ptr, i32) DEF_HELPER_3(shl_cc, i32, env, i32, i32) @@ -279,19 +281,6 @@ DEF_HELPER_2(neon_hsub_u16, i32, i32, i32) DEF_HELPER_2(neon_hsub_s32, s32, s32, s32) DEF_HELPER_2(neon_hsub_u32, i32, i32, i32) -DEF_HELPER_2(neon_cgt_u8, i32, i32, i32) -DEF_HELPER_2(neon_cgt_s8, i32, i32, i32) -DEF_HELPER_2(neon_cgt_u16, i32, i32, i32) -DEF_HELPER_2(neon_cgt_s16, i32, i32, i32) -DEF_HELPER_2(neon_cgt_u32, i32, i32, i32) -DEF_HELPER_2(neon_cgt_s32, i32, i32, i32) -DEF_HELPER_2(neon_cge_u8, i32, i32, i32) -DEF_HELPER_2(neon_cge_s8, i32, i32, i32) -DEF_HELPER_2(neon_cge_u16, i32, i32, i32) -DEF_HELPER_2(neon_cge_s16, i32, i32, i32) -DEF_HELPER_2(neon_cge_u32, i32, i32, i32) -DEF_HELPER_2(neon_cge_s32, i32, i32, i32) - DEF_HELPER_2(neon_pmin_u8, i32, i32, i32) DEF_HELPER_2(neon_pmin_s8, i32, i32, i32) DEF_HELPER_2(neon_pmin_u16, i32, i32, i32) @@ -301,13 +290,6 @@ DEF_HELPER_2(neon_pmax_s8, i32, i32, i32) DEF_HELPER_2(neon_pmax_u16, i32, i32, i32) DEF_HELPER_2(neon_pmax_s16, i32, i32, i32) -DEF_HELPER_2(neon_abd_u8, i32, i32, i32) -DEF_HELPER_2(neon_abd_s8, i32, i32, i32) -DEF_HELPER_2(neon_abd_u16, i32, i32, i32) -DEF_HELPER_2(neon_abd_s16, i32, i32, i32) -DEF_HELPER_2(neon_abd_u32, i32, i32, i32) -DEF_HELPER_2(neon_abd_s32, i32, i32, i32) - DEF_HELPER_2(neon_shl_u16, i32, i32, i32) DEF_HELPER_2(neon_shl_s16, i32, i32, i32) DEF_HELPER_2(neon_rshl_u8, i32, i32, i32) @@ -351,9 +333,6 @@ DEF_HELPER_2(neon_mul_u16, i32, i32, i32) DEF_HELPER_2(neon_tst_u8, i32, i32, i32) DEF_HELPER_2(neon_tst_u16, i32, i32, i32) DEF_HELPER_2(neon_tst_u32, i32, i32, i32) -DEF_HELPER_2(neon_ceq_u8, i32, i32, i32) -DEF_HELPER_2(neon_ceq_u16, i32, i32, i32) -DEF_HELPER_2(neon_ceq_u32, i32, i32, i32) DEF_HELPER_1(neon_clz_u8, i32, i32) DEF_HELPER_1(neon_clz_u16, i32, i32) @@ -423,7 +402,6 @@ 
DEF_HELPER_FLAGS_2(neon_qneg_s16, TCG_CALL_NO_RWG, i32, env, i32) DEF_HELPER_FLAGS_2(neon_qneg_s32, TCG_CALL_NO_RWG, i32, env, i32) DEF_HELPER_FLAGS_2(neon_qneg_s64, TCG_CALL_NO_RWG, i64, env, i64) -DEF_HELPER_3(neon_abd_f32, i32, i32, i32, ptr) DEF_HELPER_3(neon_ceq_f32, i32, i32, i32, ptr) DEF_HELPER_3(neon_cge_f32, i32, i32, i32, ptr) DEF_HELPER_3(neon_cgt_f32, i32, i32, i32, ptr) @@ -538,29 +516,40 @@ DEF_HELPER_FLAGS_2(neon_qzip8, TCG_CALL_NO_RWG, void, ptr, ptr) DEF_HELPER_FLAGS_2(neon_qzip16, TCG_CALL_NO_RWG, void, ptr, ptr) DEF_HELPER_FLAGS_2(neon_qzip32, TCG_CALL_NO_RWG, void, ptr, ptr) -DEF_HELPER_FLAGS_3(crypto_aese, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(crypto_aese, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_3(crypto_aesmc, TCG_CALL_NO_RWG, void, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(crypto_sha1_3reg, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_2(crypto_sha1h, TCG_CALL_NO_RWG, void, ptr, ptr) -DEF_HELPER_FLAGS_2(crypto_sha1su1, TCG_CALL_NO_RWG, void, ptr, ptr) - -DEF_HELPER_FLAGS_3(crypto_sha256h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr) -DEF_HELPER_FLAGS_3(crypto_sha256h2, TCG_CALL_NO_RWG, void, ptr, ptr, ptr) -DEF_HELPER_FLAGS_2(crypto_sha256su0, TCG_CALL_NO_RWG, void, ptr, ptr) -DEF_HELPER_FLAGS_3(crypto_sha256su1, TCG_CALL_NO_RWG, void, ptr, ptr, ptr) - -DEF_HELPER_FLAGS_3(crypto_sha512h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr) -DEF_HELPER_FLAGS_3(crypto_sha512h2, TCG_CALL_NO_RWG, void, ptr, ptr, ptr) -DEF_HELPER_FLAGS_2(crypto_sha512su0, TCG_CALL_NO_RWG, void, ptr, ptr) -DEF_HELPER_FLAGS_3(crypto_sha512su1, TCG_CALL_NO_RWG, void, ptr, ptr, ptr) - -DEF_HELPER_FLAGS_5(crypto_sm3tt, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32, i32) -DEF_HELPER_FLAGS_3(crypto_sm3partw1, TCG_CALL_NO_RWG, void, ptr, ptr, ptr) -DEF_HELPER_FLAGS_3(crypto_sm3partw2, TCG_CALL_NO_RWG, void, ptr, ptr, ptr) - -DEF_HELPER_FLAGS_2(crypto_sm4e, TCG_CALL_NO_RWG, void, ptr, ptr) -DEF_HELPER_FLAGS_3(crypto_sm4ekey, TCG_CALL_NO_RWG, void, ptr, ptr, ptr) +DEF_HELPER_FLAGS_4(crypto_sha1su0, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(crypto_sha1c, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(crypto_sha1p, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(crypto_sha1m, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(crypto_sha1h, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(crypto_sha1su1, TCG_CALL_NO_RWG, void, ptr, ptr, i32) + +DEF_HELPER_FLAGS_4(crypto_sha256h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(crypto_sha256h2, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(crypto_sha256su0, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(crypto_sha256su1, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_4(crypto_sha512h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(crypto_sha512h2, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(crypto_sha512su0, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(crypto_sha512su1, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_4(crypto_sm3tt1a, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(crypto_sm3tt1b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(crypto_sm3tt2a, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(crypto_sm3tt2b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(crypto_sm3partw1, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(crypto_sm3partw2, 
TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_4(crypto_sm4e, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(crypto_sm4ekey, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_4(crypto_rax1, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_3(crc32, TCG_CALL_NO_RWG_SE, i32, i32, i32, i32) DEF_HELPER_FLAGS_3(crc32c, TCG_CALL_NO_RWG_SE, i32, i32, i32, i32) @@ -622,6 +611,8 @@ DEF_HELPER_FLAGS_5(gvec_fmul_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_5(gvec_fmul_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_5(gvec_fmul_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_fabd_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) + DEF_HELPER_FLAGS_5(gvec_ftsmul_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_5(gvec_ftsmul_s, TCG_CALL_NO_RWG, @@ -690,6 +681,17 @@ DEF_HELPER_FLAGS_2(frint64_s, TCG_CALL_NO_RWG, f32, f32, ptr) DEF_HELPER_FLAGS_2(frint32_d, TCG_CALL_NO_RWG, f64, f64, ptr) DEF_HELPER_FLAGS_2(frint64_d, TCG_CALL_NO_RWG, f64, f64, ptr) +DEF_HELPER_FLAGS_3(gvec_ceq0_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(gvec_ceq0_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(gvec_clt0_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(gvec_clt0_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(gvec_cle0_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(gvec_cle0_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(gvec_cgt0_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(gvec_cgt0_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(gvec_cge0_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(gvec_cge0_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32) + DEF_HELPER_FLAGS_4(gvec_sshl_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_4(gvec_sshl_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_4(gvec_ushl_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) @@ -700,6 +702,66 @@ DEF_HELPER_FLAGS_4(gvec_pmull_q, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_4(neon_pmull_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(gvec_ssra_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(gvec_ssra_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(gvec_ssra_s, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(gvec_ssra_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32) + +DEF_HELPER_FLAGS_3(gvec_usra_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(gvec_usra_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(gvec_usra_s, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(gvec_usra_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32) + +DEF_HELPER_FLAGS_3(gvec_srshr_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(gvec_srshr_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(gvec_srshr_s, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(gvec_srshr_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32) + +DEF_HELPER_FLAGS_3(gvec_urshr_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(gvec_urshr_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(gvec_urshr_s, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(gvec_urshr_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32) + +DEF_HELPER_FLAGS_3(gvec_srsra_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(gvec_srsra_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(gvec_srsra_s, TCG_CALL_NO_RWG, 
void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(gvec_srsra_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32) + +DEF_HELPER_FLAGS_3(gvec_ursra_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(gvec_ursra_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(gvec_ursra_s, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(gvec_ursra_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32) + +DEF_HELPER_FLAGS_3(gvec_sri_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(gvec_sri_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(gvec_sri_s, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(gvec_sri_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32) + +DEF_HELPER_FLAGS_3(gvec_sli_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(gvec_sli_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(gvec_sli_s, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(gvec_sli_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32) + +DEF_HELPER_FLAGS_4(gvec_sabd_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(gvec_sabd_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(gvec_sabd_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(gvec_sabd_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_4(gvec_uabd_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(gvec_uabd_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(gvec_uabd_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(gvec_uabd_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_4(gvec_saba_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(gvec_saba_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(gvec_saba_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(gvec_saba_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_4(gvec_uaba_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(gvec_uaba_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(gvec_uaba_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(gvec_uaba_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) + #ifdef TARGET_AARCH64 #include "helper-a64.h" #include "helper-sve.h" diff --git a/qemu/target/arm/internals.h b/qemu/target/arm/internals.h index 5bb1ad0e61..2bd763072b 100644 --- a/qemu/target/arm/internals.h +++ b/qemu/target/arm/internals.h @@ -454,13 +454,14 @@ static inline uint32_t syn_insn_abort(int same_el, int ea, int s1ptw, int fsc) | ARM_EL_IL | (ea << 9) | (s1ptw << 7) | fsc; } -static inline uint32_t syn_data_abort_no_iss(int same_el, +static inline uint32_t syn_data_abort_no_iss(int same_el, int fnv, int ea, int cm, int s1ptw, int wnr, int fsc) { return (EC_DATAABORT << ARM_EL_EC_SHIFT) | (same_el << ARM_EL_EC_SHIFT) | ARM_EL_IL - | (ea << 9) | (cm << 8) | (s1ptw << 7) | (wnr << 6) | fsc; + | (fnv << 10) | (ea << 9) | (cm << 8) | (s1ptw << 7) + | (wnr << 6) | fsc; } static inline uint32_t syn_data_abort_with_iss(int same_el, @@ -908,6 +909,51 @@ static inline bool regime_is_pan(CPUARMState *env, ARMMMUIdx mmu_idx) } } +/* Return the exception level which controls this address translation regime */ +static inline uint32_t regime_el(CPUARMState *env, ARMMMUIdx mmu_idx) +{ + switch (mmu_idx) { + case ARMMMUIdx_E20_0: + case ARMMMUIdx_E20_2: + case ARMMMUIdx_E20_2_PAN: + case ARMMMUIdx_Stage2: + case ARMMMUIdx_E2: + return 2; + case ARMMMUIdx_SE3: + return 3; + case ARMMMUIdx_SE10_0: + return arm_el_is_aa64(env, 3) ? 
1 : 3; +    case ARMMMUIdx_SE10_1: +    case ARMMMUIdx_SE10_1_PAN: +    case ARMMMUIdx_Stage1_E0: +    case ARMMMUIdx_Stage1_E1: +    case ARMMMUIdx_Stage1_E1_PAN: +    case ARMMMUIdx_E10_0: +    case ARMMMUIdx_E10_1: +    case ARMMMUIdx_E10_1_PAN: +    case ARMMMUIdx_MPrivNegPri: +    case ARMMMUIdx_MUserNegPri: +    case ARMMMUIdx_MPriv: +    case ARMMMUIdx_MUser: +    case ARMMMUIdx_MSPrivNegPri: +    case ARMMMUIdx_MSUserNegPri: +    case ARMMMUIdx_MSPriv: +    case ARMMMUIdx_MSUser: +        return 1; +    default: +        g_assert_not_reached(); +    } +} + +/* Return the TCR controlling this translation regime */ +static inline TCR *regime_tcr(CPUARMState *env, ARMMMUIdx mmu_idx) +{ +    if (mmu_idx == ARMMMUIdx_Stage2) { +        return &env->cp15.vtcr_el2; +    } +    return &env->cp15.tcr_el[regime_el(env, mmu_idx)]; +} + /* Return the FSR value for a debug exception (watchpoint, hardware * breakpoint or BKPT insn) targeting the specified exception level. */ @@ -975,11 +1021,6 @@ static inline int arm_num_ctx_cmps(ARMCPU *cpu) } } -/* Note make_memop_idx reserves 4 bits for mmu_idx, and MO_BSWAP is bit 3. - * Thus a TCGMemOpIdx, without any MO_ALIGN bits, fits in 8 bits. - */ -#define MEMOPIDX_SHIFT 8 - /** * v7m_using_psp: Return true if using process stack pointer * Return true if the CPU is currently using the process stack @@ -1154,6 +1195,9 @@ static inline uint32_t aarch64_pstate_valid_mask(const ARMISARegisters *id) if (isar_feature_aa64_uao(id)) { valid |= PSTATE_UAO; } + if (isar_feature_aa64_mte(id)) { + valid |= PSTATE_TCO; + } return valid; } @@ -1190,6 +1234,24 @@ static inline int exception_target_el(CPUARMState *env) return target_el; } +/* Determine if allocation tags are available. */ +static inline bool allocation_tag_access_enabled(CPUARMState *env, int el, + uint64_t sctlr) +{ + if (el < 3 + && arm_feature(env, ARM_FEATURE_EL3) + && !(env->cp15.scr_el3 & SCR_ATA)) { + return false; + } + if (el < 2 + && arm_feature(env, ARM_FEATURE_EL2) + && !(arm_hcr_el2_eff(env) & HCR_ATA)) { + return false; + } + sctlr &= (el == 0 ? SCTLR_ATA0 : SCTLR_ATA); + return sctlr != 0; +} + /* Security attributes for an address, as returned by v8m_security_lookup. */ typedef struct V8M_SAttributes { bool subpage; /* true if these attrs don't cover the whole TARGET_PAGE */ @@ -1221,8 +1283,89 @@ bool get_phys_addr(CPUARMState *env, target_ulong address, MMUAccessType access_type, ARMMMUIdx mmu_idx, hwaddr *phys_ptr, MemTxAttrs *attrs, int *prot, target_ulong *page_size, - ARMMMUFaultInfo *fi, ARMCacheAttrs *cacheattrs); + ARMMMUFaultInfo *fi, ARMCacheAttrs *cacheattrs) + UNICORN_NONNULL; void arm_log_exception(int idx); +/* + * The log2 of the words in the tag block, for GMID_EL1.BS. + * This is the maximum, 256 bytes, which manipulates 64-bits of tags. + */ +#define GMID_EL1_BS 6 + +/* We associate one allocation tag per 16 bytes, the minimum. */ +#define LOG2_TAG_GRANULE 4 +#define TAG_GRANULE (1 << LOG2_TAG_GRANULE) + +/* + * The SVE simd_data field, for memory ops, contains either + * rd (5 bits) or a shift count (2 bits). + */ +#define SVE_MTEDESC_SHIFT 5 + +/* Bits within a descriptor passed to the helper_mte_check* functions. 
*/ +FIELD(MTEDESC, MIDX, 0, 4) +FIELD(MTEDESC, TBI, 4, 2) +FIELD(MTEDESC, TCMA, 6, 2) +FIELD(MTEDESC, WRITE, 8, 1) +FIELD(MTEDESC, ESIZE, 9, 5) +FIELD(MTEDESC, TSIZE, 14, 10) /* mte_checkN only */ + +bool mte_probe1(CPUARMState *env, uint32_t desc, uint64_t ptr); +uint64_t mte_check1(CPUARMState *env, uint32_t desc, + uint64_t ptr, uintptr_t ra); +uint64_t mte_checkN(CPUARMState *env, uint32_t desc, + uint64_t ptr, uintptr_t ra); + +static inline int allocation_tag_from_addr(uint64_t ptr) +{ + return extract64(ptr, 56, 4); +} + +static inline uint64_t address_with_allocation_tag(uint64_t ptr, int rtag) +{ + return deposit64(ptr, 56, 4, rtag); +} + +/* Return true if tbi bits mean that the access is checked. */ +static inline bool tbi_check(uint32_t desc, int bit55) +{ + return (desc >> (R_MTEDESC_TBI_SHIFT + bit55)) & 1; +} + +/* Return true if tcma bits mean that the access is unchecked. */ +static inline bool tcma_check(uint32_t desc, int bit55, int ptr_tag) +{ + /* + * We had extracted bit55 and ptr_tag for other reasons, so fold + * (ptr<59:55> == 00000 || ptr<59:55> == 11111) into a single test. + */ + bool match = ((ptr_tag + bit55) & 0xf) == 0; + bool tcma = (desc >> (R_MTEDESC_TCMA_SHIFT + bit55)) & 1; + return tcma && match; +} + +/* + * For TBI, ideally, we would do nothing. Proper behaviour on fault is + * for the tag to be present in the FAR_ELx register. But for user-only + * mode, we do not have a TLB with which to implement this, so we must + * remove the top byte. + */ +static inline uint64_t useronly_clean_ptr(uint64_t ptr) +{ + /* TBI is known to be enabled. */ + ptr = sextract64(ptr, 0, 56); + return ptr; +} + +static inline uint64_t useronly_maybe_clean_ptr(uint32_t desc, uint64_t ptr) +{ + int64_t clean_ptr = sextract64(ptr, 0, 56); + if (tbi_check(desc, clean_ptr < 0)) { + ptr = clean_ptr; + } + return ptr; +} + #endif diff --git a/qemu/target/arm/m_helper.c b/qemu/target/arm/m_helper.c index 7fd9d21965..22f4b1b949 100644 --- a/qemu/target/arm/m_helper.c +++ b/qemu/target/arm/m_helper.c @@ -87,12 +87,13 @@ static bool v7m_stack_write(ARMCPU *cpu, uint32_t addr, uint32_t value, hwaddr physaddr; int prot; ARMMMUFaultInfo fi = { 0 }; + ARMCacheAttrs cacheattrs = {}; bool secure = mmu_idx & ARM_MMU_IDX_M_S; // int exc; // bool exc_secure; if (get_phys_addr(env, addr, MMU_DATA_STORE, mmu_idx, &physaddr, - &attrs, &prot, &page_size, &fi, NULL)) { + &attrs, &prot, &page_size, &fi, &cacheattrs)) { /* MPU/SAU lookup failed */ if (fi.type == ARMFault_QEMU_SFault) { if (mode == STACK_LAZYFP) { @@ -187,13 +188,14 @@ static bool v7m_stack_read(ARMCPU *cpu, uint32_t *dest, uint32_t addr, hwaddr physaddr; int prot; ARMMMUFaultInfo fi = { 0 }; + ARMCacheAttrs cacheattrs = {}; bool secure = mmu_idx & ARM_MMU_IDX_M_S; int exc; bool exc_secure; uint32_t value; if (get_phys_addr(env, addr, MMU_DATA_LOAD, mmu_idx, &physaddr, - &attrs, &prot, &page_size, &fi, NULL)) { + &attrs, &prot, &page_size, &fi, &cacheattrs)) { /* MPU/SAU lookup failed */ if (fi.type == ARMFault_QEMU_SFault) { qemu_log_mask(CPU_LOG_INT, @@ -1859,6 +1861,7 @@ static bool v7m_read_half_insn(ARMCPU *cpu, ARMMMUIdx mmu_idx, V8M_SAttributes sattrs = { 0 }; MemTxAttrs attrs = { 0 }; ARMMMUFaultInfo fi = { 0 }; + ARMCacheAttrs cacheattrs = {}; MemTxResult txres; target_ulong page_size; hwaddr physaddr; @@ -1877,7 +1880,7 @@ static bool v7m_read_half_insn(ARMCPU *cpu, ARMMMUIdx mmu_idx, return false; } if (get_phys_addr(env, addr, MMU_INST_FETCH, mmu_idx, - &physaddr, &attrs, &prot, &page_size, &fi, NULL)) { + &physaddr, 
&attrs, &prot, &page_size, &fi, &cacheattrs)) { /* the MPU lookup failed */ env->v7m.cfsr[env->v7m.secure] |= R_V7M_CFSR_IACCVIOL_MASK; armv7m_nvic_set_pending(env->nvic, ARMV7M_EXCP_MEM, env->v7m.secure); diff --git a/qemu/target/arm/mte_helper.c b/qemu/target/arm/mte_helper.c new file mode 100644 index 0000000000..630e18a8ac --- /dev/null +++ b/qemu/target/arm/mte_helper.c @@ -0,0 +1,913 @@ +/* + * ARM v8.5-MemTag Operations + * + * Copyright (c) 2020 Linaro, Ltd. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see <http://www.gnu.org/licenses/>. + */ + +#include "qemu/osdep.h" +#include "cpu.h" +#include "internals.h" +#include "exec/exec-all.h" +#include "exec/ram_addr.h" +#include "exec/cpu_ldst.h" +#include "exec/helper-proto.h" +#include "qemu/guest-random.h" + + +static int choose_nonexcluded_tag(int tag, int offset, uint16_t exclude) +{ +    if (exclude == 0xffff) { +        return 0; +    } +    if (offset == 0) { +        while (exclude & (1 << tag)) { +            tag = (tag + 1) & 15; +        } +    } else { +        do { +            do { +                tag = (tag + 1) & 15; +            } while (exclude & (1 << tag)); +        } while (--offset > 0); +    } +    return tag; +} + +/** + * allocation_tag_mem: + * @env: the cpu environment + * @ptr_mmu_idx: the addressing regime to use for the virtual address + * @ptr: the virtual address for which to look up tag memory + * @ptr_access: the access to use for the virtual address + * @ptr_size: the number of bytes in the normal memory access + * @tag_access: the access to use for the tag memory + * @tag_size: the number of bytes in the tag memory access + * @ra: the return address for exception handling + * + * Our tag memory is formatted as a sequence of little-endian nibbles. + * That is, the byte at (addr >> (LOG2_TAG_GRANULE + 1)) contains two + * tags, with the tag at [3:0] for the lower addr and the tag at [7:4] + * for the higher addr. + * + * Here, resolve the physical address from the virtual address, and return + * a pointer to the corresponding tag byte. Exit with exception if the + * virtual address is not accessible for @ptr_access. + * + * The @ptr_size and @tag_size values may not have an obvious relation + * due to the alignment of @ptr, and the number of tag checks required. + * + * If there is no tag storage corresponding to @ptr, return NULL. + */ +static uint8_t *allocation_tag_mem(CPUARMState *env, int ptr_mmu_idx, +                                   uint64_t ptr, MMUAccessType ptr_access, +                                   int ptr_size, MMUAccessType tag_access, +                                   int tag_size, uintptr_t ra) +{ +    struct uc_struct *uc = env->uc; +    uintptr_t index; +    CPUIOTLBEntry *iotlbentry; +    int in_page, flags; +    ram_addr_t ptr_ra; +    hwaddr ptr_paddr, tag_paddr, xlat; +    MemoryRegion *mr; +    ARMASIdx tag_asi; +    AddressSpace *tag_as; +    void *host; + +    /* +     * Probe the first byte of the virtual address. This raises an +     * exception for inaccessible pages, and resolves the virtual address +     * into the softmmu tlb. +     * +     * When RA == 0, this is for mte_probe1. The page is expected to be +     * valid. 
Indicate to probe_access_flags no-fault, then assert that + * we received a valid page. + */ + flags = probe_access_flags(env, ptr, ptr_access, ptr_mmu_idx, + ra == 0, &host, ra); + assert(!(flags & TLB_INVALID_MASK)); + + /* + * Find the iotlbentry for ptr. This *must* be present in the TLB + * because we just found the mapping. + * TODO: Perhaps there should be a cputlb helper that returns a + * matching tlb entry + iotlb entry. + */ + index = tlb_index(env, ptr_mmu_idx, ptr); + iotlbentry = &env_tlb(env)->d[ptr_mmu_idx].iotlb[index]; + + /* If the virtual page MemAttr != Tagged, access unchecked. */ + if (!arm_tlb_mte_tagged(&iotlbentry->attrs)) { + return NULL; + } + + /* + * If not backed by host ram, there is no tag storage: access unchecked. + * This is probably a guest os bug though, so log it. + */ + if (unlikely(flags & TLB_MMIO)) { + qemu_log_mask(LOG_GUEST_ERROR, + "Page @ 0x%" PRIx64 " indicates Tagged Normal memory " + "but is not backed by host ram\n", ptr); + return NULL; + } + + /* + * The Normal memory access can extend to the next page. E.g. a single + * 8-byte access to the last byte of a page will check only the last + * tag on the first page. + * Any page access exception has priority over tag check exception. + */ + in_page = -(ptr | TARGET_PAGE_MASK); + if (unlikely(ptr_size > in_page)) { + void *ignore; + flags |= probe_access_flags(env, ptr + in_page, ptr_access, + ptr_mmu_idx, ra == 0, &ignore, ra); + assert(!(flags & TLB_INVALID_MASK)); + } + + /* Any debug exception has priority over a tag check exception. */ + if (unlikely(flags & TLB_WATCHPOINT)) { + int wp = ptr_access == MMU_DATA_LOAD ? BP_MEM_READ : BP_MEM_WRITE; + assert(ra != 0); + cpu_check_watchpoint(env_cpu(env), ptr, ptr_size, + iotlbentry->attrs, wp, ra); + } + + /* + * Find the physical address within the normal mem space. + * The memory region lookup must succeed because TLB_MMIO was + * not set in the cputlb lookup above. + */ + mr = memory_region_from_host(uc, host, &ptr_ra); + tcg_debug_assert(mr != NULL); + tcg_debug_assert(memory_region_is_ram(mr)); + ptr_paddr = ptr_ra; + do { + ptr_paddr += mr->addr; + mr = mr->container; + } while (mr); + + /* Convert to the physical address in tag space. */ + tag_paddr = ptr_paddr >> (LOG2_TAG_GRANULE + 1); + + /* Look up the address in tag space. */ + tag_asi = iotlbentry->attrs.secure ? ARMASIdx_TagS : ARMASIdx_TagNS; + tag_as = cpu_get_address_space(env_cpu(env), tag_asi); + mr = address_space_translate(tag_as, tag_paddr, &xlat, NULL, + tag_access == MMU_DATA_STORE, + iotlbentry->attrs); + + /* + * Note that @mr will never be NULL. If there is nothing in the address + * space at @tag_paddr, the translation will return the unallocated memory + * region. For our purposes, the result must be ram. + */ + if (unlikely(!memory_region_is_ram(mr))) { + /* ??? Failure is a board configuration error. */ + qemu_log_mask(LOG_UNIMP, + "Tag Memory @ 0x%" HWADDR_PRIx " not found for " + "Normal Memory @ 0x%" HWADDR_PRIx "\n", + tag_paddr, ptr_paddr); + return NULL; + } + + /* + * Ensure the tag memory is dirty on write, for migration. + * Tag memory can never contain code or display memory (vga). 
+ */ + if (tag_access == MMU_DATA_STORE) { + ram_addr_t tag_ra = memory_region_get_ram_addr(mr) + xlat; + cpu_physical_memory_set_dirty_flag(tag_ra, DIRTY_MEMORY_MIGRATION); + } + + return (uint8_t*)memory_region_get_ram_ptr(mr) + xlat; +} + +uint64_t HELPER(irg)(CPUARMState *env, uint64_t rn, uint64_t rm) +{ + uint16_t exclude = extract32(rm | env->cp15.gcr_el1, 0, 16); + int rrnd = extract32(env->cp15.gcr_el1, 16, 1); + int start = extract32(env->cp15.rgsr_el1, 0, 4); + int seed = extract32(env->cp15.rgsr_el1, 8, 16); + int offset, i, rtag; + + /* + * Our IMPDEF choice for GCR_EL1.RRND==1 is to continue to use the + * deterministic algorithm. Except that with RRND==1 the kernel is + * not required to have set RGSR_EL1.SEED != 0, which is required for + * the deterministic algorithm to function. So we force a non-zero + * SEED for that case. + */ + if (unlikely(seed == 0) && rrnd) { + do { + uint16_t two; + + if (qemu_guest_getrandom(&two, sizeof(two)) < 0) { + /* + * Failed, for unknown reasons in the crypto subsystem. + * Best we can do is use a constant seed. + */ + two = 1; + } + seed = two; + } while (seed == 0); + } + + /* RandomTag */ + for (i = offset = 0; i < 4; ++i) { + /* NextRandomTagBit */ + int top = (extract32(seed, 5, 1) ^ extract32(seed, 3, 1) ^ + extract32(seed, 2, 1) ^ extract32(seed, 0, 1)); + seed = (top << 15) | (seed >> 1); + offset |= top << i; + } + rtag = choose_nonexcluded_tag(start, offset, exclude); + env->cp15.rgsr_el1 = rtag | (seed << 8); + + return address_with_allocation_tag(rn, rtag); +} + +uint64_t HELPER(addsubg)(CPUARMState *env, uint64_t ptr, + int32_t offset, uint32_t tag_offset) +{ + int start_tag = allocation_tag_from_addr(ptr); + uint16_t exclude = extract32(env->cp15.gcr_el1, 0, 16); + int rtag = choose_nonexcluded_tag(start_tag, tag_offset, exclude); + + return address_with_allocation_tag(ptr + offset, rtag); +} + +static int load_tag1(uint64_t ptr, uint8_t *mem) +{ + int ofs = extract32(ptr, LOG2_TAG_GRANULE, 1) * 4; + return extract32(*mem, ofs, 4); +} + +uint64_t HELPER(ldg)(CPUARMState *env, uint64_t ptr, uint64_t xt) +{ + int mmu_idx = cpu_mmu_index(env, false); + uint8_t *mem; + int rtag = 0; + + /* Trap if accessing an invalid page. */ + mem = allocation_tag_mem(env, mmu_idx, ptr, MMU_DATA_LOAD, 1, + MMU_DATA_LOAD, 1, GETPC()); + + /* Load if page supports tags. */ + if (mem) { + rtag = load_tag1(ptr, mem); + } + + return address_with_allocation_tag(xt, rtag); +} + +static void check_tag_aligned(CPUARMState *env, uint64_t ptr, uintptr_t ra) +{ + if (unlikely(!QEMU_IS_ALIGNED(ptr, TAG_GRANULE))) { + arm_cpu_do_unaligned_access(env_cpu(env), ptr, MMU_DATA_STORE, + cpu_mmu_index(env, false), ra); + g_assert_not_reached(); + } +} + +/* For use in a non-parallel context, store to the given nibble. */ +static void store_tag1(uint64_t ptr, uint8_t *mem, int tag) +{ + int ofs = extract32(ptr, LOG2_TAG_GRANULE, 1) * 4; + *mem = deposit32(*mem, ofs, 4, tag); +} + +/* For use in a parallel context, atomically store to the given nibble. 
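+ * Two adjacent 16-byte granules share one tag byte, so a plain
+ * read-modify-write could lose a concurrent update to the other nibble;
+ * hence the compare-and-swap retry loop below.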
*/ +static void store_tag1_parallel(uint64_t ptr, uint8_t *mem, int tag) +{ + int ofs = extract32(ptr, LOG2_TAG_GRANULE, 1) * 4; + uint8_t old = atomic_read(mem); + + while (1) { + uint8_t new = deposit32(old, ofs, 4, tag); + uint8_t cmp = atomic_cmpxchg(mem, old, new); + if (likely(cmp == old)) { + return; + } + old = cmp; + } +} + +typedef void stg_store1(uint64_t, uint8_t *, int); + +static inline void do_stg(CPUARMState *env, uint64_t ptr, uint64_t xt, + uintptr_t ra, stg_store1 store1) +{ + int mmu_idx = cpu_mmu_index(env, false); + uint8_t *mem; + + check_tag_aligned(env, ptr, ra); + + /* Trap if accessing an invalid page. */ + mem = allocation_tag_mem(env, mmu_idx, ptr, MMU_DATA_STORE, TAG_GRANULE, + MMU_DATA_STORE, 1, ra); + + /* Store if page supports tags. */ + if (mem) { + store1(ptr, mem, allocation_tag_from_addr(xt)); + } +} + +void HELPER(stg)(CPUARMState *env, uint64_t ptr, uint64_t xt) +{ + do_stg(env, ptr, xt, GETPC(), store_tag1); +} + +void HELPER(stg_parallel)(CPUARMState *env, uint64_t ptr, uint64_t xt) +{ + do_stg(env, ptr, xt, GETPC(), store_tag1_parallel); +} + +void HELPER(stg_stub)(CPUARMState *env, uint64_t ptr) +{ + int mmu_idx = cpu_mmu_index(env, false); + uintptr_t ra = GETPC(); + + check_tag_aligned(env, ptr, ra); + probe_write(env, ptr, TAG_GRANULE, mmu_idx, ra); +} + +static inline void do_st2g(CPUARMState *env, uint64_t ptr, uint64_t xt, + uintptr_t ra, stg_store1 store1) +{ + int mmu_idx = cpu_mmu_index(env, false); + int tag = allocation_tag_from_addr(xt); + uint8_t *mem1, *mem2; + + check_tag_aligned(env, ptr, ra); + + /* + * Trap if accessing an invalid page(s). + * This takes priority over !allocation_tag_access_enabled. + */ + if (ptr & TAG_GRANULE) { + /* Two stores unaligned mod TAG_GRANULE*2 -- modify two bytes. */ + mem1 = allocation_tag_mem(env, mmu_idx, ptr, MMU_DATA_STORE, + TAG_GRANULE, MMU_DATA_STORE, 1, ra); + mem2 = allocation_tag_mem(env, mmu_idx, ptr + TAG_GRANULE, + MMU_DATA_STORE, TAG_GRANULE, + MMU_DATA_STORE, 1, ra); + + /* Store if page(s) support tags. */ + if (mem1) { + store1(TAG_GRANULE, mem1, tag); + } + if (mem2) { + store1(0, mem2, tag); + } + } else { + /* Two stores aligned mod TAG_GRANULE*2 -- modify one byte. */ + mem1 = allocation_tag_mem(env, mmu_idx, ptr, MMU_DATA_STORE, + 2 * TAG_GRANULE, MMU_DATA_STORE, 1, ra); + if (mem1) { + tag |= tag << 4; + atomic_set(mem1, tag); + } + } +} + +void HELPER(st2g)(CPUARMState *env, uint64_t ptr, uint64_t xt) +{ + do_st2g(env, ptr, xt, GETPC(), store_tag1); +} + +void HELPER(st2g_parallel)(CPUARMState *env, uint64_t ptr, uint64_t xt) +{ + do_st2g(env, ptr, xt, GETPC(), store_tag1_parallel); +} + +void HELPER(st2g_stub)(CPUARMState *env, uint64_t ptr) +{ + uc_engine *uc = env->uc; + int mmu_idx = cpu_mmu_index(env, false); + uintptr_t ra = GETPC(); + int in_page = -(ptr | TARGET_PAGE_MASK); + + check_tag_aligned(env, ptr, ra); + + if (likely(in_page >= 2 * TAG_GRANULE)) { + probe_write(env, ptr, 2 * TAG_GRANULE, mmu_idx, ra); + } else { + probe_write(env, ptr, TAG_GRANULE, mmu_idx, ra); + probe_write(env, ptr + TAG_GRANULE, TAG_GRANULE, mmu_idx, ra); + } +} + +#define LDGM_STGM_SIZE (4 << GMID_EL1_BS) + +uint64_t HELPER(ldgm)(CPUARMState *env, uint64_t ptr) +{ + int mmu_idx = cpu_mmu_index(env, false); + uintptr_t ra = GETPC(); + void *tag_mem; + + ptr = QEMU_ALIGN_DOWN(ptr, LDGM_STGM_SIZE); + + /* Trap if accessing an invalid page. 
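+ * (Illustrative sizing: with GMID_EL1_BS fixed at 6, as the build assert
+ * below requires, LDGM_STGM_SIZE is 256 bytes, i.e. 16 tag granules or
+ * exactly 64 bits' worth of tags.)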
*/ + tag_mem = allocation_tag_mem(env, mmu_idx, ptr, MMU_DATA_LOAD, + LDGM_STGM_SIZE, MMU_DATA_LOAD, + LDGM_STGM_SIZE / (2 * TAG_GRANULE), ra); + + /* The tag is squashed to zero if the page does not support tags. */ + if (!tag_mem) { + return 0; + } + + QEMU_BUILD_BUG_ON(GMID_EL1_BS != 6); + /* + * We are loading 64-bits worth of tags. The ordering of elements + * within the word corresponds to a 64-bit little-endian operation. + */ + return ldq_le_p(tag_mem); +} + +void HELPER(stgm)(CPUARMState *env, uint64_t ptr, uint64_t val) +{ + int mmu_idx = cpu_mmu_index(env, false); + uintptr_t ra = GETPC(); + void *tag_mem; + + ptr = QEMU_ALIGN_DOWN(ptr, LDGM_STGM_SIZE); + + /* Trap if accessing an invalid page. */ + tag_mem = allocation_tag_mem(env, mmu_idx, ptr, MMU_DATA_STORE, + LDGM_STGM_SIZE, MMU_DATA_LOAD, + LDGM_STGM_SIZE / (2 * TAG_GRANULE), ra); + + /* + * Tag store only happens if the page support tags, + * and if the OS has enabled access to the tags. + */ + if (!tag_mem) { + return; + } + + QEMU_BUILD_BUG_ON(GMID_EL1_BS != 6); + /* + * We are storing 64-bits worth of tags. The ordering of elements + * within the word corresponds to a 64-bit little-endian operation. + */ + stq_le_p(tag_mem, val); +} + +void HELPER(stzgm_tags)(CPUARMState *env, uint64_t ptr, uint64_t val) +{ + uintptr_t ra = GETPC(); + int mmu_idx = cpu_mmu_index(env, false); + int log2_dcz_bytes, log2_tag_bytes; + intptr_t dcz_bytes, tag_bytes; + uint8_t *mem; + + /* + * In arm_cpu_realizefn, we assert that dcz > LOG2_TAG_GRANULE+1, + * i.e. 32 bytes, which is an unreasonably small dcz anyway, + * to make sure that we can access one complete tag byte here. + */ + log2_dcz_bytes = env_archcpu(env)->dcz_blocksize + 2; + log2_tag_bytes = log2_dcz_bytes - (LOG2_TAG_GRANULE + 1); + dcz_bytes = (intptr_t)1 << log2_dcz_bytes; + tag_bytes = (intptr_t)1 << log2_tag_bytes; + ptr &= -dcz_bytes; + + mem = allocation_tag_mem(env, mmu_idx, ptr, MMU_DATA_STORE, dcz_bytes, + MMU_DATA_STORE, tag_bytes, ra); + if (mem) { + int tag_pair = (val & 0xf) * 0x11; + memset(mem, tag_pair, tag_bytes); + } +} + +/* Record a tag check failure. */ +static void mte_check_fail(CPUARMState *env, int mmu_idx, + uint64_t dirty_ptr, uintptr_t ra) +{ + ARMMMUIdx arm_mmu_idx = core_to_aa64_mmu_idx(mmu_idx); + int el, reg_el, tcf, select; + uint64_t sctlr; + + reg_el = regime_el(env, arm_mmu_idx); + sctlr = env->cp15.sctlr_el[reg_el]; + + switch (arm_mmu_idx) { + case ARMMMUIdx_E10_0: + case ARMMMUIdx_E20_0: + el = 0; + tcf = extract64(sctlr, 38, 2); + break; + default: + el = reg_el; + tcf = extract64(sctlr, 40, 2); + } + + switch (tcf) { + case 1: + /* + * Tag check fail causes a synchronous exception. + * + * In restore_state_to_opc, we set the exception syndrome + * for the load or store operation. Unwind first so we + * may overwrite that with the syndrome for the tag check. + */ + cpu_restore_state(env_cpu(env), ra, true); + env->exception.vaddress = dirty_ptr; + raise_exception(env, EXCP_DATA_ABORT, + syn_data_abort_no_iss(el != 0, 0, 0, 0, 0, 0, 0x11), + exception_target_el(env)); + /* noreturn, but fall through to the assert anyway */ + + case 0: + /* + * Tag check fail does not affect the PE. + * We eliminate this case by not setting MTE_ACTIVE + * in tb_flags, so that we never make this runtime call. + */ + g_assert_not_reached(); + + case 2: + /* Tag check fail causes asynchronous flag set. 
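+ * The failure is only recorded: the TF0/TF1 bit of TFSR_ELx (chosen from
+ * bit 55 of the faulting address when the regime has two ranges) is set,
+ * and no exception is raised here.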
*/ + mmu_idx = arm_mmu_idx_el(env, el); + if (regime_has_2_ranges(mmu_idx)) { + select = extract64(dirty_ptr, 55, 1); + } else { + select = 0; + } + env->cp15.tfsr_el[el] |= 1 << select; + break; + + default: + /* Case 3: Reserved. */ + qemu_log_mask(LOG_GUEST_ERROR, + "Tag check failure with SCTLR_EL%d.TCF%s " + "set to reserved value %d\n", + reg_el, el ? "" : "0", tcf); + break; + } +} + +/* + * Perform an MTE checked access for a single logical or atomic access. + */ +static bool mte_probe1_int(CPUARMState *env, uint32_t desc, uint64_t ptr, + uintptr_t ra, int bit55) +{ + int mem_tag, mmu_idx, ptr_tag, size; + MMUAccessType type; + uint8_t *mem; + + ptr_tag = allocation_tag_from_addr(ptr); + + if (tcma_check(desc, bit55, ptr_tag)) { + return true; + } + + mmu_idx = FIELD_EX32(desc, MTEDESC, MIDX); + type = FIELD_EX32(desc, MTEDESC, WRITE) ? MMU_DATA_STORE : MMU_DATA_LOAD; + size = FIELD_EX32(desc, MTEDESC, ESIZE); + + mem = allocation_tag_mem(env, mmu_idx, ptr, type, size, + MMU_DATA_LOAD, 1, ra); + if (!mem) { + return true; + } + + mem_tag = load_tag1(ptr, mem); + return ptr_tag == mem_tag; +} + +/* + * No-fault version of mte_check1, to be used by SVE for MemSingleNF. + * Returns false if the access is Checked and the check failed. This + * is only intended to probe the tag -- the validity of the page must + * be checked beforehand. + */ +bool mte_probe1(CPUARMState *env, uint32_t desc, uint64_t ptr) +{ + int bit55 = extract64(ptr, 55, 1); + + /* If TBI is disabled, the access is unchecked. */ + if (unlikely(!tbi_check(desc, bit55))) { + return true; + } + + return mte_probe1_int(env, desc, ptr, 0, bit55); +} + +uint64_t mte_check1(CPUARMState *env, uint32_t desc, + uint64_t ptr, uintptr_t ra) +{ + int bit55 = extract64(ptr, 55, 1); + + /* If TBI is disabled, the access is unchecked, and ptr is not dirty. */ + if (unlikely(!tbi_check(desc, bit55))) { + return ptr; + } + + if (unlikely(!mte_probe1_int(env, desc, ptr, ra, bit55))) { + int mmu_idx = FIELD_EX32(desc, MTEDESC, MIDX); + mte_check_fail(env, mmu_idx, ptr, ra); + } + + return useronly_clean_ptr(ptr); +} + +uint64_t HELPER(mte_check1)(CPUARMState *env, uint32_t desc, uint64_t ptr) +{ + return mte_check1(env, desc, ptr, GETPC()); +} + +/* + * Perform an MTE checked access for multiple logical accesses. + */ + +/** + * checkN: + * @tag: tag memory to test + * @odd: true to begin testing at tags at odd nibble + * @cmp: the tag to compare against + * @count: number of tags to test + * + * Return the number of successful tests. + * Thus a return value < @count indicates a failure. + * + * A note about sizes: count is expected to be small. + * + * The most common use will be LDP/STP of two integer registers, + * which means 16 bytes of memory touching at most 2 tags, but + * often the access is aligned and thus just 1 tag. + * + * Using AdvSIMD LD/ST (multiple), one can access 64 bytes of memory, + * touching at most 5 tags. SVE LDR/STR (vector) with the default + * vector length is also 64 bytes; the maximum architectural length + * is 256 bytes touching at most 9 tags. + * + * The loop below uses 7 logical operations and 1 memory operation + * per tag pair. An implementation that loads an aligned word and + * uses masking to ignore adjacent tags requires 18 logical operations + * and thus does not begin to pay off until 6 tags. + * Which, according to the survey above, is unlikely to be common. + */ +static int checkN(uint8_t *mem, int odd, int cmp, int count) +{ + int n = 0, diff; + + /* Replicate the test tag and compare. 
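+ * E.g. a test tag of 0x3 is replicated to 0x33, so one XOR with a tag
+ * byte leaves zero in whichever nibble matches; the even/odd tests below
+ * then pick out the two 4-bit tags.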
*/ + cmp *= 0x11; + diff = *mem++ ^ cmp; + + if (odd) { + goto start_odd; + } + + while (1) { + /* Test even tag. */ + if (unlikely((diff) & 0x0f)) { + break; + } + if (++n == count) { + break; + } + + start_odd: + /* Test odd tag. */ + if (unlikely((diff) & 0xf0)) { + break; + } + if (++n == count) { + break; + } + + diff = *mem++ ^ cmp; + } + return n; +} + +uint64_t mte_checkN(CPUARMState *env, uint32_t desc, + uint64_t ptr, uintptr_t ra) +{ + uc_engine *uc = env->uc; + int mmu_idx, ptr_tag, bit55; + uint64_t ptr_last, ptr_end, prev_page, next_page; + uint64_t tag_first, tag_end; + uint64_t tag_byte_first, tag_byte_end; + uint32_t esize, total, tag_count, tag_size, n, c; + uint8_t *mem1, *mem2; + MMUAccessType type; + + bit55 = extract64(ptr, 55, 1); + + /* If TBI is disabled, the access is unchecked, and ptr is not dirty. */ + if (unlikely(!tbi_check(desc, bit55))) { + return ptr; + } + + ptr_tag = allocation_tag_from_addr(ptr); + + if (tcma_check(desc, bit55, ptr_tag)) { + goto done; + } + + mmu_idx = FIELD_EX32(desc, MTEDESC, MIDX); + type = FIELD_EX32(desc, MTEDESC, WRITE) ? MMU_DATA_STORE : MMU_DATA_LOAD; + esize = FIELD_EX32(desc, MTEDESC, ESIZE); + total = FIELD_EX32(desc, MTEDESC, TSIZE); + + /* Find the addr of the end of the access, and of the last element. */ + ptr_end = ptr + total; + ptr_last = ptr_end - esize; + + /* Round the bounds to the tag granule, and compute the number of tags. */ + tag_first = QEMU_ALIGN_DOWN(ptr, TAG_GRANULE); + tag_end = QEMU_ALIGN_UP(ptr_last, TAG_GRANULE); + tag_count = (tag_end - tag_first) / TAG_GRANULE; + + /* Round the bounds to twice the tag granule, and compute the bytes. */ + tag_byte_first = QEMU_ALIGN_DOWN(ptr, 2 * TAG_GRANULE); + tag_byte_end = QEMU_ALIGN_UP(ptr_last, 2 * TAG_GRANULE); + + /* Locate the page boundaries. */ + prev_page = ptr & TARGET_PAGE_MASK; + next_page = prev_page + TARGET_PAGE_SIZE; + + if (likely(tag_end - prev_page <= TARGET_PAGE_SIZE)) { + /* Memory access stays on one page. */ + tag_size = (tag_byte_end - tag_byte_first) / (2 * TAG_GRANULE); + mem1 = allocation_tag_mem(env, mmu_idx, ptr, type, total, + MMU_DATA_LOAD, tag_size, ra); + if (!mem1) { + goto done; + } + /* Perform all of the comparisons. */ + n = checkN(mem1, ptr & TAG_GRANULE, ptr_tag, tag_count); + } else { + /* Memory access crosses to next page. */ + tag_size = (next_page - tag_byte_first) / (2 * TAG_GRANULE); + mem1 = allocation_tag_mem(env, mmu_idx, ptr, type, next_page - ptr, + MMU_DATA_LOAD, tag_size, ra); + + tag_size = (tag_byte_end - next_page) / (2 * TAG_GRANULE); + mem2 = allocation_tag_mem(env, mmu_idx, next_page, type, + ptr_end - next_page, + MMU_DATA_LOAD, tag_size, ra); + + /* + * Perform all of the comparisons. + * Note the possible but unlikely case of the operation spanning + * two pages that do not both have tagging enabled. + */ + n = c = (next_page - tag_first) / TAG_GRANULE; + if (mem1) { + n = checkN(mem1, ptr & TAG_GRANULE, ptr_tag, c); + } + if (n == c) { + if (!mem2) { + goto done; + } + n += checkN(mem2, 0, ptr_tag, tag_count - c); + } + } + + /* + * If we failed, we know which granule. Compute the element that + * is first in that granule, and signal failure on that element. 
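+ * For example (with TAG_GRANULE == 16): ptr == 0x1008, esize == 8 and a
+ * failure at n == 2 give tag_first == 0x1000 and
+ * fail_ofs == 0x1000 + 2 * 16 - 0x1008 == 0x18, already a multiple of esize.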
+ */ + if (unlikely(n < tag_count)) { + uint64_t fail_ofs; + + fail_ofs = tag_first + n * TAG_GRANULE - ptr; + fail_ofs = ROUND_UP(fail_ofs, esize); + mte_check_fail(env, mmu_idx, ptr + fail_ofs, ra); + } + + done: + return useronly_clean_ptr(ptr); +} + +uint64_t HELPER(mte_checkN)(CPUARMState *env, uint32_t desc, uint64_t ptr) +{ + return mte_checkN(env, desc, ptr, GETPC()); +} + +/* + * Perform an MTE checked access for DC_ZVA. + */ +uint64_t HELPER(mte_check_zva)(CPUARMState *env, uint32_t desc, uint64_t ptr) +{ + uintptr_t ra = GETPC(); + int log2_dcz_bytes, log2_tag_bytes; + int mmu_idx, bit55; + intptr_t dcz_bytes, tag_bytes, i; + void *mem; + uint64_t ptr_tag, mem_tag, align_ptr; + + bit55 = extract64(ptr, 55, 1); + + /* If TBI is disabled, the access is unchecked, and ptr is not dirty. */ + if (unlikely(!tbi_check(desc, bit55))) { + return ptr; + } + + ptr_tag = allocation_tag_from_addr(ptr); + + if (tcma_check(desc, bit55, ptr_tag)) { + goto done; + } + + /* + * In arm_cpu_realizefn, we asserted that dcz > LOG2_TAG_GRANULE+1, + * i.e. 32 bytes, which is an unreasonably small dcz anyway, to make + * sure that we can access one complete tag byte here. + */ + log2_dcz_bytes = env_archcpu(env)->dcz_blocksize + 2; + log2_tag_bytes = log2_dcz_bytes - (LOG2_TAG_GRANULE + 1); + dcz_bytes = (intptr_t)1 << log2_dcz_bytes; + tag_bytes = (intptr_t)1 << log2_tag_bytes; + align_ptr = ptr & -dcz_bytes; + + /* + * Trap if accessing an invalid page. DC_ZVA requires that we supply + * the original pointer for an invalid page. But watchpoints require + * that we probe the actual space. So do both. + */ + mmu_idx = FIELD_EX32(desc, MTEDESC, MIDX); + (void) probe_write(env, ptr, 1, mmu_idx, ra); + mem = allocation_tag_mem(env, mmu_idx, align_ptr, MMU_DATA_STORE, + dcz_bytes, MMU_DATA_LOAD, tag_bytes, ra); + if (!mem) { + goto done; + } + + /* + * Unlike the reasoning for checkN, DC_ZVA is always aligned, and thus + * it is quite easy to perform all of the comparisons at once without + * any extra masking. + * + * The most common zva block size is 64; some of the thunderx cpus use + * a block size of 128. For user-only, aarch64_max_initfn will set the + * block size to 512. Fill out the other cases for future-proofing. + * + * In order to be able to find the first miscompare later, we want the + * tag bytes to be in little-endian order. + */ + switch (log2_tag_bytes) { + case 0: /* zva_blocksize 32 */ + mem_tag = *(uint8_t *)mem; + ptr_tag *= 0x11u; + break; + case 1: /* zva_blocksize 64 */ + mem_tag = cpu_to_le16(*(uint16_t *)mem); + ptr_tag *= 0x1111u; + break; + case 2: /* zva_blocksize 128 */ + mem_tag = cpu_to_le32(*(uint32_t *)mem); + ptr_tag *= 0x11111111u; + break; + case 3: /* zva_blocksize 256 */ + mem_tag = cpu_to_le64(*(uint64_t *)mem); + ptr_tag *= 0x1111111111111111ull; + break; + + default: /* zva_blocksize 512, 1024, 2048 */ + ptr_tag *= 0x1111111111111111ull; + i = 0; + do { + mem_tag = cpu_to_le64(*(uint64_t *)((char*)mem + i)); + if (unlikely(mem_tag != ptr_tag)) { + goto fail; + } + i += 8; + align_ptr += 16 * TAG_GRANULE; + } while (i < tag_bytes); + goto done; + } + + if (likely(mem_tag == ptr_tag)) { + goto done; + } + + fail: + /* Locate the first nibble that differs. 
*/ + i = ctz64(mem_tag ^ ptr_tag) >> 4; + mte_check_fail(env, mmu_idx, align_ptr + i * TAG_GRANULE, ra); + + done: + return useronly_clean_ptr(ptr); +} diff --git a/qemu/target/arm/neon_helper.c b/qemu/target/arm/neon_helper.c index 0c2828e6f3..7a9568a4e6 100644 --- a/qemu/target/arm/neon_helper.c +++ b/qemu/target/arm/neon_helper.c @@ -562,24 +562,6 @@ uint32_t HELPER(neon_hsub_u32)(uint32_t src1, uint32_t src2) return dest; } -#define NEON_FN(dest, src1, src2) dest = (src1 > src2) ? ~0 : 0 -NEON_VOP(cgt_s8, neon_s8, 4) -NEON_VOP(cgt_u8, neon_u8, 4) -NEON_VOP(cgt_s16, neon_s16, 2) -NEON_VOP(cgt_u16, neon_u16, 2) -NEON_VOP(cgt_s32, neon_s32, 1) -NEON_VOP(cgt_u32, neon_u32, 1) -#undef NEON_FN - -#define NEON_FN(dest, src1, src2) dest = (src1 >= src2) ? ~0 : 0 -NEON_VOP(cge_s8, neon_s8, 4) -NEON_VOP(cge_u8, neon_u8, 4) -NEON_VOP(cge_s16, neon_s16, 2) -NEON_VOP(cge_u16, neon_u16, 2) -NEON_VOP(cge_s32, neon_s32, 1) -NEON_VOP(cge_u32, neon_u32, 1) -#undef NEON_FN - #define NEON_FN(dest, src1, src2) dest = (src1 < src2) ? src1 : src2 NEON_POP(pmin_s8, neon_s8, 4) NEON_POP(pmin_u8, neon_u8, 4) @@ -594,16 +576,6 @@ NEON_POP(pmax_s16, neon_s16, 2) NEON_POP(pmax_u16, neon_u16, 2) #undef NEON_FN -#define NEON_FN(dest, src1, src2) \ - dest = (src1 > src2) ? (src1 - src2) : (src2 - src1) -NEON_VOP(abd_s8, neon_s8, 4) -NEON_VOP(abd_u8, neon_u8, 4) -NEON_VOP(abd_s16, neon_s16, 2) -NEON_VOP(abd_u16, neon_u16, 2) -NEON_VOP(abd_s32, neon_s32, 1) -NEON_VOP(abd_u32, neon_u32, 1) -#undef NEON_FN - #define NEON_FN(dest, src1, src2) do { \ int8_t tmp; \ tmp = (int8_t)src2; \ @@ -1135,12 +1107,6 @@ NEON_VOP(tst_u16, neon_u16, 2) NEON_VOP(tst_u32, neon_u32, 1) #undef NEON_FN -#define NEON_FN(dest, src1, src2) dest = (src1 == src2) ? -1 : 0 -NEON_VOP(ceq_u8, neon_u8, 4) -NEON_VOP(ceq_u16, neon_u16, 2) -NEON_VOP(ceq_u32, neon_u32, 1) -#undef NEON_FN - /* Count Leading Sign/Zero Bits. */ static inline int do_clz8(uint8_t x) { @@ -1889,13 +1855,6 @@ uint64_t HELPER(neon_qneg_s64)(CPUARMState *env, uint64_t x) } /* NEON Float helpers. */ -uint32_t HELPER(neon_abd_f32)(uint32_t a, uint32_t b, void *fpstp) -{ - float_status *fpst = fpstp; - float32 f0 = make_float32(a); - float32 f1 = make_float32(b); - return float32_val(float32_abs(float32_sub(f0, f1, fpst))); -} /* Floating point comparisons produce an integer result. * Note that EQ doesn't signal InvalidOp for QNaNs but GE and GT do. 
diff --git a/qemu/target/arm/op_helper.c b/qemu/target/arm/op_helper.c index a9cbc79287..8844d13eae 100644 --- a/qemu/target/arm/op_helper.c +++ b/qemu/target/arm/op_helper.c @@ -933,6 +933,23 @@ uint32_t HELPER(ror_cc)(CPUARMState *env, uint32_t x, uint32_t i) } } +void HELPER(probe_access)(CPUARMState *env, target_ulong ptr, + uint32_t access_type, uint32_t mmu_idx, + uint32_t size) +{ + uc_engine *uc = env->uc; + uint32_t in_page = -((uint32_t)ptr | TARGET_PAGE_SIZE); + uintptr_t ra = GETPC(); + + if (likely(size <= in_page)) { + probe_access(env, ptr, size, access_type, mmu_idx, ra); + } else { + probe_access(env, ptr, in_page, access_type, mmu_idx, ra); + probe_access(env, ptr + in_page, size - in_page, + access_type, mmu_idx, ra); + } +} + uint32_t HELPER(uc_hooksys64)(CPUARMState *env, uint32_t insn, void *hk) { uc_arm64_reg uc_rt; diff --git a/qemu/target/arm/pauth_helper.c b/qemu/target/arm/pauth_helper.c index b909630317..6dbab03768 100644 --- a/qemu/target/arm/pauth_helper.c +++ b/qemu/target/arm/pauth_helper.c @@ -300,7 +300,11 @@ static uint64_t pauth_addpac(CPUARMState *env, uint64_t ptr, uint64_t modifier, */ test = sextract64(ptr, bot_bit, top_bit - bot_bit); if (test != 0 && test != -1) { - pac ^= MAKE_64BIT_MASK(top_bit - 1, 1); + /* + * Note that our top_bit is one greater than the pseudocode's + * version, hence "- 2" here. + */ + pac ^= MAKE_64BIT_MASK(top_bit - 2, 1); } /* diff --git a/qemu/target/arm/sve_helper.c b/qemu/target/arm/sve_helper.c index 2abbeba57b..c575b8f7db 100644 --- a/qemu/target/arm/sve_helper.c +++ b/qemu/target/arm/sve_helper.c @@ -27,21 +27,20 @@ #include "fpu/softfloat.h" #include "tcg/tcg.h" - /* Note that vector data is stored in host-endian 64-bit chunks, so addressing units smaller than that needs a host-endian fixup. */ #ifdef HOST_WORDS_BIGENDIAN -#define H1(x) ((x) ^ 7) +#define H1(x) ((x) ^ 7) #define H1_2(x) ((x) ^ 6) #define H1_4(x) ((x) ^ 4) -#define H2(x) ((x) ^ 3) -#define H4(x) ((x) ^ 1) +#define H2(x) ((x) ^ 3) +#define H4(x) ((x) ^ 1) #else -#define H1(x) (x) +#define H1(x) (x) #define H1_2(x) (x) #define H1_4(x) (x) -#define H2(x) (x) -#define H4(x) (x) +#define H2(x) (x) +#define H4(x) (x) #endif /* Return a value for NZCV as per the ARM PredTest pseudofunction. @@ -52,7 +51,7 @@ */ /* For no G bits set, NZCV = C. */ -#define PREDTEST_INIT 1 +#define PREDTEST_INIT 1 /* This is an iterative function, called for each Pd and Pg word * moving forward. 
@@ -290,25 +289,25 @@ static inline uint64_t wswap64(uint64_t h) return rol64(h, 32); } -#define LOGICAL_PPPP(NAME, FUNC) \ -void HELPER(NAME)(void *vd, void *vn, void *vm, void *vg, uint32_t desc) \ -{ \ - uintptr_t opr_sz = simd_oprsz(desc); \ - uint64_t *d = vd, *n = vn, *m = vm, *g = vg; \ - uintptr_t i; \ - for (i = 0; i < opr_sz / 8; ++i) { \ - d[i] = FUNC(n[i], m[i], g[i]); \ - } \ -} +#define LOGICAL_PPPP(NAME, FUNC) \ + void HELPER(NAME)(void *vd, void *vn, void *vm, void *vg, uint32_t desc) \ + { \ + uintptr_t opr_sz = simd_oprsz(desc); \ + uint64_t *d = vd, *n = vn, *m = vm, *g = vg; \ + uintptr_t i; \ + for (i = 0; i < opr_sz / 8; ++i) { \ + d[i] = FUNC(n[i], m[i], g[i]); \ + } \ + } -#define DO_AND(N, M, G) (((N) & (M)) & (G)) -#define DO_BIC(N, M, G) (((N) & ~(M)) & (G)) -#define DO_EOR(N, M, G) (((N) ^ (M)) & (G)) -#define DO_ORR(N, M, G) (((N) | (M)) & (G)) -#define DO_ORN(N, M, G) (((N) | ~(M)) & (G)) -#define DO_NOR(N, M, G) (~((N) | (M)) & (G)) +#define DO_AND(N, M, G) (((N) & (M)) & (G)) +#define DO_BIC(N, M, G) (((N) & ~(M)) & (G)) +#define DO_EOR(N, M, G) (((N) ^ (M)) & (G)) +#define DO_ORR(N, M, G) (((N) | (M)) & (G)) +#define DO_ORN(N, M, G) (((N) | ~(M)) & (G)) +#define DO_NOR(N, M, G) (~((N) | (M)) & (G)) #define DO_NAND(N, M, G) (~((N) & (M)) & (G)) -#define DO_SEL(N, M, G) (((N) & (G)) | ((M) & ~(G))) +#define DO_SEL(N, M, G) (((N) & (G)) | ((M) & ~(G))) LOGICAL_PPPP(sve_and_pppp, DO_AND) LOGICAL_PPPP(sve_bic_pppp, DO_BIC) @@ -337,49 +336,48 @@ LOGICAL_PPPP(sve_nand_pppp, DO_NAND) * extra care wrt byte/word ordering we could use gcc generic vectors * and do 16 bytes at a time. */ -#define DO_ZPZZ(NAME, TYPE, H, OP) \ -void HELPER(NAME)(void *vd, void *vn, void *vm, void *vg, uint32_t desc) \ -{ \ - intptr_t i, opr_sz = simd_oprsz(desc); \ - for (i = 0; i < opr_sz; ) { \ - uint16_t pg = *(uint16_t *)((char *)vg + H1_2(i >> 3)); \ - do { \ - if (pg & 1) { \ - TYPE nn = *(TYPE *)((char *)vn + H(i)); \ - TYPE mm = *(TYPE *)((char *)vm + H(i)); \ - *(TYPE *)((char *)vd + H(i)) = OP(nn, mm); \ - } \ - i += sizeof(TYPE), pg >>= sizeof(TYPE); \ - } while (i & 15); \ - } \ -} +#define DO_ZPZZ(NAME, TYPE, H, OP) \ + void HELPER(NAME)(void *vd, void *vn, void *vm, void *vg, uint32_t desc) \ + { \ + intptr_t i, opr_sz = simd_oprsz(desc); \ + for (i = 0; i < opr_sz;) { \ + uint16_t pg = *(uint16_t *)((char *)vg + H1_2(i >> 3)); \ + do { \ + if (pg & 1) { \ + TYPE nn = *(TYPE *)((char *)vn + H(i)); \ + TYPE mm = *(TYPE *)((char *)vm + H(i)); \ + *(TYPE *)((char *)vd + H(i)) = OP(nn, mm); \ + } \ + i += sizeof(TYPE), pg >>= sizeof(TYPE); \ + } while (i & 15); \ + } \ + } /* Similarly, specialized for 64-bit operands. */ -#define DO_ZPZZ_D(NAME, TYPE, OP) \ -void HELPER(NAME)(void *vd, void *vn, void *vm, void *vg, uint32_t desc) \ -{ \ - intptr_t i, opr_sz = simd_oprsz(desc) / 8; \ - TYPE *d = vd, *n = vn, *m = vm; \ - uint8_t *pg = vg; \ - for (i = 0; i < opr_sz; i += 1) { \ - if (pg[H1(i)] & 1) { \ - TYPE nn = n[i], mm = m[i]; \ - d[i] = OP(nn, mm); \ - } \ - } \ -} - -#define DO_AND(N, M) (N & M) -#define DO_EOR(N, M) (N ^ M) -#define DO_ORR(N, M) (N | M) -#define DO_BIC(N, M) (N & ~M) -#define DO_ADD(N, M) (N + M) -#define DO_SUB(N, M) (N - M) -#define DO_MAX(N, M) ((N) >= (M) ? (N) : (M)) -#define DO_MIN(N, M) ((N) >= (M) ? (M) : (N)) -#define DO_ABD(N, M) ((N) >= (M) ? 
(N) - (M) : (M) - (N)) -#define DO_MUL(N, M) (N * M) +#define DO_ZPZZ_D(NAME, TYPE, OP) \ + void HELPER(NAME)(void *vd, void *vn, void *vm, void *vg, uint32_t desc) \ + { \ + intptr_t i, opr_sz = simd_oprsz(desc) / 8; \ + TYPE *d = vd, *n = vn, *m = vm; \ + uint8_t *pg = vg; \ + for (i = 0; i < opr_sz; i += 1) { \ + if (pg[H1(i)] & 1) { \ + TYPE nn = n[i], mm = m[i]; \ + d[i] = OP(nn, mm); \ + } \ + } \ + } +#define DO_AND(N, M) (N & M) +#define DO_EOR(N, M) (N ^ M) +#define DO_ORR(N, M) (N | M) +#define DO_BIC(N, M) (N & ~M) +#define DO_ADD(N, M) (N + M) +#define DO_SUB(N, M) (N - M) +#define DO_MAX(N, M) ((N) >= (M) ? (N) : (M)) +#define DO_MIN(N, M) ((N) >= (M) ? (M) : (N)) +#define DO_ABD(N, M) ((N) >= (M) ? (N) - (M) : (M) - (N)) +#define DO_MUL(N, M) (N * M) /* * We must avoid the C undefined behaviour cases: division by @@ -431,20 +429,20 @@ DO_ZPZZ(sve_umax_zpzz_h, uint16_t, H1_2, DO_MAX) DO_ZPZZ(sve_umax_zpzz_s, uint32_t, H1_4, DO_MAX) DO_ZPZZ_D(sve_umax_zpzz_d, uint64_t, DO_MAX) -DO_ZPZZ(sve_smin_zpzz_b, int8_t, H1, DO_MIN) -DO_ZPZZ(sve_smin_zpzz_h, int16_t, H1_2, DO_MIN) -DO_ZPZZ(sve_smin_zpzz_s, int32_t, H1_4, DO_MIN) -DO_ZPZZ_D(sve_smin_zpzz_d, int64_t, DO_MIN) +DO_ZPZZ(sve_smin_zpzz_b, int8_t, H1, DO_MIN) +DO_ZPZZ(sve_smin_zpzz_h, int16_t, H1_2, DO_MIN) +DO_ZPZZ(sve_smin_zpzz_s, int32_t, H1_4, DO_MIN) +DO_ZPZZ_D(sve_smin_zpzz_d, int64_t, DO_MIN) DO_ZPZZ(sve_umin_zpzz_b, uint8_t, H1, DO_MIN) DO_ZPZZ(sve_umin_zpzz_h, uint16_t, H1_2, DO_MIN) DO_ZPZZ(sve_umin_zpzz_s, uint32_t, H1_4, DO_MIN) DO_ZPZZ_D(sve_umin_zpzz_d, uint64_t, DO_MIN) -DO_ZPZZ(sve_sabd_zpzz_b, int8_t, H1, DO_ABD) -DO_ZPZZ(sve_sabd_zpzz_h, int16_t, H1_2, DO_ABD) -DO_ZPZZ(sve_sabd_zpzz_s, int32_t, H1_4, DO_ABD) -DO_ZPZZ_D(sve_sabd_zpzz_d, int64_t, DO_ABD) +DO_ZPZZ(sve_sabd_zpzz_b, int8_t, H1, DO_ABD) +DO_ZPZZ(sve_sabd_zpzz_h, int16_t, H1_2, DO_ABD) +DO_ZPZZ(sve_sabd_zpzz_s, int32_t, H1_4, DO_ABD) +DO_ZPZZ_D(sve_sabd_zpzz_d, int64_t, DO_ABD) DO_ZPZZ(sve_uabd_zpzz_b, uint8_t, H1, DO_ABD) DO_ZPZZ(sve_uabd_zpzz_h, uint16_t, H1_2, DO_ABD) @@ -505,9 +503,9 @@ DO_ZPZZ_D(sve_udiv_zpzz_d, uint64_t, DO_UDIV) /* Note that all bits of the shift are significant and not modulo the element size. */ -#define DO_ASR(N, M) (N >> MIN(M, sizeof(N) * 8 - 1)) -#define DO_LSR(N, M) (M < sizeof(N) * 8 ? N >> M : 0) -#define DO_LSL(N, M) (M < sizeof(N) * 8 ? N << M : 0) +#define DO_ASR(N, M) (N >> MIN(M, sizeof(N) * 8 - 1)) +#define DO_LSR(N, M) (M < sizeof(N) * 8 ? N >> M : 0) +#define DO_LSL(N, M) (M < sizeof(N) * 8 ? N << M : 0) DO_ZPZZ(sve_asr_zpzz_b, int8_t, H1, DO_ASR) DO_ZPZZ(sve_lsr_zpzz_b, uint8_t, H1_2, DO_LSR) @@ -532,22 +530,22 @@ DO_ZPZZ_D(sve_lsl_zpzz_d, uint64_t, DO_LSL) * third operand is "wide". That is, for D = N op M, the same 64-bit * value of M is used with all of the narrower values of N. 
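 * For example, sve_asr_zpzw_b shifts every active byte element of N by the
 * single 64-bit element of M that covers it.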
*/ -#define DO_ZPZW(NAME, TYPE, TYPEW, H, OP) \ -void HELPER(NAME)(void *vd, void *vn, void *vm, void *vg, uint32_t desc) \ -{ \ - intptr_t i, opr_sz = simd_oprsz(desc); \ - for (i = 0; i < opr_sz; ) { \ - uint8_t pg = *(uint8_t *)((char *)vg + H1(i >> 3)); \ - TYPEW mm = *(TYPEW *)((char *)vm + i); \ - do { \ - if (pg & 1) { \ - TYPE nn = *(TYPE *)((char *)vn + H(i)); \ - *(TYPE *)((char *)vd + H(i)) = OP(nn, mm); \ - } \ - i += sizeof(TYPE), pg >>= sizeof(TYPE); \ - } while (i & 7); \ - } \ -} +#define DO_ZPZW(NAME, TYPE, TYPEW, H, OP) \ + void HELPER(NAME)(void *vd, void *vn, void *vm, void *vg, uint32_t desc) \ + { \ + intptr_t i, opr_sz = simd_oprsz(desc); \ + for (i = 0; i < opr_sz;) { \ + uint8_t pg = *(uint8_t *)((char *)vg + H1(i >> 3)); \ + TYPEW mm = *(TYPEW *)((char *)vm + i); \ + do { \ + if (pg & 1) { \ + TYPE nn = *(TYPE *)((char *)vn + H(i)); \ + *(TYPE *)((char *)vd + H(i)) = OP(nn, mm); \ + } \ + i += sizeof(TYPE), pg >>= sizeof(TYPE); \ + } while (i & 7); \ + } \ + } DO_ZPZW(sve_asr_zpzw_b, int8_t, uint64_t, H1, DO_ASR) DO_ZPZW(sve_lsr_zpzw_b, uint8_t, uint64_t, H1, DO_LSR) @@ -565,47 +563,47 @@ DO_ZPZW(sve_lsl_zpzw_s, uint32_t, uint64_t, H1_4, DO_LSL) /* Fully general two-operand expander, controlled by a predicate. */ -#define DO_ZPZ(NAME, TYPE, H, OP) \ -void HELPER(NAME)(void *vd, void *vn, void *vg, uint32_t desc) \ -{ \ - intptr_t i, opr_sz = simd_oprsz(desc); \ - for (i = 0; i < opr_sz; ) { \ - uint16_t pg = *(uint16_t *)((char *)vg + H1_2(i >> 3)); \ - do { \ - if (pg & 1) { \ - TYPE nn = *(TYPE *)((char *)vn + H(i)); \ - *(TYPE *)((char *)vd + H(i)) = OP(nn); \ - } \ - i += sizeof(TYPE), pg >>= sizeof(TYPE); \ - } while (i & 15); \ - } \ -} +#define DO_ZPZ(NAME, TYPE, H, OP) \ + void HELPER(NAME)(void *vd, void *vn, void *vg, uint32_t desc) \ + { \ + intptr_t i, opr_sz = simd_oprsz(desc); \ + for (i = 0; i < opr_sz;) { \ + uint16_t pg = *(uint16_t *)((char *)vg + H1_2(i >> 3)); \ + do { \ + if (pg & 1) { \ + TYPE nn = *(TYPE *)((char *)vn + H(i)); \ + *(TYPE *)((char *)vd + H(i)) = OP(nn); \ + } \ + i += sizeof(TYPE), pg >>= sizeof(TYPE); \ + } while (i & 15); \ + } \ + } /* Similarly, specialized for 64-bit operands. 
*/ -#define DO_ZPZ_D(NAME, TYPE, OP) \ -void HELPER(NAME)(void *vd, void *vn, void *vg, uint32_t desc) \ -{ \ - intptr_t i, opr_sz = simd_oprsz(desc) / 8; \ - TYPE *d = vd, *n = vn; \ - uint8_t *pg = vg; \ - for (i = 0; i < opr_sz; i += 1) { \ - if (pg[H1(i)] & 1) { \ - TYPE nn = n[i]; \ - d[i] = OP(nn); \ - } \ - } \ -} - -#define DO_CLS_B(N) (clrsb32(N) - 24) -#define DO_CLS_H(N) (clrsb32(N) - 16) +#define DO_ZPZ_D(NAME, TYPE, OP) \ + void HELPER(NAME)(void *vd, void *vn, void *vg, uint32_t desc) \ + { \ + intptr_t i, opr_sz = simd_oprsz(desc) / 8; \ + TYPE *d = vd, *n = vn; \ + uint8_t *pg = vg; \ + for (i = 0; i < opr_sz; i += 1) { \ + if (pg[H1(i)] & 1) { \ + TYPE nn = n[i]; \ + d[i] = OP(nn); \ + } \ + } \ + } + +#define DO_CLS_B(N) (clrsb32(N) - 24) +#define DO_CLS_H(N) (clrsb32(N) - 16) DO_ZPZ(sve_cls_b, int8_t, H1, DO_CLS_B) DO_ZPZ(sve_cls_h, int16_t, H1_2, DO_CLS_H) DO_ZPZ(sve_cls_s, int32_t, H1_4, clrsb32) DO_ZPZ_D(sve_cls_d, int64_t, clrsb64) -#define DO_CLZ_B(N) (clz32(N) - 24) -#define DO_CLZ_H(N) (clz32(N) - 16) +#define DO_CLZ_B(N) (clz32(N) - 24) +#define DO_CLZ_H(N) (clz32(N) - 16) DO_ZPZ(sve_clz_b, uint8_t, H1, DO_CLZ_B) DO_ZPZ(sve_clz_h, uint16_t, H1_2, DO_CLZ_H) @@ -617,7 +615,7 @@ DO_ZPZ(sve_cnt_zpz_h, uint16_t, H1_2, ctpop16) DO_ZPZ(sve_cnt_zpz_s, uint32_t, H1_4, ctpop32) DO_ZPZ_D(sve_cnt_zpz_d, uint64_t, ctpop64) -#define DO_CNOT(N) (N == 0) +#define DO_CNOT(N) (N == 0) DO_ZPZ(sve_cnot_b, uint8_t, H1, DO_CNOT) DO_ZPZ(sve_cnot_h, uint16_t, H1_2, DO_CNOT) @@ -625,15 +623,15 @@ DO_ZPZ(sve_cnot_s, uint32_t, H1_4, DO_CNOT) DO_ZPZ_D(sve_cnot_d, uint64_t, DO_CNOT) #ifdef _MSC_VER -#define DO_FABS16(N) (N & ((uint16_t)-1 >> 1)) -#define DO_FABS32(N) (N & ((uint32_t)-1 >> 1)) -#define DO_FABS64(N) (N & ((uint64_t)-1 >> 1)) +#define DO_FABS16(N) (N & ((uint16_t)-1 >> 1)) +#define DO_FABS32(N) (N & ((uint32_t)-1 >> 1)) +#define DO_FABS64(N) (N & ((uint64_t)-1 >> 1)) DO_ZPZ(sve_fabs_h, uint16_t, H1_2, DO_FABS16) DO_ZPZ(sve_fabs_s, uint32_t, H1_4, DO_FABS32) DO_ZPZ_D(sve_fabs_d, uint64_t, DO_FABS64) #else -#define DO_FABS(N) (N & ((__typeof(N))-1 >> 1)) +#define DO_FABS(N) (N & ((__typeof(N))-1 >> 1)) DO_ZPZ(sve_fabs_h, uint16_t, H1_2, DO_FABS) DO_ZPZ(sve_fabs_s, uint32_t, H1_4, DO_FABS) @@ -641,34 +639,34 @@ DO_ZPZ_D(sve_fabs_d, uint64_t, DO_FABS) #endif #ifdef _MSC_VER -#define DO_FNEG16(N) (N ^ ~((uint16_t)-1 >> 1)) -#define DO_FNEG32(N) (N ^ ~((uint32_t)-1 >> 1)) -#define DO_FNEG64(N) (N ^ ~((uint64_t)-1 >> 1)) +#define DO_FNEG16(N) (N ^ ~((uint16_t)-1 >> 1)) +#define DO_FNEG32(N) (N ^ ~((uint32_t)-1 >> 1)) +#define DO_FNEG64(N) (N ^ ~((uint64_t)-1 >> 1)) DO_ZPZ(sve_fneg_h, uint16_t, H1_2, DO_FNEG16) DO_ZPZ(sve_fneg_s, uint32_t, H1_4, DO_FNEG32) DO_ZPZ_D(sve_fneg_d, uint64_t, DO_FNEG64) #else -#define DO_FNEG(N) (N ^ ~((__typeof(N))-1 >> 1)) +#define DO_FNEG(N) (N ^ ~((__typeof(N))-1 >> 1)) DO_ZPZ(sve_fneg_h, uint16_t, H1_2, DO_FNEG) DO_ZPZ(sve_fneg_s, uint32_t, H1_4, DO_FNEG) DO_ZPZ_D(sve_fneg_d, uint64_t, DO_FNEG) #endif -#define DO_NOT(N) (~N) +#define DO_NOT(N) (~N) DO_ZPZ(sve_not_zpz_b, uint8_t, H1, DO_NOT) DO_ZPZ(sve_not_zpz_h, uint16_t, H1_2, DO_NOT) DO_ZPZ(sve_not_zpz_s, uint32_t, H1_4, DO_NOT) DO_ZPZ_D(sve_not_zpz_d, uint64_t, DO_NOT) -#define DO_SXTB(N) ((int8_t)N) -#define DO_SXTH(N) ((int16_t)N) -#define DO_SXTS(N) ((int32_t)N) -#define DO_UXTB(N) ((uint8_t)N) -#define DO_UXTH(N) ((uint16_t)N) -#define DO_UXTS(N) ((uint32_t)N) +#define DO_SXTB(N) ((int8_t)N) +#define DO_SXTH(N) ((int16_t)N) +#define DO_SXTS(N) ((int32_t)N) +#define DO_UXTB(N) ((uint8_t)N) 
+#define DO_UXTH(N) ((uint16_t)N) +#define DO_UXTS(N) ((uint32_t)N) DO_ZPZ(sve_sxtb_h, uint16_t, H1_2, DO_SXTB) DO_ZPZ(sve_sxtb_s, uint32_t, H1_4, DO_SXTB) @@ -685,9 +683,9 @@ DO_ZPZ_D(sve_uxth_d, uint64_t, DO_UXTH) DO_ZPZ_D(sve_uxtw_d, uint64_t, DO_UXTS) #ifdef _MSC_VER -#define DO_ABS(N) (N < 0 ? (0 - N) : N) +#define DO_ABS(N) (N < 0 ? (0 - N) : N) #else -#define DO_ABS(N) (N < 0 ? -N : N) +#define DO_ABS(N) (N < 0 ? -N : N) #endif DO_ZPZ(sve_abs_b, int8_t, H1, DO_ABS) @@ -696,9 +694,9 @@ DO_ZPZ(sve_abs_s, int32_t, H1_4, DO_ABS) DO_ZPZ_D(sve_abs_d, int64_t, DO_ABS) #ifdef _MSC_VER -#define DO_NEG(N) (0 - N) +#define DO_NEG(N) (0 - N) #else -#define DO_NEG(N) (-N) +#define DO_NEG(N) (-N) #endif DO_ZPZ(sve_neg_b, uint8_t, H1, DO_NEG) @@ -722,19 +720,19 @@ DO_ZPZ_D(sve_rbit_d, uint64_t, revbit64) /* Three-operand expander, unpredicated, in which the third operand is "wide". */ -#define DO_ZZW(NAME, TYPE, TYPEW, H, OP) \ -void HELPER(NAME)(void *vd, void *vn, void *vm, uint32_t desc) \ -{ \ - intptr_t i, opr_sz = simd_oprsz(desc); \ - for (i = 0; i < opr_sz; ) { \ - TYPEW mm = *(TYPEW *)((char *)vm + i); \ - do { \ - TYPE nn = *(TYPE *)((char *)vn + H(i)); \ - *(TYPE *)((char *)vd + H(i)) = OP(nn, mm); \ - i += sizeof(TYPE); \ - } while (i & 7); \ - } \ -} +#define DO_ZZW(NAME, TYPE, TYPEW, H, OP) \ + void HELPER(NAME)(void *vd, void *vn, void *vm, uint32_t desc) \ + { \ + intptr_t i, opr_sz = simd_oprsz(desc); \ + for (i = 0; i < opr_sz;) { \ + TYPEW mm = *(TYPEW *)((char *)vm + i); \ + do { \ + TYPE nn = *(TYPE *)((char *)vn + H(i)); \ + *(TYPE *)((char *)vd + H(i)) = OP(nn, mm); \ + i += sizeof(TYPE); \ + } while (i & 7); \ + } \ + } DO_ZZW(sve_asr_zzw_b, int8_t, uint64_t, H1, DO_ASR) DO_ZZW(sve_lsr_zzw_b, uint8_t, uint64_t, H1, DO_LSR) @@ -771,39 +769,39 @@ DO_ZZW(sve_lsl_zzw_s, uint32_t, uint64_t, H1_4, DO_LSL) /* ??? If we were to vectorize this by hand the reduction ordering * would change. For integer operands, this is perfectly fine. 
*/ -#define DO_VPZ(NAME, TYPEELT, TYPERED, TYPERET, H, INIT, OP) \ -uint64_t HELPER(NAME)(void *vn, void *vg, uint32_t desc) \ -{ \ - intptr_t i, opr_sz = simd_oprsz(desc); \ - TYPERED ret = INIT; \ - for (i = 0; i < opr_sz; ) { \ - uint16_t pg = *(uint16_t *)((char *)vg + H1_2(i >> 3)); \ - do { \ - if (pg & 1) { \ - TYPEELT nn = *(TYPEELT *)((char *)vn + H(i)); \ - ret = OP(ret, nn); \ - } \ - i += sizeof(TYPEELT), pg >>= sizeof(TYPEELT); \ - } while (i & 15); \ - } \ - return (TYPERET)ret; \ -} - -#define DO_VPZ_D(NAME, TYPEE, TYPER, INIT, OP) \ -uint64_t HELPER(NAME)(void *vn, void *vg, uint32_t desc) \ -{ \ - intptr_t i, opr_sz = simd_oprsz(desc) / 8; \ - TYPEE *n = vn; \ - uint8_t *pg = vg; \ - TYPER ret = INIT; \ - for (i = 0; i < opr_sz; i += 1) { \ - if (pg[H1(i)] & 1) { \ - TYPEE nn = n[i]; \ - ret = OP(ret, nn); \ - } \ - } \ - return ret; \ -} +#define DO_VPZ(NAME, TYPEELT, TYPERED, TYPERET, H, INIT, OP) \ + uint64_t HELPER(NAME)(void *vn, void *vg, uint32_t desc) \ + { \ + intptr_t i, opr_sz = simd_oprsz(desc); \ + TYPERED ret = INIT; \ + for (i = 0; i < opr_sz;) { \ + uint16_t pg = *(uint16_t *)((char *)vg + H1_2(i >> 3)); \ + do { \ + if (pg & 1) { \ + TYPEELT nn = *(TYPEELT *)((char *)vn + H(i)); \ + ret = OP(ret, nn); \ + } \ + i += sizeof(TYPEELT), pg >>= sizeof(TYPEELT); \ + } while (i & 15); \ + } \ + return (TYPERET)ret; \ + } + +#define DO_VPZ_D(NAME, TYPEE, TYPER, INIT, OP) \ + uint64_t HELPER(NAME)(void *vn, void *vg, uint32_t desc) \ + { \ + intptr_t i, opr_sz = simd_oprsz(desc) / 8; \ + TYPEE *n = vn; \ + uint8_t *pg = vg; \ + TYPER ret = INIT; \ + for (i = 0; i < opr_sz; i += 1) { \ + if (pg[H1(i)] & 1) { \ + TYPEE nn = n[i]; \ + ret = OP(ret, nn); \ + } \ + } \ + return ret; \ + } DO_VPZ(sve_orv_b, uint8_t, uint8_t, uint8_t, H1, 0, DO_ORR) DO_VPZ(sve_orv_h, uint16_t, uint16_t, uint16_t, H1_2, 0, DO_ORR) @@ -853,17 +851,17 @@ DO_VPZ_D(sve_uminv_d, uint64_t, uint64_t, -1, DO_MIN) #undef DO_VPZ_D /* Two vector operand, one scalar operand, unpredicated. */ -#define DO_ZZI(NAME, TYPE, OP) \ -void HELPER(NAME)(void *vd, void *vn, uint64_t s64, uint32_t desc) \ -{ \ - intptr_t i, opr_sz = simd_oprsz(desc) / sizeof(TYPE); \ - TYPE s = s64, *d = vd, *n = vn; \ - for (i = 0; i < opr_sz; ++i) { \ - d[i] = OP(n[i], s); \ - } \ -} +#define DO_ZZI(NAME, TYPE, OP) \ + void HELPER(NAME)(void *vd, void *vn, uint64_t s64, uint32_t desc) \ + { \ + intptr_t i, opr_sz = simd_oprsz(desc) / sizeof(TYPE); \ + TYPE s = s64, *d = vd, *n = vn; \ + for (i = 0; i < opr_sz; ++i) { \ + d[i] = OP(n[i], s); \ + } \ + } -#define DO_SUBR(X, Y) (Y - X) +#define DO_SUBR(X, Y) (Y - X) DO_ZZI(sve_subri_b, uint8_t, DO_SUBR) DO_ZZI(sve_subri_h, uint16_t, DO_SUBR) @@ -1094,49 +1092,49 @@ void HELPER(sve_movz_d)(void *vd, void *vn, void *vg, uint32_t desc) /* Three-operand expander, immediate operand, controlled by a predicate. 
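 * (The immediate is carried in simd_data(desc); e.g. sve_asr_zpzi_b shifts
 * each active byte element right by that constant.)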
*/ -#define DO_ZPZI(NAME, TYPE, H, OP) \ -void HELPER(NAME)(void *vd, void *vn, void *vg, uint32_t desc) \ -{ \ - intptr_t i, opr_sz = simd_oprsz(desc); \ - TYPE imm = simd_data(desc); \ - for (i = 0; i < opr_sz; ) { \ - uint16_t pg = *(uint16_t *)((char *)vg + H1_2(i >> 3)); \ - do { \ - if (pg & 1) { \ - TYPE nn = *(TYPE *)((char *)vn + H(i)); \ - *(TYPE *)((char *)vd + H(i)) = OP(nn, imm); \ - } \ - i += sizeof(TYPE), pg >>= sizeof(TYPE); \ - } while (i & 15); \ - } \ -} +#define DO_ZPZI(NAME, TYPE, H, OP) \ + void HELPER(NAME)(void *vd, void *vn, void *vg, uint32_t desc) \ + { \ + intptr_t i, opr_sz = simd_oprsz(desc); \ + TYPE imm = simd_data(desc); \ + for (i = 0; i < opr_sz;) { \ + uint16_t pg = *(uint16_t *)((char *)vg + H1_2(i >> 3)); \ + do { \ + if (pg & 1) { \ + TYPE nn = *(TYPE *)((char *)vn + H(i)); \ + *(TYPE *)((char *)vd + H(i)) = OP(nn, imm); \ + } \ + i += sizeof(TYPE), pg >>= sizeof(TYPE); \ + } while (i & 15); \ + } \ + } /* Similarly, specialized for 64-bit operands. */ -#define DO_ZPZI_D(NAME, TYPE, OP) \ -void HELPER(NAME)(void *vd, void *vn, void *vg, uint32_t desc) \ -{ \ - intptr_t i, opr_sz = simd_oprsz(desc) / 8; \ - TYPE *d = vd, *n = vn; \ - TYPE imm = simd_data(desc); \ - uint8_t *pg = vg; \ - for (i = 0; i < opr_sz; i += 1) { \ - if (pg[H1(i)] & 1) { \ - TYPE nn = n[i]; \ - d[i] = OP(nn, imm); \ - } \ - } \ -} - -#define DO_SHR(N, M) (N >> M) -#define DO_SHL(N, M) (N << M) +#define DO_ZPZI_D(NAME, TYPE, OP) \ + void HELPER(NAME)(void *vd, void *vn, void *vg, uint32_t desc) \ + { \ + intptr_t i, opr_sz = simd_oprsz(desc) / 8; \ + TYPE *d = vd, *n = vn; \ + TYPE imm = simd_data(desc); \ + uint8_t *pg = vg; \ + for (i = 0; i < opr_sz; i += 1) { \ + if (pg[H1(i)] & 1) { \ + TYPE nn = n[i]; \ + d[i] = OP(nn, imm); \ + } \ + } \ + } + +#define DO_SHR(N, M) (N >> M) +#define DO_SHL(N, M) (N << M) /* Arithmetic shift right for division. This rounds negative numbers toward zero as per signed division. Therefore before shifting, when N is negative, add 2**M-1. */ #ifdef _MSC_VER - #define DO_ASRD(N, M) ((N + (N < 0 ? (1 << M) - 1 : 0)) >> M) +#define DO_ASRD(N, M) ((N + (N < 0 ? (1 << M) - 1 : 0)) >> M) #else - #define DO_ASRD(N, M) ((N + (N < 0 ? ((__typeof(N))1 << M) - 1 : 0)) >> M) +#define DO_ASRD(N, M) ((N + (N < 0 ? ((__typeof(N))1 << M) - 1 : 0)) >> M) #endif DO_ZPZI(sve_asr_zpzi_b, int8_t, H1, DO_SHR) @@ -1167,43 +1165,43 @@ DO_ZPZI_D(sve_asrd_d, int64_t, DO_ASRD) /* Fully general four-operand expander, controlled by a predicate. 
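 * (Used below for the multiply-accumulate forms: DO_MLA gives
 * d = a + n * m for each active element, DO_MLS the subtracting form.)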
*/ -#define DO_ZPZZZ(NAME, TYPE, H, OP) \ -void HELPER(NAME)(void *vd, void *va, void *vn, void *vm, \ - void *vg, uint32_t desc) \ -{ \ - intptr_t i, opr_sz = simd_oprsz(desc); \ - for (i = 0; i < opr_sz; ) { \ - uint16_t pg = *(uint16_t *)((char *)vg + H1_2(i >> 3)); \ - do { \ - if (pg & 1) { \ - TYPE nn = *(TYPE *)((char *)vn + H(i)); \ - TYPE mm = *(TYPE *)((char *)vm + H(i)); \ - TYPE aa = *(TYPE *)((char *)va + H(i)); \ - *(TYPE *)((char *)vd + H(i)) = OP(aa, nn, mm); \ - } \ - i += sizeof(TYPE), pg >>= sizeof(TYPE); \ - } while (i & 15); \ - } \ -} +#define DO_ZPZZZ(NAME, TYPE, H, OP) \ + void HELPER(NAME)(void *vd, void *va, void *vn, void *vm, void *vg, \ + uint32_t desc) \ + { \ + intptr_t i, opr_sz = simd_oprsz(desc); \ + for (i = 0; i < opr_sz;) { \ + uint16_t pg = *(uint16_t *)((char *)vg + H1_2(i >> 3)); \ + do { \ + if (pg & 1) { \ + TYPE nn = *(TYPE *)((char *)vn + H(i)); \ + TYPE mm = *(TYPE *)((char *)vm + H(i)); \ + TYPE aa = *(TYPE *)((char *)va + H(i)); \ + *(TYPE *)((char *)vd + H(i)) = OP(aa, nn, mm); \ + } \ + i += sizeof(TYPE), pg >>= sizeof(TYPE); \ + } while (i & 15); \ + } \ + } /* Similarly, specialized for 64-bit operands. */ -#define DO_ZPZZZ_D(NAME, TYPE, OP) \ -void HELPER(NAME)(void *vd, void *va, void *vn, void *vm, \ - void *vg, uint32_t desc) \ -{ \ - intptr_t i, opr_sz = simd_oprsz(desc) / 8; \ - TYPE *d = vd, *a = va, *n = vn, *m = vm; \ - uint8_t *pg = vg; \ - for (i = 0; i < opr_sz; i += 1) { \ - if (pg[H1(i)] & 1) { \ - TYPE aa = a[i], nn = n[i], mm = m[i]; \ - d[i] = OP(aa, nn, mm); \ - } \ - } \ -} - -#define DO_MLA(A, N, M) (A + N * M) -#define DO_MLS(A, N, M) (A - N * M) +#define DO_ZPZZZ_D(NAME, TYPE, OP) \ + void HELPER(NAME)(void *vd, void *va, void *vn, void *vm, void *vg, \ + uint32_t desc) \ + { \ + intptr_t i, opr_sz = simd_oprsz(desc) / 8; \ + TYPE *d = vd, *a = va, *n = vn, *m = vm; \ + uint8_t *pg = vg; \ + for (i = 0; i < opr_sz; i += 1) { \ + if (pg[H1(i)] & 1) { \ + TYPE aa = a[i], nn = n[i], mm = m[i]; \ + d[i] = OP(aa, nn, mm); \ + } \ + } \ + } + +#define DO_MLA(A, N, M) (A + N * M) +#define DO_MLS(A, N, M) (A - N * M) DO_ZPZZZ(sve_mla_b, uint8_t, H1, DO_MLA) DO_ZPZZZ(sve_mls_b, uint8_t, H1, DO_MLS) @@ -1222,8 +1220,7 @@ DO_ZPZZZ_D(sve_mls_d, uint64_t, DO_MLS) #undef DO_ZPZZZ #undef DO_ZPZZZ_D -void HELPER(sve_index_b)(void *vd, uint32_t start, - uint32_t incr, uint32_t desc) +void HELPER(sve_index_b)(void *vd, uint32_t start, uint32_t incr, uint32_t desc) { intptr_t i, opr_sz = simd_oprsz(desc); uint8_t *d = vd; @@ -1232,8 +1229,7 @@ void HELPER(sve_index_b)(void *vd, uint32_t start, } } -void HELPER(sve_index_h)(void *vd, uint32_t start, - uint32_t incr, uint32_t desc) +void HELPER(sve_index_h)(void *vd, uint32_t start, uint32_t incr, uint32_t desc) { intptr_t i, opr_sz = simd_oprsz(desc) / 2; uint16_t *d = vd; @@ -1242,8 +1238,7 @@ void HELPER(sve_index_h)(void *vd, uint32_t start, } } -void HELPER(sve_index_s)(void *vd, uint32_t start, - uint32_t incr, uint32_t desc) +void HELPER(sve_index_s)(void *vd, uint32_t start, uint32_t incr, uint32_t desc) { intptr_t i, opr_sz = simd_oprsz(desc) / 4; uint32_t *d = vd; @@ -1252,8 +1247,7 @@ void HELPER(sve_index_s)(void *vd, uint32_t start, } } -void HELPER(sve_index_d)(void *vd, uint64_t start, - uint64_t incr, uint32_t desc) +void HELPER(sve_index_d)(void *vd, uint64_t start, uint64_t incr, uint32_t desc) { intptr_t i, opr_sz = simd_oprsz(desc) / 8; uint64_t *d = vd; @@ -1326,22 +1320,16 @@ void HELPER(sve_fexpa_s)(void *vd, void *vn, uint32_t desc) { /* These constants are 
cut-and-paste directly from the ARM pseudocode. */ static const uint32_t coeff[] = { - 0x000000, 0x0164d2, 0x02cd87, 0x043a29, - 0x05aac3, 0x071f62, 0x08980f, 0x0a14d5, - 0x0b95c2, 0x0d1adf, 0x0ea43a, 0x1031dc, - 0x11c3d3, 0x135a2b, 0x14f4f0, 0x16942d, - 0x1837f0, 0x19e046, 0x1b8d3a, 0x1d3eda, - 0x1ef532, 0x20b051, 0x227043, 0x243516, - 0x25fed7, 0x27cd94, 0x29a15b, 0x2b7a3a, - 0x2d583f, 0x2f3b79, 0x3123f6, 0x3311c4, - 0x3504f3, 0x36fd92, 0x38fbaf, 0x3aff5b, - 0x3d08a4, 0x3f179a, 0x412c4d, 0x4346cd, - 0x45672a, 0x478d75, 0x49b9be, 0x4bec15, - 0x4e248c, 0x506334, 0x52a81e, 0x54f35b, - 0x5744fd, 0x599d16, 0x5bfbb8, 0x5e60f5, - 0x60ccdf, 0x633f89, 0x65b907, 0x68396a, - 0x6ac0c7, 0x6d4f30, 0x6fe4ba, 0x728177, - 0x75257d, 0x77d0df, 0x7a83b3, 0x7d3e0c, + 0x000000, 0x0164d2, 0x02cd87, 0x043a29, 0x05aac3, 0x071f62, 0x08980f, + 0x0a14d5, 0x0b95c2, 0x0d1adf, 0x0ea43a, 0x1031dc, 0x11c3d3, 0x135a2b, + 0x14f4f0, 0x16942d, 0x1837f0, 0x19e046, 0x1b8d3a, 0x1d3eda, 0x1ef532, + 0x20b051, 0x227043, 0x243516, 0x25fed7, 0x27cd94, 0x29a15b, 0x2b7a3a, + 0x2d583f, 0x2f3b79, 0x3123f6, 0x3311c4, 0x3504f3, 0x36fd92, 0x38fbaf, + 0x3aff5b, 0x3d08a4, 0x3f179a, 0x412c4d, 0x4346cd, 0x45672a, 0x478d75, + 0x49b9be, 0x4bec15, 0x4e248c, 0x506334, 0x52a81e, 0x54f35b, 0x5744fd, + 0x599d16, 0x5bfbb8, 0x5e60f5, 0x60ccdf, 0x633f89, 0x65b907, 0x68396a, + 0x6ac0c7, 0x6d4f30, 0x6fe4ba, 0x728177, 0x75257d, 0x77d0df, 0x7a83b3, + 0x7d3e0c, }; intptr_t i, opr_sz = simd_oprsz(desc) / 4; uint32_t *d = vd, *n = vn; @@ -1573,8 +1561,8 @@ void HELPER(sve_uqsubi_d)(void *d, void *a, uint64_t b, uint32_t desc) /* Two operand predicated copy immediate with merge. All valid immediates * can fit within 17 signed bits in the simd_data field. */ -void HELPER(sve_cpy_m_b)(void *vd, void *vn, void *vg, - uint64_t mm, uint32_t desc) +void HELPER(sve_cpy_m_b)(void *vd, void *vn, void *vg, uint64_t mm, + uint32_t desc) { intptr_t i, opr_sz = simd_oprsz(desc) / 8; uint64_t *d = vd, *n = vn; @@ -1588,8 +1576,8 @@ void HELPER(sve_cpy_m_b)(void *vd, void *vn, void *vg, } } -void HELPER(sve_cpy_m_h)(void *vd, void *vn, void *vg, - uint64_t mm, uint32_t desc) +void HELPER(sve_cpy_m_h)(void *vd, void *vn, void *vg, uint64_t mm, + uint32_t desc) { intptr_t i, opr_sz = simd_oprsz(desc) / 8; uint64_t *d = vd, *n = vn; @@ -1603,8 +1591,8 @@ void HELPER(sve_cpy_m_h)(void *vd, void *vn, void *vg, } } -void HELPER(sve_cpy_m_s)(void *vd, void *vn, void *vg, - uint64_t mm, uint32_t desc) +void HELPER(sve_cpy_m_s)(void *vd, void *vn, void *vg, uint64_t mm, + uint32_t desc) { intptr_t i, opr_sz = simd_oprsz(desc) / 8; uint64_t *d = vd, *n = vn; @@ -1618,8 +1606,8 @@ void HELPER(sve_cpy_m_s)(void *vd, void *vn, void *vg, } } -void HELPER(sve_cpy_m_d)(void *vd, void *vn, void *vg, - uint64_t mm, uint32_t desc) +void HELPER(sve_cpy_m_d)(void *vd, void *vn, void *vg, uint64_t mm, + uint32_t desc) { intptr_t i, opr_sz = simd_oprsz(desc) / 8; uint64_t *d = vd, *n = vn; @@ -1678,7 +1666,7 @@ void HELPER(sve_cpy_z_d)(void *vd, void *vg, uint64_t val, uint32_t desc) } } -/* Big-endian hosts need to frob the byte indicies. If the copy +/* Big-endian hosts need to frob the byte indices. If the copy * happens to be 8-byte aligned, then no frobbing necessary. 
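 * (E.g. H1(0) is 7 on a big-endian host, so byte 0 of the vector lives at
 * host offset 7 and sub-8-byte copies must go through the H macros.)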
*/ static void swap_memmove(void *vd, void *vs, size_t n) @@ -1702,7 +1690,7 @@ static void swap_memmove(void *vd, void *vs, size_t n) *(uint32_t *)H1_4(d + i) = *(uint32_t *)H1_4(s + i); } } else { - for (i = n; i > 0; ) { + for (i = n; i > 0;) { i -= 4; *(uint32_t *)H1_4(d + i) = *(uint32_t *)H1_4(s + i); } @@ -1716,7 +1704,7 @@ static void swap_memmove(void *vd, void *vs, size_t n) *(uint16_t *)H1_2(d + i) = *(uint16_t *)H1_2(s + i); } } else { - for (i = n; i > 0; ) { + for (i = n; i > 0;) { i -= 2; *(uint16_t *)H1_2(d + i) = *(uint16_t *)H1_2(s + i); } @@ -1729,7 +1717,7 @@ static void swap_memmove(void *vd, void *vs, size_t n) *(uint8_t *)H1(d + i) = *(uint8_t *)H1(s + i); } } else { - for (i = n; i > 0; ) { + for (i = n; i > 0;) { i -= 1; *(uint8_t *)H1(d + i) = *(uint8_t *)H1(s + i); } @@ -1800,13 +1788,13 @@ void HELPER(sve_ext)(void *vd, void *vn, void *vm, uint32_t desc) } } -#define DO_INSR(NAME, TYPE, H) \ -void HELPER(NAME)(void *vd, void *vn, uint64_t val, uint32_t desc) \ -{ \ - intptr_t opr_sz = simd_oprsz(desc); \ - swap_memmove((char *)vd + sizeof(TYPE), vn, opr_sz - sizeof(TYPE)); \ - *(TYPE *)((char *)vd + H(0)) = val; \ -} +#define DO_INSR(NAME, TYPE, H) \ + void HELPER(NAME)(void *vd, void *vn, uint64_t val, uint32_t desc) \ + { \ + intptr_t opr_sz = simd_oprsz(desc); \ + swap_memmove((char *)vd + sizeof(TYPE), vn, opr_sz - sizeof(TYPE)); \ + *(TYPE *)((char *)vd + H(0)) = val; \ + } DO_INSR(sve_insr_b, uint8_t, H1) DO_INSR(sve_insr_h, uint16_t, H1_2) @@ -1859,21 +1847,21 @@ void HELPER(sve_rev_d)(void *vd, void *vn, uint32_t desc) } } -#define DO_TBL(NAME, TYPE, H) \ -void HELPER(NAME)(void *vd, void *vn, void *vm, uint32_t desc) \ -{ \ - intptr_t i, opr_sz = simd_oprsz(desc); \ - uintptr_t elem = opr_sz / sizeof(TYPE); \ - TYPE *d = vd, *n = vn, *m = vm; \ - ARMVectorReg tmp; \ - if (unlikely(vd == vn)) { \ - n = memcpy(&tmp, vn, opr_sz); \ - } \ - for (i = 0; i < elem; i++) { \ - TYPE j = m[H(i)]; \ - d[H(i)] = j < elem ? n[H(j)] : 0; \ - } \ -} +#define DO_TBL(NAME, TYPE, H) \ + void HELPER(NAME)(void *vd, void *vn, void *vm, uint32_t desc) \ + { \ + intptr_t i, opr_sz = simd_oprsz(desc); \ + uintptr_t elem = opr_sz / sizeof(TYPE); \ + TYPE *d = vd, *n = vn, *m = vm; \ + ARMVectorReg tmp; \ + if (unlikely(vd == vn)) { \ + n = memcpy(&tmp, vn, opr_sz); \ + } \ + for (i = 0; i < elem; i++) { \ + TYPE j = m[H(i)]; \ + d[H(i)] = j < elem ? n[H(j)] : 0; \ + } \ + } DO_TBL(sve_tbl_b, uint8_t, H1) DO_TBL(sve_tbl_h, uint16_t, H2) @@ -1882,20 +1870,20 @@ DO_TBL(sve_tbl_d, uint64_t, ) #undef TBL -#define DO_UNPK(NAME, TYPED, TYPES, HD, HS) \ -void HELPER(NAME)(void *vd, void *vn, uint32_t desc) \ -{ \ - intptr_t i, opr_sz = simd_oprsz(desc); \ - TYPED *d = vd; \ - TYPES *n = vn; \ - ARMVectorReg tmp; \ - if (unlikely((char *)vn - (char *)vd < opr_sz)) { \ - n = memcpy(&tmp, n, opr_sz / 2); \ - } \ - for (i = 0; i < opr_sz / sizeof(TYPED); i++) { \ - d[HD(i)] = n[HS(i)]; \ - } \ -} +#define DO_UNPK(NAME, TYPED, TYPES, HD, HS) \ + void HELPER(NAME)(void *vd, void *vn, uint32_t desc) \ + { \ + intptr_t i, opr_sz = simd_oprsz(desc); \ + TYPED *d = vd; \ + TYPES *n = vn; \ + ARMVectorReg tmp; \ + if (unlikely((char *)vn - (char *)vd < opr_sz)) { \ + n = memcpy(&tmp, n, opr_sz / 2); \ + } \ + for (i = 0; i < opr_sz / sizeof(TYPED); i++) { \ + d[HD(i)] = n[HS(i)]; \ + } \ + } DO_UNPK(sve_sunpk_h, int16_t, int8_t, H2, H1) DO_UNPK(sve_sunpk_s, int32_t, int16_t, H4, H2) @@ -1912,11 +1900,8 @@ DO_UNPK(sve_uunpk_d, uint64_t, uint32_t, , H4) * same pattern out to 16-bit units. 
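 * (Index N of the table keeps the even-numbered units of 2**N bits:
 * 0x5555... keeps even bits, 0x3333... even 2-bit units, and so on up to
 * even 16-bit units.)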
*/ static const uint64_t even_bit_esz_masks[5] = { - 0x5555555555555555ull, - 0x3333333333333333ull, - 0x0f0f0f0f0f0f0f0full, - 0x00ff00ff00ff00ffull, - 0x0000ffff0000ffffull, + 0x5555555555555555ull, 0x3333333333333333ull, 0x0f0f0f0f0f0f0f0full, + 0x00ff00ff00ff00ffull, 0x0000ffff0000ffffull, }; /* Zero-extend units of 2**N bits to units of 2**(N+1) bits. @@ -2112,7 +2097,7 @@ static uint64_t reverse_bits_64(uint64_t x, int n) static uint8_t reverse_bits_8(uint8_t x, int n) { - static const uint8_t mask[3] = { 0x55, 0x33, 0x0f }; + static const uint8_t mask[3] = {0x55, 0x33, 0x0f}; int i, sh; for (i = 2, sh = 4; i >= n; i--, sh >>= 1) { @@ -2197,68 +2182,72 @@ void HELPER(sve_punpk_p)(void *vd, void *vn, uint32_t pred_desc) } } -#define DO_ZIP(NAME, TYPE, H) \ -void HELPER(NAME)(void *vd, void *vn, void *vm, uint32_t desc) \ -{ \ - intptr_t oprsz = simd_oprsz(desc); \ - intptr_t i, oprsz_2 = oprsz / 2; \ - ARMVectorReg tmp_n, tmp_m; \ - /* We produce output faster than we consume input. \ - Therefore we must be mindful of possible overlap. */ \ - if (unlikely(((char *)vn - (char *)vd) < (uintptr_t)oprsz)) { \ - vn = memcpy(&tmp_n, vn, oprsz_2); \ - } \ - if (unlikely(((char *)vm - (char *)vd) < (uintptr_t)oprsz)) { \ - vm = memcpy(&tmp_m, vm, oprsz_2); \ - } \ - for (i = 0; i < oprsz_2; i += sizeof(TYPE)) { \ - *(TYPE *)((char *)vd + H(2 * i + 0)) = *(TYPE *)((char *)vn + H(i)); \ - *(TYPE *)((char *)vd + H(2 * i + sizeof(TYPE))) = *(TYPE *)((char *)vm + H(i)); \ - } \ -} +#define DO_ZIP(NAME, TYPE, H) \ + void HELPER(NAME)(void *vd, void *vn, void *vm, uint32_t desc) \ + { \ + intptr_t oprsz = simd_oprsz(desc); \ + intptr_t i, oprsz_2 = oprsz / 2; \ + ARMVectorReg tmp_n, tmp_m; \ + /* We produce output faster than we consume input. \ + Therefore we must be mindful of possible overlap. 
*/ \ + if (unlikely(((char *)vn - (char *)vd) < (uintptr_t)oprsz)) { \ + vn = memcpy(&tmp_n, vn, oprsz_2); \ + } \ + if (unlikely(((char *)vm - (char *)vd) < (uintptr_t)oprsz)) { \ + vm = memcpy(&tmp_m, vm, oprsz_2); \ + } \ + for (i = 0; i < oprsz_2; i += sizeof(TYPE)) { \ + *(TYPE *)((char *)vd + H(2 * i + 0)) = \ + *(TYPE *)((char *)vn + H(i)); \ + *(TYPE *)((char *)vd + H(2 * i + sizeof(TYPE))) = \ + *(TYPE *)((char *)vm + H(i)); \ + } \ + } DO_ZIP(sve_zip_b, uint8_t, H1) DO_ZIP(sve_zip_h, uint16_t, H1_2) DO_ZIP(sve_zip_s, uint32_t, H1_4) DO_ZIP(sve_zip_d, uint64_t, ) -#define DO_UZP(NAME, TYPE, H) \ -void HELPER(NAME)(void *vd, void *vn, void *vm, uint32_t desc) \ -{ \ - intptr_t oprsz = simd_oprsz(desc); \ - intptr_t oprsz_2 = oprsz / 2; \ - intptr_t odd_ofs = simd_data(desc); \ - intptr_t i; \ - ARMVectorReg tmp_m; \ - if (unlikely(((char *)vm - (char *)vd) < (uintptr_t)oprsz)) { \ - vm = memcpy(&tmp_m, vm, oprsz); \ - } \ - for (i = 0; i < oprsz_2; i += sizeof(TYPE)) { \ - *(TYPE *)((char *)vd + H(i)) = *(TYPE *)((char *)vn + H(2 * i + odd_ofs)); \ - } \ - for (i = 0; i < oprsz_2; i += sizeof(TYPE)) { \ - *(TYPE *)((char *)vd + H(oprsz_2 + i)) = *(TYPE *)((char *)vm + H(2 * i + odd_ofs)); \ - } \ -} +#define DO_UZP(NAME, TYPE, H) \ + void HELPER(NAME)(void *vd, void *vn, void *vm, uint32_t desc) \ + { \ + intptr_t oprsz = simd_oprsz(desc); \ + intptr_t oprsz_2 = oprsz / 2; \ + intptr_t odd_ofs = simd_data(desc); \ + intptr_t i; \ + ARMVectorReg tmp_m; \ + if (unlikely(((char *)vm - (char *)vd) < (uintptr_t)oprsz)) { \ + vm = memcpy(&tmp_m, vm, oprsz); \ + } \ + for (i = 0; i < oprsz_2; i += sizeof(TYPE)) { \ + *(TYPE *)((char *)vd + H(i)) = \ + *(TYPE *)((char *)vn + H(2 * i + odd_ofs)); \ + } \ + for (i = 0; i < oprsz_2; i += sizeof(TYPE)) { \ + *(TYPE *)((char *)vd + H(oprsz_2 + i)) = \ + *(TYPE *)((char *)vm + H(2 * i + odd_ofs)); \ + } \ + } DO_UZP(sve_uzp_b, uint8_t, H1) DO_UZP(sve_uzp_h, uint16_t, H1_2) DO_UZP(sve_uzp_s, uint32_t, H1_4) DO_UZP(sve_uzp_d, uint64_t, ) -#define DO_TRN(NAME, TYPE, H) \ -void HELPER(NAME)(void *vd, void *vn, void *vm, uint32_t desc) \ -{ \ - intptr_t oprsz = simd_oprsz(desc); \ - intptr_t odd_ofs = simd_data(desc); \ - intptr_t i; \ - for (i = 0; i < oprsz; i += 2 * sizeof(TYPE)) { \ - TYPE ae = *(TYPE *)((char *)vn + H(i + odd_ofs)); \ - TYPE be = *(TYPE *)((char *)vm + H(i + odd_ofs)); \ - *(TYPE *)((char *)vd + H(i + 0)) = ae; \ - *(TYPE *)((char *)vd + H(i + sizeof(TYPE))) = be; \ - } \ -} +#define DO_TRN(NAME, TYPE, H) \ + void HELPER(NAME)(void *vd, void *vn, void *vm, uint32_t desc) \ + { \ + intptr_t oprsz = simd_oprsz(desc); \ + intptr_t odd_ofs = simd_data(desc); \ + intptr_t i; \ + for (i = 0; i < oprsz; i += 2 * sizeof(TYPE)) { \ + TYPE ae = *(TYPE *)((char *)vn + H(i + odd_ofs)); \ + TYPE be = *(TYPE *)((char *)vm + H(i + odd_ofs)); \ + *(TYPE *)((char *)vd + H(i + 0)) = ae; \ + *(TYPE *)((char *)vd + H(i + sizeof(TYPE))) = be; \ + } \ + } DO_TRN(sve_trn_b, uint8_t, H1) DO_TRN(sve_trn_h, uint16_t, H1_2) @@ -2352,8 +2341,8 @@ void HELPER(sve_splice)(void *vd, void *vn, void *vm, void *vg, uint32_t desc) swap_memmove((char *)vd + len, vm, opr_sz * 8 - len); } -void HELPER(sve_sel_zpzz_b)(void *vd, void *vn, void *vm, - void *vg, uint32_t desc) +void HELPER(sve_sel_zpzz_b)(void *vd, void *vn, void *vm, void *vg, + uint32_t desc) { intptr_t i, opr_sz = simd_oprsz(desc) / 8; uint64_t *d = vd, *n = vn, *m = vm; @@ -2366,8 +2355,8 @@ void HELPER(sve_sel_zpzz_b)(void *vd, void *vn, void *vm, } } -void HELPER(sve_sel_zpzz_h)(void *vd, void 
*vn, void *vm, - void *vg, uint32_t desc) +void HELPER(sve_sel_zpzz_h)(void *vd, void *vn, void *vm, void *vg, + uint32_t desc) { intptr_t i, opr_sz = simd_oprsz(desc) / 8; uint64_t *d = vd, *n = vn, *m = vm; @@ -2380,8 +2369,8 @@ void HELPER(sve_sel_zpzz_h)(void *vd, void *vn, void *vm, } } -void HELPER(sve_sel_zpzz_s)(void *vd, void *vn, void *vm, - void *vg, uint32_t desc) +void HELPER(sve_sel_zpzz_s)(void *vd, void *vn, void *vm, void *vg, + uint32_t desc) { intptr_t i, opr_sz = simd_oprsz(desc) / 8; uint64_t *d = vd, *n = vn, *m = vm; @@ -2394,8 +2383,8 @@ void HELPER(sve_sel_zpzz_s)(void *vd, void *vn, void *vm, } } -void HELPER(sve_sel_zpzz_d)(void *vd, void *vn, void *vm, - void *vg, uint32_t desc) +void HELPER(sve_sel_zpzz_d)(void *vd, void *vn, void *vm, void *vg, + uint32_t desc) { intptr_t i, opr_sz = simd_oprsz(desc) / 8; uint64_t *d = vd, *n = vn, *m = vm; @@ -2428,63 +2417,64 @@ void HELPER(sve_sel_zpzz_d)(void *vd, void *vn, void *vm, * a scalar output, and also handles the byte-ordering of sub-uint64_t * scalar outputs, is tricky. */ -#define DO_CMP_PPZZ(NAME, TYPE, OP, H, MASK) \ -uint32_t HELPER(NAME)(void *vd, void *vn, void *vm, void *vg, uint32_t desc) \ -{ \ - intptr_t opr_sz = simd_oprsz(desc); \ - uint32_t flags = PREDTEST_INIT; \ - intptr_t i = opr_sz; \ - do { \ - uint64_t out = 0, pg; \ - do { \ - i -= sizeof(TYPE), out <<= sizeof(TYPE); \ - TYPE nn = *(TYPE *)((char *)vn + H(i)); \ - TYPE mm = *(TYPE *)((char *)vm + H(i)); \ - out |= nn OP mm; \ - } while (i & 63); \ - pg = *(uint64_t *)((char *)vg + (i >> 3)) & MASK; \ - out &= pg; \ - *(uint64_t *)((char *)vd + (i >> 3)) = out; \ - flags = iter_predtest_bwd(out, pg, flags); \ - } while (i > 0); \ - return flags; \ -} - -#define DO_CMP_PPZZ_B(NAME, TYPE, OP) \ - DO_CMP_PPZZ(NAME, TYPE, OP, H1, 0xffffffffffffffffull) -#define DO_CMP_PPZZ_H(NAME, TYPE, OP) \ +#define DO_CMP_PPZZ(NAME, TYPE, OP, H, MASK) \ + uint32_t HELPER(NAME)(void *vd, void *vn, void *vm, void *vg, \ + uint32_t desc) \ + { \ + intptr_t opr_sz = simd_oprsz(desc); \ + uint32_t flags = PREDTEST_INIT; \ + intptr_t i = opr_sz; \ + do { \ + uint64_t out = 0, pg; \ + do { \ + i -= sizeof(TYPE), out <<= sizeof(TYPE); \ + TYPE nn = *(TYPE *)((char *)vn + H(i)); \ + TYPE mm = *(TYPE *)((char *)vm + H(i)); \ + out |= nn OP mm; \ + } while (i & 63); \ + pg = *(uint64_t *)((char *)vg + (i >> 3)) & MASK; \ + out &= pg; \ + *(uint64_t *)((char *)vd + (i >> 3)) = out; \ + flags = iter_predtest_bwd(out, pg, flags); \ + } while (i > 0); \ + return flags; \ + } + +#define DO_CMP_PPZZ_B(NAME, TYPE, OP) \ + DO_CMP_PPZZ(NAME, TYPE, OP, H1, 0xffffffffffffffffull) +#define DO_CMP_PPZZ_H(NAME, TYPE, OP) \ DO_CMP_PPZZ(NAME, TYPE, OP, H1_2, 0x5555555555555555ull) -#define DO_CMP_PPZZ_S(NAME, TYPE, OP) \ +#define DO_CMP_PPZZ_S(NAME, TYPE, OP) \ DO_CMP_PPZZ(NAME, TYPE, OP, H1_4, 0x1111111111111111ull) -#define DO_CMP_PPZZ_D(NAME, TYPE, OP) \ - DO_CMP_PPZZ(NAME, TYPE, OP, , 0x0101010101010101ull) +#define DO_CMP_PPZZ_D(NAME, TYPE, OP) \ + DO_CMP_PPZZ(NAME, TYPE, OP, , 0x0101010101010101ull) -DO_CMP_PPZZ_B(sve_cmpeq_ppzz_b, uint8_t, ==) +DO_CMP_PPZZ_B(sve_cmpeq_ppzz_b, uint8_t, ==) DO_CMP_PPZZ_H(sve_cmpeq_ppzz_h, uint16_t, ==) DO_CMP_PPZZ_S(sve_cmpeq_ppzz_s, uint32_t, ==) DO_CMP_PPZZ_D(sve_cmpeq_ppzz_d, uint64_t, ==) -DO_CMP_PPZZ_B(sve_cmpne_ppzz_b, uint8_t, !=) +DO_CMP_PPZZ_B(sve_cmpne_ppzz_b, uint8_t, !=) DO_CMP_PPZZ_H(sve_cmpne_ppzz_h, uint16_t, !=) DO_CMP_PPZZ_S(sve_cmpne_ppzz_s, uint32_t, !=) DO_CMP_PPZZ_D(sve_cmpne_ppzz_d, uint64_t, !=) 
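/*
 * A minimal standalone sketch of the predicate layout the DO_CMP_PPZZ
 * variants above rely on: results are packed one bit per vector byte, so
 * only every 1st/2nd/4th/8th bit is significant for B/H/S/D elements, which
 * is exactly what the 0xff.../0x55.../0x11.../0x01... MASK constants encode.
 * The names below (pack_cmp_results, the masks[] table) are illustrative
 * only and are not part of this patch.
 */
#include <inttypes.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Pack one 64-byte chunk of per-element results, walking backwards as the
 * macro's inner loop does: the element at byte offset B ends up at bit B. */
static uint64_t pack_cmp_results(const bool *res, int esize)
{
    uint64_t out = 0;
    for (int i = 64 - esize; i >= 0; i -= esize) {
        out <<= esize;
        out |= res[i / esize];
    }
    return out;
}

int main(void)
{
    static const uint64_t masks[4] = {
        0xffffffffffffffffull, /* B: every byte starts an element */
        0x5555555555555555ull, /* H: every 2nd byte               */
        0x1111111111111111ull, /* S: every 4th byte               */
        0x0101010101010101ull, /* D: every 8th byte               */
    };
    bool res[64];

    for (int k = 0; k < 64; k++) {
        res[k] = true; /* pretend every element comparison was true */
    }
    for (int e = 0; e < 4; e++) {
        int esize = 1 << e;
        uint64_t out = pack_cmp_results(res, esize);
        /* With all results true, the packed word equals the mask itself. */
        printf("esize %d: out=%016" PRIx64 " mask=%016" PRIx64 "\n",
               esize, out, masks[e]);
    }
    return 0;
}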
-DO_CMP_PPZZ_B(sve_cmpgt_ppzz_b, int8_t, >) +DO_CMP_PPZZ_B(sve_cmpgt_ppzz_b, int8_t, >) DO_CMP_PPZZ_H(sve_cmpgt_ppzz_h, int16_t, >) DO_CMP_PPZZ_S(sve_cmpgt_ppzz_s, int32_t, >) DO_CMP_PPZZ_D(sve_cmpgt_ppzz_d, int64_t, >) -DO_CMP_PPZZ_B(sve_cmpge_ppzz_b, int8_t, >=) +DO_CMP_PPZZ_B(sve_cmpge_ppzz_b, int8_t, >=) DO_CMP_PPZZ_H(sve_cmpge_ppzz_h, int16_t, >=) DO_CMP_PPZZ_S(sve_cmpge_ppzz_s, int32_t, >=) DO_CMP_PPZZ_D(sve_cmpge_ppzz_d, int64_t, >=) -DO_CMP_PPZZ_B(sve_cmphi_ppzz_b, uint8_t, >) +DO_CMP_PPZZ_B(sve_cmphi_ppzz_b, uint8_t, >) DO_CMP_PPZZ_H(sve_cmphi_ppzz_h, uint16_t, >) DO_CMP_PPZZ_S(sve_cmphi_ppzz_s, uint32_t, >) DO_CMP_PPZZ_D(sve_cmphi_ppzz_d, uint64_t, >) -DO_CMP_PPZZ_B(sve_cmphs_ppzz_b, uint8_t, >=) +DO_CMP_PPZZ_B(sve_cmphs_ppzz_b, uint8_t, >=) DO_CMP_PPZZ_H(sve_cmphs_ppzz_h, uint16_t, >=) DO_CMP_PPZZ_S(sve_cmphs_ppzz_s, uint32_t, >=) DO_CMP_PPZZ_D(sve_cmphs_ppzz_d, uint64_t, >=) @@ -2496,74 +2486,75 @@ DO_CMP_PPZZ_D(sve_cmphs_ppzz_d, uint64_t, >=) #undef DO_CMP_PPZZ /* Similar, but the second source is "wide". */ -#define DO_CMP_PPZW(NAME, TYPE, TYPEW, OP, H, MASK) \ -uint32_t HELPER(NAME)(void *vd, void *vn, void *vm, void *vg, uint32_t desc) \ -{ \ - intptr_t opr_sz = simd_oprsz(desc); \ - uint32_t flags = PREDTEST_INIT; \ - intptr_t i = opr_sz; \ - do { \ - uint64_t out = 0, pg; \ - do { \ - TYPEW mm = *(TYPEW *)((char *)vm + i - 8); \ - do { \ - i -= sizeof(TYPE), out <<= sizeof(TYPE); \ - TYPE nn = *(TYPE *)((char *)vn + H(i)); \ - out |= nn OP mm; \ - } while (i & 7); \ - } while (i & 63); \ - pg = *(uint64_t *)((char *)vg + (i >> 3)) & MASK; \ - out &= pg; \ - *(uint64_t *)((char *)vd + (i >> 3)) = out; \ - flags = iter_predtest_bwd(out, pg, flags); \ - } while (i > 0); \ - return flags; \ -} - -#define DO_CMP_PPZW_B(NAME, TYPE, TYPEW, OP) \ - DO_CMP_PPZW(NAME, TYPE, TYPEW, OP, H1, 0xffffffffffffffffull) -#define DO_CMP_PPZW_H(NAME, TYPE, TYPEW, OP) \ +#define DO_CMP_PPZW(NAME, TYPE, TYPEW, OP, H, MASK) \ + uint32_t HELPER(NAME)(void *vd, void *vn, void *vm, void *vg, \ + uint32_t desc) \ + { \ + intptr_t opr_sz = simd_oprsz(desc); \ + uint32_t flags = PREDTEST_INIT; \ + intptr_t i = opr_sz; \ + do { \ + uint64_t out = 0, pg; \ + do { \ + TYPEW mm = *(TYPEW *)((char *)vm + i - 8); \ + do { \ + i -= sizeof(TYPE), out <<= sizeof(TYPE); \ + TYPE nn = *(TYPE *)((char *)vn + H(i)); \ + out |= nn OP mm; \ + } while (i & 7); \ + } while (i & 63); \ + pg = *(uint64_t *)((char *)vg + (i >> 3)) & MASK; \ + out &= pg; \ + *(uint64_t *)((char *)vd + (i >> 3)) = out; \ + flags = iter_predtest_bwd(out, pg, flags); \ + } while (i > 0); \ + return flags; \ + } + +#define DO_CMP_PPZW_B(NAME, TYPE, TYPEW, OP) \ + DO_CMP_PPZW(NAME, TYPE, TYPEW, OP, H1, 0xffffffffffffffffull) +#define DO_CMP_PPZW_H(NAME, TYPE, TYPEW, OP) \ DO_CMP_PPZW(NAME, TYPE, TYPEW, OP, H1_2, 0x5555555555555555ull) -#define DO_CMP_PPZW_S(NAME, TYPE, TYPEW, OP) \ +#define DO_CMP_PPZW_S(NAME, TYPE, TYPEW, OP) \ DO_CMP_PPZW(NAME, TYPE, TYPEW, OP, H1_4, 0x1111111111111111ull) -DO_CMP_PPZW_B(sve_cmpeq_ppzw_b, int8_t, uint64_t, ==) +DO_CMP_PPZW_B(sve_cmpeq_ppzw_b, int8_t, uint64_t, ==) DO_CMP_PPZW_H(sve_cmpeq_ppzw_h, int16_t, uint64_t, ==) DO_CMP_PPZW_S(sve_cmpeq_ppzw_s, int32_t, uint64_t, ==) -DO_CMP_PPZW_B(sve_cmpne_ppzw_b, int8_t, uint64_t, !=) +DO_CMP_PPZW_B(sve_cmpne_ppzw_b, int8_t, uint64_t, !=) DO_CMP_PPZW_H(sve_cmpne_ppzw_h, int16_t, uint64_t, !=) DO_CMP_PPZW_S(sve_cmpne_ppzw_s, int32_t, uint64_t, !=) -DO_CMP_PPZW_B(sve_cmpgt_ppzw_b, int8_t, int64_t, >) -DO_CMP_PPZW_H(sve_cmpgt_ppzw_h, int16_t, int64_t, >) 
-DO_CMP_PPZW_S(sve_cmpgt_ppzw_s, int32_t, int64_t, >) +DO_CMP_PPZW_B(sve_cmpgt_ppzw_b, int8_t, int64_t, >) +DO_CMP_PPZW_H(sve_cmpgt_ppzw_h, int16_t, int64_t, >) +DO_CMP_PPZW_S(sve_cmpgt_ppzw_s, int32_t, int64_t, >) -DO_CMP_PPZW_B(sve_cmpge_ppzw_b, int8_t, int64_t, >=) -DO_CMP_PPZW_H(sve_cmpge_ppzw_h, int16_t, int64_t, >=) -DO_CMP_PPZW_S(sve_cmpge_ppzw_s, int32_t, int64_t, >=) +DO_CMP_PPZW_B(sve_cmpge_ppzw_b, int8_t, int64_t, >=) +DO_CMP_PPZW_H(sve_cmpge_ppzw_h, int16_t, int64_t, >=) +DO_CMP_PPZW_S(sve_cmpge_ppzw_s, int32_t, int64_t, >=) -DO_CMP_PPZW_B(sve_cmphi_ppzw_b, uint8_t, uint64_t, >) +DO_CMP_PPZW_B(sve_cmphi_ppzw_b, uint8_t, uint64_t, >) DO_CMP_PPZW_H(sve_cmphi_ppzw_h, uint16_t, uint64_t, >) DO_CMP_PPZW_S(sve_cmphi_ppzw_s, uint32_t, uint64_t, >) -DO_CMP_PPZW_B(sve_cmphs_ppzw_b, uint8_t, uint64_t, >=) +DO_CMP_PPZW_B(sve_cmphs_ppzw_b, uint8_t, uint64_t, >=) DO_CMP_PPZW_H(sve_cmphs_ppzw_h, uint16_t, uint64_t, >=) DO_CMP_PPZW_S(sve_cmphs_ppzw_s, uint32_t, uint64_t, >=) -DO_CMP_PPZW_B(sve_cmplt_ppzw_b, int8_t, int64_t, <) -DO_CMP_PPZW_H(sve_cmplt_ppzw_h, int16_t, int64_t, <) -DO_CMP_PPZW_S(sve_cmplt_ppzw_s, int32_t, int64_t, <) +DO_CMP_PPZW_B(sve_cmplt_ppzw_b, int8_t, int64_t, <) +DO_CMP_PPZW_H(sve_cmplt_ppzw_h, int16_t, int64_t, <) +DO_CMP_PPZW_S(sve_cmplt_ppzw_s, int32_t, int64_t, <) -DO_CMP_PPZW_B(sve_cmple_ppzw_b, int8_t, int64_t, <=) -DO_CMP_PPZW_H(sve_cmple_ppzw_h, int16_t, int64_t, <=) -DO_CMP_PPZW_S(sve_cmple_ppzw_s, int32_t, int64_t, <=) +DO_CMP_PPZW_B(sve_cmple_ppzw_b, int8_t, int64_t, <=) +DO_CMP_PPZW_H(sve_cmple_ppzw_h, int16_t, int64_t, <=) +DO_CMP_PPZW_S(sve_cmple_ppzw_s, int32_t, int64_t, <=) -DO_CMP_PPZW_B(sve_cmplo_ppzw_b, uint8_t, uint64_t, <) +DO_CMP_PPZW_B(sve_cmplo_ppzw_b, uint8_t, uint64_t, <) DO_CMP_PPZW_H(sve_cmplo_ppzw_h, uint16_t, uint64_t, <) DO_CMP_PPZW_S(sve_cmplo_ppzw_s, uint32_t, uint64_t, <) -DO_CMP_PPZW_B(sve_cmpls_ppzw_b, uint8_t, uint64_t, <=) +DO_CMP_PPZW_B(sve_cmpls_ppzw_b, uint8_t, uint64_t, <=) DO_CMP_PPZW_H(sve_cmpls_ppzw_h, uint16_t, uint64_t, <=) DO_CMP_PPZW_S(sve_cmpls_ppzw_s, uint32_t, uint64_t, <=) @@ -2573,83 +2564,83 @@ DO_CMP_PPZW_S(sve_cmpls_ppzw_s, uint32_t, uint64_t, <=) #undef DO_CMP_PPZW /* Similar, but the second source is immediate. 
*/ -#define DO_CMP_PPZI(NAME, TYPE, OP, H, MASK) \ -uint32_t HELPER(NAME)(void *vd, void *vn, void *vg, uint32_t desc) \ -{ \ - intptr_t opr_sz = simd_oprsz(desc); \ - uint32_t flags = PREDTEST_INIT; \ - TYPE mm = simd_data(desc); \ - intptr_t i = opr_sz; \ - do { \ - uint64_t out = 0, pg; \ - do { \ - i -= sizeof(TYPE), out <<= sizeof(TYPE); \ - TYPE nn = *(TYPE *)((char *)vn + H(i)); \ - out |= nn OP mm; \ - } while (i & 63); \ - pg = *(uint64_t *)((char *)vg + (i >> 3)) & MASK; \ - out &= pg; \ - *(uint64_t *)((char *)vd + (i >> 3)) = out; \ - flags = iter_predtest_bwd(out, pg, flags); \ - } while (i > 0); \ - return flags; \ -} - -#define DO_CMP_PPZI_B(NAME, TYPE, OP) \ - DO_CMP_PPZI(NAME, TYPE, OP, H1, 0xffffffffffffffffull) -#define DO_CMP_PPZI_H(NAME, TYPE, OP) \ +#define DO_CMP_PPZI(NAME, TYPE, OP, H, MASK) \ + uint32_t HELPER(NAME)(void *vd, void *vn, void *vg, uint32_t desc) \ + { \ + intptr_t opr_sz = simd_oprsz(desc); \ + uint32_t flags = PREDTEST_INIT; \ + TYPE mm = simd_data(desc); \ + intptr_t i = opr_sz; \ + do { \ + uint64_t out = 0, pg; \ + do { \ + i -= sizeof(TYPE), out <<= sizeof(TYPE); \ + TYPE nn = *(TYPE *)((char *)vn + H(i)); \ + out |= nn OP mm; \ + } while (i & 63); \ + pg = *(uint64_t *)((char *)vg + (i >> 3)) & MASK; \ + out &= pg; \ + *(uint64_t *)((char *)vd + (i >> 3)) = out; \ + flags = iter_predtest_bwd(out, pg, flags); \ + } while (i > 0); \ + return flags; \ + } + +#define DO_CMP_PPZI_B(NAME, TYPE, OP) \ + DO_CMP_PPZI(NAME, TYPE, OP, H1, 0xffffffffffffffffull) +#define DO_CMP_PPZI_H(NAME, TYPE, OP) \ DO_CMP_PPZI(NAME, TYPE, OP, H1_2, 0x5555555555555555ull) -#define DO_CMP_PPZI_S(NAME, TYPE, OP) \ +#define DO_CMP_PPZI_S(NAME, TYPE, OP) \ DO_CMP_PPZI(NAME, TYPE, OP, H1_4, 0x1111111111111111ull) -#define DO_CMP_PPZI_D(NAME, TYPE, OP) \ - DO_CMP_PPZI(NAME, TYPE, OP, , 0x0101010101010101ull) +#define DO_CMP_PPZI_D(NAME, TYPE, OP) \ + DO_CMP_PPZI(NAME, TYPE, OP, , 0x0101010101010101ull) -DO_CMP_PPZI_B(sve_cmpeq_ppzi_b, uint8_t, ==) +DO_CMP_PPZI_B(sve_cmpeq_ppzi_b, uint8_t, ==) DO_CMP_PPZI_H(sve_cmpeq_ppzi_h, uint16_t, ==) DO_CMP_PPZI_S(sve_cmpeq_ppzi_s, uint32_t, ==) DO_CMP_PPZI_D(sve_cmpeq_ppzi_d, uint64_t, ==) -DO_CMP_PPZI_B(sve_cmpne_ppzi_b, uint8_t, !=) +DO_CMP_PPZI_B(sve_cmpne_ppzi_b, uint8_t, !=) DO_CMP_PPZI_H(sve_cmpne_ppzi_h, uint16_t, !=) DO_CMP_PPZI_S(sve_cmpne_ppzi_s, uint32_t, !=) DO_CMP_PPZI_D(sve_cmpne_ppzi_d, uint64_t, !=) -DO_CMP_PPZI_B(sve_cmpgt_ppzi_b, int8_t, >) +DO_CMP_PPZI_B(sve_cmpgt_ppzi_b, int8_t, >) DO_CMP_PPZI_H(sve_cmpgt_ppzi_h, int16_t, >) DO_CMP_PPZI_S(sve_cmpgt_ppzi_s, int32_t, >) DO_CMP_PPZI_D(sve_cmpgt_ppzi_d, int64_t, >) -DO_CMP_PPZI_B(sve_cmpge_ppzi_b, int8_t, >=) +DO_CMP_PPZI_B(sve_cmpge_ppzi_b, int8_t, >=) DO_CMP_PPZI_H(sve_cmpge_ppzi_h, int16_t, >=) DO_CMP_PPZI_S(sve_cmpge_ppzi_s, int32_t, >=) DO_CMP_PPZI_D(sve_cmpge_ppzi_d, int64_t, >=) -DO_CMP_PPZI_B(sve_cmphi_ppzi_b, uint8_t, >) +DO_CMP_PPZI_B(sve_cmphi_ppzi_b, uint8_t, >) DO_CMP_PPZI_H(sve_cmphi_ppzi_h, uint16_t, >) DO_CMP_PPZI_S(sve_cmphi_ppzi_s, uint32_t, >) DO_CMP_PPZI_D(sve_cmphi_ppzi_d, uint64_t, >) -DO_CMP_PPZI_B(sve_cmphs_ppzi_b, uint8_t, >=) +DO_CMP_PPZI_B(sve_cmphs_ppzi_b, uint8_t, >=) DO_CMP_PPZI_H(sve_cmphs_ppzi_h, uint16_t, >=) DO_CMP_PPZI_S(sve_cmphs_ppzi_s, uint32_t, >=) DO_CMP_PPZI_D(sve_cmphs_ppzi_d, uint64_t, >=) -DO_CMP_PPZI_B(sve_cmplt_ppzi_b, int8_t, <) +DO_CMP_PPZI_B(sve_cmplt_ppzi_b, int8_t, <) DO_CMP_PPZI_H(sve_cmplt_ppzi_h, int16_t, <) DO_CMP_PPZI_S(sve_cmplt_ppzi_s, int32_t, <) DO_CMP_PPZI_D(sve_cmplt_ppzi_d, int64_t, <) 
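/*
 * A concrete, de-macroized sketch of the compare-against-immediate shape
 * used by DO_CMP_PPZI_S above: walk the operand backwards in 64-byte chunks,
 * pack one result bit per element byte, then gate the packed word with the
 * governing predicate restricted to the significant bits for 4-byte
 * elements.  The flat-array signature is an assumption of this sketch; the
 * real helpers operate on the CPU's vector and predicate register storage
 * and also accumulate PREDTEST flags, which this sketch omits.
 */
#include <stddef.h>
#include <stdint.h>

static void cmpgt_s_imm(uint64_t *pd, const int32_t *zn, const uint64_t *pg,
                        int32_t imm, size_t oprsz /* bytes, multiple of 64 */)
{
    size_t i = oprsz;

    do {
        uint64_t out = 0, g;

        do {
            i -= sizeof(int32_t);
            out <<= sizeof(int32_t);
            out |= zn[i / sizeof(int32_t)] > imm;
        } while (i & 63);

        g = pg[i >> 6] & 0x1111111111111111ull; /* S-element significant bits */
        pd[i >> 6] = out & g;                   /* only active elements report */
    } while (i > 0);
}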
-DO_CMP_PPZI_B(sve_cmple_ppzi_b, int8_t, <=) +DO_CMP_PPZI_B(sve_cmple_ppzi_b, int8_t, <=) DO_CMP_PPZI_H(sve_cmple_ppzi_h, int16_t, <=) DO_CMP_PPZI_S(sve_cmple_ppzi_s, int32_t, <=) DO_CMP_PPZI_D(sve_cmple_ppzi_d, int64_t, <=) -DO_CMP_PPZI_B(sve_cmplo_ppzi_b, uint8_t, <) +DO_CMP_PPZI_B(sve_cmplo_ppzi_b, uint8_t, <) DO_CMP_PPZI_H(sve_cmplo_ppzi_h, uint16_t, <) DO_CMP_PPZI_S(sve_cmplo_ppzi_s, uint32_t, <) DO_CMP_PPZI_D(sve_cmplo_ppzi_d, uint64_t, <) -DO_CMP_PPZI_B(sve_cmpls_ppzi_b, uint8_t, <=) +DO_CMP_PPZI_B(sve_cmpls_ppzi_b, uint8_t, <=) DO_CMP_PPZI_H(sve_cmpls_ppzi_h, uint16_t, <=) DO_CMP_PPZI_S(sve_cmpls_ppzi_s, uint32_t, <=) DO_CMP_PPZI_D(sve_cmpls_ppzi_d, uint64_t, <=) @@ -2678,8 +2669,8 @@ static bool last_active_pred(void *vd, void *vg, intptr_t oprsz) * (if after) or excluding (if !after) the first G & N. * Return true if BRK found. */ -static bool compute_brk(uint64_t *retb, uint64_t n, uint64_t g, - bool brk, bool after) +static bool compute_brk(uint64_t *retb, uint64_t n, uint64_t g, bool brk, + bool after) { uint64_t b; @@ -2690,16 +2681,16 @@ static bool compute_brk(uint64_t *retb, uint64_t n, uint64_t g, b = g; } else { /* Break somewhere in N. Locate it. */ - b = g & n; /* guard true, pred true */ + b = g & n; /* guard true, pred true */ #ifdef _MSC_VER - b = b & (0 - b); /* first such */ + b = b & (0 - b); /* first such */ #else - b = b & -b; /* first such */ + b = b & -b; /* first such */ #endif if (after) { - b = b | (b - 1); /* break after same */ + b = b | (b - 1); /* break after same */ } else { - b = b - 1; /* break before same */ + b = b - 1; /* break before same */ } brk = true; } @@ -2709,8 +2700,8 @@ static bool compute_brk(uint64_t *retb, uint64_t n, uint64_t g, } /* Compute a zeroing BRK. */ -static void compute_brk_z(uint64_t *d, uint64_t *n, uint64_t *g, - intptr_t oprsz, bool after) +static void compute_brk_z(uint64_t *d, uint64_t *n, uint64_t *g, intptr_t oprsz, + bool after) { bool brk = false; intptr_t i; @@ -2742,8 +2733,8 @@ static uint32_t compute_brks_z(uint64_t *d, uint64_t *n, uint64_t *g, } /* Compute a merging BRK. */ -static void compute_brk_m(uint64_t *d, uint64_t *n, uint64_t *g, - intptr_t oprsz, bool after) +static void compute_brk_m(uint64_t *d, uint64_t *n, uint64_t *g, intptr_t oprsz, + bool after) { bool brk = false; intptr_t i; @@ -2960,61 +2951,61 @@ uint32_t HELPER(sve_while)(void *vd, uint32_t count, uint32_t pred_desc) * The recursion is bounded to depth 7 (128 fp16 elements), so there's * little to gain with a more complex non-recursive form. */ -#define DO_REDUCE(NAME, TYPE, H, FUNC, IDENT) \ -static TYPE NAME##_reduce(TYPE *data, float_status *status, uintptr_t n) \ -{ \ - if (n == 1) { \ - return *data; \ - } else { \ - uintptr_t half = n / 2; \ - TYPE lo = NAME##_reduce(data, status, half); \ - TYPE hi = NAME##_reduce(data + half, status, half); \ - return TYPE##_##FUNC(lo, hi, status); \ - } \ -} \ -uint64_t HELPER(NAME)(void *vn, void *vg, void *vs, uint32_t desc) \ -{ \ - uintptr_t i, oprsz = simd_oprsz(desc), maxsz = simd_maxsz(desc); \ - TYPE data[sizeof(ARMVectorReg) / sizeof(TYPE)]; \ - for (i = 0; i < oprsz; ) { \ - uint16_t pg = *(uint16_t *)((char *)vg + H1_2(i >> 3)); \ - do { \ - TYPE nn = *(TYPE *)((char *)vn + H(i)); \ - *(TYPE *)((char *)data + i) = (pg & 1 ? 
nn : IDENT); \ - i += sizeof(TYPE), pg >>= sizeof(TYPE); \ - } while (i & 15); \ - } \ - for (; i < maxsz; i += sizeof(TYPE)) { \ - *(TYPE *)((char *)data + i) = IDENT; \ - } \ - return NAME##_reduce(data, vs, maxsz / sizeof(TYPE)); \ -} +#define DO_REDUCE(NAME, TYPE, H, FUNC, IDENT) \ + static TYPE NAME##_reduce(TYPE *data, float_status *status, uintptr_t n) \ + { \ + if (n == 1) { \ + return *data; \ + } else { \ + uintptr_t half = n / 2; \ + TYPE lo = NAME##_reduce(data, status, half); \ + TYPE hi = NAME##_reduce(data + half, status, half); \ + return TYPE##_##FUNC(lo, hi, status); \ + } \ + } \ + uint64_t HELPER(NAME)(void *vn, void *vg, void *vs, uint32_t desc) \ + { \ + uintptr_t i, oprsz = simd_oprsz(desc), maxsz = simd_maxsz(desc); \ + TYPE data[sizeof(ARMVectorReg) / sizeof(TYPE)]; \ + for (i = 0; i < oprsz;) { \ + uint16_t pg = *(uint16_t *)((char *)vg + H1_2(i >> 3)); \ + do { \ + TYPE nn = *(TYPE *)((char *)vn + H(i)); \ + *(TYPE *)((char *)data + i) = (pg & 1 ? nn : IDENT); \ + i += sizeof(TYPE), pg >>= sizeof(TYPE); \ + } while (i & 15); \ + } \ + for (; i < maxsz; i += sizeof(TYPE)) { \ + *(TYPE *)((char *)data + i) = IDENT; \ + } \ + return NAME##_reduce(data, vs, maxsz / sizeof(TYPE)); \ + } DO_REDUCE(sve_faddv_h, float16, H1_2, add, float16_zero) DO_REDUCE(sve_faddv_s, float32, H1_4, add, float32_zero) -DO_REDUCE(sve_faddv_d, float64, , add, float64_zero) +DO_REDUCE(sve_faddv_d, float64, , add, float64_zero) /* Identity is floatN_default_nan, without the function call. */ DO_REDUCE(sve_fminnmv_h, float16, H1_2, minnum, 0x7E00) DO_REDUCE(sve_fminnmv_s, float32, H1_4, minnum, 0x7FC00000) -DO_REDUCE(sve_fminnmv_d, float64, , minnum, 0x7FF8000000000000ULL) +DO_REDUCE(sve_fminnmv_d, float64, , minnum, 0x7FF8000000000000ULL) DO_REDUCE(sve_fmaxnmv_h, float16, H1_2, maxnum, 0x7E00) DO_REDUCE(sve_fmaxnmv_s, float32, H1_4, maxnum, 0x7FC00000) -DO_REDUCE(sve_fmaxnmv_d, float64, , maxnum, 0x7FF8000000000000ULL) +DO_REDUCE(sve_fmaxnmv_d, float64, , maxnum, 0x7FF8000000000000ULL) DO_REDUCE(sve_fminv_h, float16, H1_2, min, float16_infinity) DO_REDUCE(sve_fminv_s, float32, H1_4, min, float32_infinity) -DO_REDUCE(sve_fminv_d, float64, , min, float64_infinity) +DO_REDUCE(sve_fminv_d, float64, , min, float64_infinity) DO_REDUCE(sve_fmaxv_h, float16, H1_2, max, float16_chs(float16_infinity)) DO_REDUCE(sve_fmaxv_s, float32, H1_4, max, float32_chs(float32_infinity)) -DO_REDUCE(sve_fmaxv_d, float64, , max, float64_chs(float64_infinity)) +DO_REDUCE(sve_fmaxv_d, float64, , max, float64_chs(float64_infinity)) #undef DO_REDUCE -uint64_t HELPER(sve_fadda_h)(uint64_t nn, void *vm, void *vg, - void *status, uint32_t desc) +uint64_t HELPER(sve_fadda_h)(uint64_t nn, void *vm, void *vg, void *status, + uint32_t desc) { intptr_t i = 0, opr_sz = simd_oprsz(desc); float16 result = nn; @@ -3033,8 +3024,8 @@ uint64_t HELPER(sve_fadda_h)(uint64_t nn, void *vm, void *vg, return result; } -uint64_t HELPER(sve_fadda_s)(uint64_t nn, void *vm, void *vg, - void *status, uint32_t desc) +uint64_t HELPER(sve_fadda_s)(uint64_t nn, void *vm, void *vg, void *status, + uint32_t desc) { intptr_t i = 0, opr_sz = simd_oprsz(desc); float32 result = nn; @@ -3053,8 +3044,8 @@ uint64_t HELPER(sve_fadda_s)(uint64_t nn, void *vm, void *vg, return result; } -uint64_t HELPER(sve_fadda_d)(uint64_t nn, void *vm, void *vg, - void *status, uint32_t desc) +uint64_t HELPER(sve_fadda_d)(uint64_t nn, void *vm, void *vg, void *status, + uint32_t desc) { intptr_t i = 0, opr_sz = simd_oprsz(desc) / 8; uint64_t *m = vm; @@ -3072,56 +3063,56 @@ 
uint64_t HELPER(sve_fadda_d)(uint64_t nn, void *vm, void *vg, /* Fully general three-operand expander, controlled by a predicate, * With the extra float_status parameter. */ -#define DO_ZPZZ_FP(NAME, TYPE, H, OP) \ -void HELPER(NAME)(void *vd, void *vn, void *vm, void *vg, \ - void *status, uint32_t desc) \ -{ \ - intptr_t i = simd_oprsz(desc); \ - uint64_t *g = vg; \ - do { \ - uint64_t pg = g[(i - 1) >> 6]; \ - do { \ - i -= sizeof(TYPE); \ - if (likely((pg >> (i & 63)) & 1)) { \ - TYPE nn = *(TYPE *)((char *)vn + H(i)); \ - TYPE mm = *(TYPE *)((char *)vm + H(i)); \ - *(TYPE *)((char *)vd + H(i)) = OP(nn, mm, status); \ - } \ - } while (i & 63); \ - } while (i != 0); \ -} +#define DO_ZPZZ_FP(NAME, TYPE, H, OP) \ + void HELPER(NAME)(void *vd, void *vn, void *vm, void *vg, void *status, \ + uint32_t desc) \ + { \ + intptr_t i = simd_oprsz(desc); \ + uint64_t *g = vg; \ + do { \ + uint64_t pg = g[(i - 1) >> 6]; \ + do { \ + i -= sizeof(TYPE); \ + if (likely((pg >> (i & 63)) & 1)) { \ + TYPE nn = *(TYPE *)((char *)vn + H(i)); \ + TYPE mm = *(TYPE *)((char *)vm + H(i)); \ + *(TYPE *)((char *)vd + H(i)) = OP(nn, mm, status); \ + } \ + } while (i & 63); \ + } while (i != 0); \ + } DO_ZPZZ_FP(sve_fadd_h, uint16_t, H1_2, float16_add) DO_ZPZZ_FP(sve_fadd_s, uint32_t, H1_4, float32_add) -DO_ZPZZ_FP(sve_fadd_d, uint64_t, , float64_add) +DO_ZPZZ_FP(sve_fadd_d, uint64_t, , float64_add) DO_ZPZZ_FP(sve_fsub_h, uint16_t, H1_2, float16_sub) DO_ZPZZ_FP(sve_fsub_s, uint32_t, H1_4, float32_sub) -DO_ZPZZ_FP(sve_fsub_d, uint64_t, , float64_sub) +DO_ZPZZ_FP(sve_fsub_d, uint64_t, , float64_sub) DO_ZPZZ_FP(sve_fmul_h, uint16_t, H1_2, float16_mul) DO_ZPZZ_FP(sve_fmul_s, uint32_t, H1_4, float32_mul) -DO_ZPZZ_FP(sve_fmul_d, uint64_t, , float64_mul) +DO_ZPZZ_FP(sve_fmul_d, uint64_t, , float64_mul) DO_ZPZZ_FP(sve_fdiv_h, uint16_t, H1_2, float16_div) DO_ZPZZ_FP(sve_fdiv_s, uint32_t, H1_4, float32_div) -DO_ZPZZ_FP(sve_fdiv_d, uint64_t, , float64_div) +DO_ZPZZ_FP(sve_fdiv_d, uint64_t, , float64_div) DO_ZPZZ_FP(sve_fmin_h, uint16_t, H1_2, float16_min) DO_ZPZZ_FP(sve_fmin_s, uint32_t, H1_4, float32_min) -DO_ZPZZ_FP(sve_fmin_d, uint64_t, , float64_min) +DO_ZPZZ_FP(sve_fmin_d, uint64_t, , float64_min) DO_ZPZZ_FP(sve_fmax_h, uint16_t, H1_2, float16_max) DO_ZPZZ_FP(sve_fmax_s, uint32_t, H1_4, float32_max) -DO_ZPZZ_FP(sve_fmax_d, uint64_t, , float64_max) +DO_ZPZZ_FP(sve_fmax_d, uint64_t, , float64_max) DO_ZPZZ_FP(sve_fminnum_h, uint16_t, H1_2, float16_minnum) DO_ZPZZ_FP(sve_fminnum_s, uint32_t, H1_4, float32_minnum) -DO_ZPZZ_FP(sve_fminnum_d, uint64_t, , float64_minnum) +DO_ZPZZ_FP(sve_fminnum_d, uint64_t, , float64_minnum) DO_ZPZZ_FP(sve_fmaxnum_h, uint16_t, H1_2, float16_maxnum) DO_ZPZZ_FP(sve_fmaxnum_s, uint32_t, H1_4, float32_maxnum) -DO_ZPZZ_FP(sve_fmaxnum_d, uint64_t, , float64_maxnum) +DO_ZPZZ_FP(sve_fmaxnum_d, uint64_t, , float64_maxnum) static inline float16 abd_h(float16 a, float16 b, float_status *s) { @@ -3140,7 +3131,7 @@ static inline float64 abd_d(float64 a, float64 b, float_status *s) DO_ZPZZ_FP(sve_fabd_h, uint16_t, H1_2, abd_h) DO_ZPZZ_FP(sve_fabd_s, uint32_t, H1_4, abd_s) -DO_ZPZZ_FP(sve_fabd_d, uint64_t, , abd_d) +DO_ZPZZ_FP(sve_fabd_d, uint64_t, , abd_d) static inline float64 scalbn_d(float64 a, int64_t b, float_status *s) { @@ -3150,47 +3141,47 @@ static inline float64 scalbn_d(float64 a, int64_t b, float_status *s) DO_ZPZZ_FP(sve_fscalbn_h, int16_t, H1_2, float16_scalbn) DO_ZPZZ_FP(sve_fscalbn_s, int32_t, H1_4, float32_scalbn) -DO_ZPZZ_FP(sve_fscalbn_d, int64_t, , scalbn_d) +DO_ZPZZ_FP(sve_fscalbn_d, 
int64_t, , scalbn_d) DO_ZPZZ_FP(sve_fmulx_h, uint16_t, H1_2, helper_advsimd_mulxh) DO_ZPZZ_FP(sve_fmulx_s, uint32_t, H1_4, helper_vfp_mulxs) -DO_ZPZZ_FP(sve_fmulx_d, uint64_t, , helper_vfp_mulxd) +DO_ZPZZ_FP(sve_fmulx_d, uint64_t, , helper_vfp_mulxd) #undef DO_ZPZZ_FP /* Three-operand expander, with one scalar operand, controlled by * a predicate, with the extra float_status parameter. */ -#define DO_ZPZS_FP(NAME, TYPE, H, OP) \ -void HELPER(NAME)(void *vd, void *vn, void *vg, uint64_t scalar, \ - void *status, uint32_t desc) \ -{ \ - intptr_t i = simd_oprsz(desc); \ - uint64_t *g = vg; \ - TYPE mm = scalar; \ - do { \ - uint64_t pg = g[(i - 1) >> 6]; \ - do { \ - i -= sizeof(TYPE); \ - if (likely((pg >> (i & 63)) & 1)) { \ - TYPE nn = *(TYPE *)((char *)vn + H(i)); \ - *(TYPE *)((char *)vd + H(i)) = OP(nn, mm, status); \ - } \ - } while (i & 63); \ - } while (i != 0); \ -} +#define DO_ZPZS_FP(NAME, TYPE, H, OP) \ + void HELPER(NAME)(void *vd, void *vn, void *vg, uint64_t scalar, \ + void *status, uint32_t desc) \ + { \ + intptr_t i = simd_oprsz(desc); \ + uint64_t *g = vg; \ + TYPE mm = scalar; \ + do { \ + uint64_t pg = g[(i - 1) >> 6]; \ + do { \ + i -= sizeof(TYPE); \ + if (likely((pg >> (i & 63)) & 1)) { \ + TYPE nn = *(TYPE *)((char *)vn + H(i)); \ + *(TYPE *)((char *)vd + H(i)) = OP(nn, mm, status); \ + } \ + } while (i & 63); \ + } while (i != 0); \ + } DO_ZPZS_FP(sve_fadds_h, float16, H1_2, float16_add) DO_ZPZS_FP(sve_fadds_s, float32, H1_4, float32_add) -DO_ZPZS_FP(sve_fadds_d, float64, , float64_add) +DO_ZPZS_FP(sve_fadds_d, float64, , float64_add) DO_ZPZS_FP(sve_fsubs_h, float16, H1_2, float16_sub) DO_ZPZS_FP(sve_fsubs_s, float32, H1_4, float32_sub) -DO_ZPZS_FP(sve_fsubs_d, float64, , float64_sub) +DO_ZPZS_FP(sve_fsubs_d, float64, , float64_sub) DO_ZPZS_FP(sve_fmuls_h, float16, H1_2, float16_mul) DO_ZPZS_FP(sve_fmuls_s, float32, H1_4, float32_mul) -DO_ZPZS_FP(sve_fmuls_d, float64, , float64_mul) +DO_ZPZS_FP(sve_fmuls_d, float64, , float64_mul) static inline float16 subr_h(float16 a, float16 b, float_status *s) { @@ -3209,43 +3200,44 @@ static inline float64 subr_d(float64 a, float64 b, float_status *s) DO_ZPZS_FP(sve_fsubrs_h, float16, H1_2, subr_h) DO_ZPZS_FP(sve_fsubrs_s, float32, H1_4, subr_s) -DO_ZPZS_FP(sve_fsubrs_d, float64, , subr_d) +DO_ZPZS_FP(sve_fsubrs_d, float64, , subr_d) DO_ZPZS_FP(sve_fmaxnms_h, float16, H1_2, float16_maxnum) DO_ZPZS_FP(sve_fmaxnms_s, float32, H1_4, float32_maxnum) -DO_ZPZS_FP(sve_fmaxnms_d, float64, , float64_maxnum) +DO_ZPZS_FP(sve_fmaxnms_d, float64, , float64_maxnum) DO_ZPZS_FP(sve_fminnms_h, float16, H1_2, float16_minnum) DO_ZPZS_FP(sve_fminnms_s, float32, H1_4, float32_minnum) -DO_ZPZS_FP(sve_fminnms_d, float64, , float64_minnum) +DO_ZPZS_FP(sve_fminnms_d, float64, , float64_minnum) DO_ZPZS_FP(sve_fmaxs_h, float16, H1_2, float16_max) DO_ZPZS_FP(sve_fmaxs_s, float32, H1_4, float32_max) -DO_ZPZS_FP(sve_fmaxs_d, float64, , float64_max) +DO_ZPZS_FP(sve_fmaxs_d, float64, , float64_max) DO_ZPZS_FP(sve_fmins_h, float16, H1_2, float16_min) DO_ZPZS_FP(sve_fmins_s, float32, H1_4, float32_min) -DO_ZPZS_FP(sve_fmins_d, float64, , float64_min) +DO_ZPZS_FP(sve_fmins_d, float64, , float64_min) /* Fully general two-operand expander, controlled by a predicate, * With the extra float_status parameter. 
*/ -#define DO_ZPZ_FP(NAME, TYPE, H, OP) \ -void HELPER(NAME)(void *vd, void *vn, void *vg, void *status, uint32_t desc) \ -{ \ - intptr_t i = simd_oprsz(desc); \ - uint64_t *g = vg; \ - do { \ - uint64_t pg = g[(i - 1) >> 6]; \ - do { \ - i -= sizeof(TYPE); \ - if (likely((pg >> (i & 63)) & 1)) { \ - TYPE nn = *(TYPE *)((char *)vn + H(i)); \ - *(TYPE *)((char *)vd + H(i)) = OP(nn, status); \ - } \ - } while (i & 63); \ - } while (i != 0); \ -} +#define DO_ZPZ_FP(NAME, TYPE, H, OP) \ + void HELPER(NAME)(void *vd, void *vn, void *vg, void *status, \ + uint32_t desc) \ + { \ + intptr_t i = simd_oprsz(desc); \ + uint64_t *g = vg; \ + do { \ + uint64_t pg = g[(i - 1) >> 6]; \ + do { \ + i -= sizeof(TYPE); \ + if (likely((pg >> (i & 63)) & 1)) { \ + TYPE nn = *(TYPE *)((char *)vn + H(i)); \ + *(TYPE *)((char *)vd + H(i)) = OP(nn, status); \ + } \ + } while (i & 63); \ + } while (i != 0); \ + } /* SVE fp16 conversions always use IEEE mode. Like AdvSIMD, they ignore * FZ16. When converting from fp16, this affects flushing input denormals; @@ -3253,7 +3245,7 @@ void HELPER(NAME)(void *vd, void *vn, void *vg, void *status, uint32_t desc) \ */ static inline float32 sve_f16_to_f32(float16 f, float_status *fpst) { - flag save = get_flush_inputs_to_zero(fpst); + bool save = get_flush_inputs_to_zero(fpst); float32 ret; set_flush_inputs_to_zero(false, fpst); @@ -3264,7 +3256,7 @@ static inline float32 sve_f16_to_f32(float16 f, float_status *fpst) static inline float64 sve_f16_to_f64(float16 f, float_status *fpst) { - flag save = get_flush_inputs_to_zero(fpst); + bool save = get_flush_inputs_to_zero(fpst); float64 ret; set_flush_inputs_to_zero(false, fpst); @@ -3275,7 +3267,7 @@ static inline float64 sve_f16_to_f64(float16 f, float_status *fpst) static inline float16 sve_f32_to_f16(float32 f, float_status *fpst) { - flag save = get_flush_to_zero(fpst); + bool save = get_flush_to_zero(fpst); float16 ret; set_flush_to_zero(false, fpst); @@ -3286,7 +3278,7 @@ static inline float16 sve_f32_to_f16(float32 f, float_status *fpst) static inline float16 sve_f64_to_f16(float64 f, float_status *fpst) { - flag save = get_flush_to_zero(fpst); + bool save = get_flush_to_zero(fpst); float16 ret; set_flush_to_zero(false, fpst); @@ -3369,78 +3361,66 @@ static inline uint64_t vfp_float64_to_uint64_rtz(float64 f, float_status *s) DO_ZPZ_FP(sve_fcvt_sh, uint32_t, H1_4, sve_f32_to_f16) DO_ZPZ_FP(sve_fcvt_hs, uint32_t, H1_4, sve_f16_to_f32) -DO_ZPZ_FP(sve_fcvt_dh, uint64_t, , sve_f64_to_f16) -DO_ZPZ_FP(sve_fcvt_hd, uint64_t, , sve_f16_to_f64) -DO_ZPZ_FP(sve_fcvt_ds, uint64_t, , float64_to_float32) -DO_ZPZ_FP(sve_fcvt_sd, uint64_t, , float32_to_float64) +DO_ZPZ_FP(sve_fcvt_dh, uint64_t, , sve_f64_to_f16) +DO_ZPZ_FP(sve_fcvt_hd, uint64_t, , sve_f16_to_f64) +DO_ZPZ_FP(sve_fcvt_ds, uint64_t, , float64_to_float32) +DO_ZPZ_FP(sve_fcvt_sd, uint64_t, , float32_to_float64) DO_ZPZ_FP(sve_fcvtzs_hh, uint16_t, H1_2, vfp_float16_to_int16_rtz) DO_ZPZ_FP(sve_fcvtzs_hs, uint32_t, H1_4, helper_vfp_tosizh) DO_ZPZ_FP(sve_fcvtzs_ss, uint32_t, H1_4, helper_vfp_tosizs) -DO_ZPZ_FP(sve_fcvtzs_hd, uint64_t, , vfp_float16_to_int64_rtz) -DO_ZPZ_FP(sve_fcvtzs_sd, uint64_t, , vfp_float32_to_int64_rtz) -DO_ZPZ_FP(sve_fcvtzs_ds, uint64_t, , helper_vfp_tosizd) -DO_ZPZ_FP(sve_fcvtzs_dd, uint64_t, , vfp_float64_to_int64_rtz) +DO_ZPZ_FP(sve_fcvtzs_hd, uint64_t, , vfp_float16_to_int64_rtz) +DO_ZPZ_FP(sve_fcvtzs_sd, uint64_t, , vfp_float32_to_int64_rtz) +DO_ZPZ_FP(sve_fcvtzs_ds, uint64_t, , helper_vfp_tosizd) +DO_ZPZ_FP(sve_fcvtzs_dd, uint64_t, , 
vfp_float64_to_int64_rtz) DO_ZPZ_FP(sve_fcvtzu_hh, uint16_t, H1_2, vfp_float16_to_uint16_rtz) DO_ZPZ_FP(sve_fcvtzu_hs, uint32_t, H1_4, helper_vfp_touizh) DO_ZPZ_FP(sve_fcvtzu_ss, uint32_t, H1_4, helper_vfp_touizs) -DO_ZPZ_FP(sve_fcvtzu_hd, uint64_t, , vfp_float16_to_uint64_rtz) -DO_ZPZ_FP(sve_fcvtzu_sd, uint64_t, , vfp_float32_to_uint64_rtz) -DO_ZPZ_FP(sve_fcvtzu_ds, uint64_t, , helper_vfp_touizd) -DO_ZPZ_FP(sve_fcvtzu_dd, uint64_t, , vfp_float64_to_uint64_rtz) +DO_ZPZ_FP(sve_fcvtzu_hd, uint64_t, , vfp_float16_to_uint64_rtz) +DO_ZPZ_FP(sve_fcvtzu_sd, uint64_t, , vfp_float32_to_uint64_rtz) +DO_ZPZ_FP(sve_fcvtzu_ds, uint64_t, , helper_vfp_touizd) +DO_ZPZ_FP(sve_fcvtzu_dd, uint64_t, , vfp_float64_to_uint64_rtz) DO_ZPZ_FP(sve_frint_h, uint16_t, H1_2, helper_advsimd_rinth) DO_ZPZ_FP(sve_frint_s, uint32_t, H1_4, helper_rints) -DO_ZPZ_FP(sve_frint_d, uint64_t, , helper_rintd) +DO_ZPZ_FP(sve_frint_d, uint64_t, , helper_rintd) DO_ZPZ_FP(sve_frintx_h, uint16_t, H1_2, float16_round_to_int) DO_ZPZ_FP(sve_frintx_s, uint32_t, H1_4, float32_round_to_int) -DO_ZPZ_FP(sve_frintx_d, uint64_t, , float64_round_to_int) +DO_ZPZ_FP(sve_frintx_d, uint64_t, , float64_round_to_int) DO_ZPZ_FP(sve_frecpx_h, uint16_t, H1_2, helper_frecpx_f16) DO_ZPZ_FP(sve_frecpx_s, uint32_t, H1_4, helper_frecpx_f32) -DO_ZPZ_FP(sve_frecpx_d, uint64_t, , helper_frecpx_f64) +DO_ZPZ_FP(sve_frecpx_d, uint64_t, , helper_frecpx_f64) DO_ZPZ_FP(sve_fsqrt_h, uint16_t, H1_2, float16_sqrt) DO_ZPZ_FP(sve_fsqrt_s, uint32_t, H1_4, float32_sqrt) -DO_ZPZ_FP(sve_fsqrt_d, uint64_t, , float64_sqrt) +DO_ZPZ_FP(sve_fsqrt_d, uint64_t, , float64_sqrt) DO_ZPZ_FP(sve_scvt_hh, uint16_t, H1_2, int16_to_float16) DO_ZPZ_FP(sve_scvt_sh, uint32_t, H1_4, int32_to_float16) DO_ZPZ_FP(sve_scvt_ss, uint32_t, H1_4, int32_to_float32) -DO_ZPZ_FP(sve_scvt_sd, uint64_t, , int32_to_float64) -DO_ZPZ_FP(sve_scvt_dh, uint64_t, , int64_to_float16) -DO_ZPZ_FP(sve_scvt_ds, uint64_t, , int64_to_float32) -DO_ZPZ_FP(sve_scvt_dd, uint64_t, , int64_to_float64) +DO_ZPZ_FP(sve_scvt_sd, uint64_t, , int32_to_float64) +DO_ZPZ_FP(sve_scvt_dh, uint64_t, , int64_to_float16) +DO_ZPZ_FP(sve_scvt_ds, uint64_t, , int64_to_float32) +DO_ZPZ_FP(sve_scvt_dd, uint64_t, , int64_to_float64) DO_ZPZ_FP(sve_ucvt_hh, uint16_t, H1_2, uint16_to_float16) DO_ZPZ_FP(sve_ucvt_sh, uint32_t, H1_4, uint32_to_float16) DO_ZPZ_FP(sve_ucvt_ss, uint32_t, H1_4, uint32_to_float32) -DO_ZPZ_FP(sve_ucvt_sd, uint64_t, , uint32_to_float64) -DO_ZPZ_FP(sve_ucvt_dh, uint64_t, , uint64_to_float16) -DO_ZPZ_FP(sve_ucvt_ds, uint64_t, , uint64_to_float32) -DO_ZPZ_FP(sve_ucvt_dd, uint64_t, , uint64_to_float64) +DO_ZPZ_FP(sve_ucvt_sd, uint64_t, , uint32_to_float64) +DO_ZPZ_FP(sve_ucvt_dh, uint64_t, , uint64_to_float16) +DO_ZPZ_FP(sve_ucvt_ds, uint64_t, , uint64_to_float32) +DO_ZPZ_FP(sve_ucvt_dd, uint64_t, , uint64_to_float64) #undef DO_ZPZ_FP -/* 4-operand predicated multiply-add. This requires 7 operands to pass - * "properly", so we need to encode some of the registers into DESC. 
- */ -QEMU_BUILD_BUG_ON(SIMD_DATA_SHIFT + 20 > 32); - -static void do_fmla_zpzzz_h(CPUARMState *env, void *vg, uint32_t desc, - uint16_t neg1, uint16_t neg3) +static void do_fmla_zpzzz_h(void *vd, void *vn, void *vm, void *va, void *vg, + float_status *status, uint32_t desc, uint16_t neg1, + uint16_t neg3) { intptr_t i = simd_oprsz(desc); - unsigned rd = extract32(desc, SIMD_DATA_SHIFT, 5); - unsigned rn = extract32(desc, SIMD_DATA_SHIFT + 5, 5); - unsigned rm = extract32(desc, SIMD_DATA_SHIFT + 10, 5); - unsigned ra = extract32(desc, SIMD_DATA_SHIFT + 15, 5); - void *vd = &env->vfp.zregs[rd]; - void *vn = &env->vfp.zregs[rn]; - void *vm = &env->vfp.zregs[rm]; - void *va = &env->vfp.zregs[ra]; uint64_t *g = vg; do { @@ -3453,45 +3433,42 @@ static void do_fmla_zpzzz_h(CPUARMState *env, void *vg, uint32_t desc, e1 = *(uint16_t *)((char *)vn + H1_2(i)) ^ neg1; e2 = *(uint16_t *)((char *)vm + H1_2(i)); e3 = *(uint16_t *)((char *)va + H1_2(i)) ^ neg3; - r = float16_muladd(e1, e2, e3, 0, &env->vfp.fp_status_f16); + r = float16_muladd(e1, e2, e3, 0, status); *(uint16_t *)((char *)vd + H1_2(i)) = r; } } while (i & 63); } while (i != 0); } -void HELPER(sve_fmla_zpzzz_h)(CPUARMState *env, void *vg, uint32_t desc) +void HELPER(sve_fmla_zpzzz_h)(void *vd, void *vn, void *vm, void *va, void *vg, + void *status, uint32_t desc) { - do_fmla_zpzzz_h(env, vg, desc, 0, 0); + do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0, 0); } -void HELPER(sve_fmls_zpzzz_h)(CPUARMState *env, void *vg, uint32_t desc) +void HELPER(sve_fmls_zpzzz_h)(void *vd, void *vn, void *vm, void *va, void *vg, + void *status, uint32_t desc) { - do_fmla_zpzzz_h(env, vg, desc, 0x8000, 0); + do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0x8000, 0); } -void HELPER(sve_fnmla_zpzzz_h)(CPUARMState *env, void *vg, uint32_t desc) +void HELPER(sve_fnmla_zpzzz_h)(void *vd, void *vn, void *vm, void *va, void *vg, + void *status, uint32_t desc) { - do_fmla_zpzzz_h(env, vg, desc, 0x8000, 0x8000); + do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0x8000, 0x8000); } -void HELPER(sve_fnmls_zpzzz_h)(CPUARMState *env, void *vg, uint32_t desc) +void HELPER(sve_fnmls_zpzzz_h)(void *vd, void *vn, void *vm, void *va, void *vg, + void *status, uint32_t desc) { - do_fmla_zpzzz_h(env, vg, desc, 0, 0x8000); + do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0, 0x8000); } -static void do_fmla_zpzzz_s(CPUARMState *env, void *vg, uint32_t desc, - uint32_t neg1, uint32_t neg3) +static void do_fmla_zpzzz_s(void *vd, void *vn, void *vm, void *va, void *vg, + float_status *status, uint32_t desc, uint32_t neg1, + uint32_t neg3) { intptr_t i = simd_oprsz(desc); - unsigned rd = extract32(desc, SIMD_DATA_SHIFT, 5); - unsigned rn = extract32(desc, SIMD_DATA_SHIFT + 5, 5); - unsigned rm = extract32(desc, SIMD_DATA_SHIFT + 10, 5); - unsigned ra = extract32(desc, SIMD_DATA_SHIFT + 15, 5); - void *vd = &env->vfp.zregs[rd]; - void *vn = &env->vfp.zregs[rn]; - void *vm = &env->vfp.zregs[rm]; - void *va = &env->vfp.zregs[ra]; uint64_t *g = vg; do { @@ -3504,45 +3481,42 @@ static void do_fmla_zpzzz_s(CPUARMState *env, void *vg, uint32_t desc, e1 = *(uint32_t *)((char *)vn + H1_4(i)) ^ neg1; e2 = *(uint32_t *)((char *)vm + H1_4(i)); e3 = *(uint32_t *)((char *)va + H1_4(i)) ^ neg3; - r = float32_muladd(e1, e2, e3, 0, &env->vfp.fp_status); + r = float32_muladd(e1, e2, e3, 0, status); *(uint32_t *)((char *)vd + H1_4(i)) = r; } } while (i & 63); } while (i != 0); } -void HELPER(sve_fmla_zpzzz_s)(CPUARMState *env, void *vg, uint32_t desc) +void HELPER(sve_fmla_zpzzz_s)(void *vd, 
void *vn, void *vm, void *va, void *vg, + void *status, uint32_t desc) { - do_fmla_zpzzz_s(env, vg, desc, 0, 0); + do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0, 0); } -void HELPER(sve_fmls_zpzzz_s)(CPUARMState *env, void *vg, uint32_t desc) +void HELPER(sve_fmls_zpzzz_s)(void *vd, void *vn, void *vm, void *va, void *vg, + void *status, uint32_t desc) { - do_fmla_zpzzz_s(env, vg, desc, 0x80000000, 0); + do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0x80000000, 0); } -void HELPER(sve_fnmla_zpzzz_s)(CPUARMState *env, void *vg, uint32_t desc) +void HELPER(sve_fnmla_zpzzz_s)(void *vd, void *vn, void *vm, void *va, void *vg, + void *status, uint32_t desc) { - do_fmla_zpzzz_s(env, vg, desc, 0x80000000, 0x80000000); + do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0x80000000, 0x80000000); } -void HELPER(sve_fnmls_zpzzz_s)(CPUARMState *env, void *vg, uint32_t desc) +void HELPER(sve_fnmls_zpzzz_s)(void *vd, void *vn, void *vm, void *va, void *vg, + void *status, uint32_t desc) { - do_fmla_zpzzz_s(env, vg, desc, 0, 0x80000000); + do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0, 0x80000000); } -static void do_fmla_zpzzz_d(CPUARMState *env, void *vg, uint32_t desc, - uint64_t neg1, uint64_t neg3) +static void do_fmla_zpzzz_d(void *vd, void *vn, void *vm, void *va, void *vg, + float_status *status, uint32_t desc, uint64_t neg1, + uint64_t neg3) { intptr_t i = simd_oprsz(desc); - unsigned rd = extract32(desc, SIMD_DATA_SHIFT, 5); - unsigned rn = extract32(desc, SIMD_DATA_SHIFT + 5, 5); - unsigned rm = extract32(desc, SIMD_DATA_SHIFT + 10, 5); - unsigned ra = extract32(desc, SIMD_DATA_SHIFT + 15, 5); - void *vd = &env->vfp.zregs[rd]; - void *vn = &env->vfp.zregs[rn]; - void *vm = &env->vfp.zregs[rm]; - void *va = &env->vfp.zregs[ra]; uint64_t *g = vg; do { @@ -3555,31 +3529,35 @@ static void do_fmla_zpzzz_d(CPUARMState *env, void *vg, uint32_t desc, e1 = *(uint64_t *)((char *)vn + i) ^ neg1; e2 = *(uint64_t *)((char *)vm + i); e3 = *(uint64_t *)((char *)va + i) ^ neg3; - r = float64_muladd(e1, e2, e3, 0, &env->vfp.fp_status); + r = float64_muladd(e1, e2, e3, 0, status); *(uint64_t *)((char *)vd + i) = r; } } while (i & 63); } while (i != 0); } -void HELPER(sve_fmla_zpzzz_d)(CPUARMState *env, void *vg, uint32_t desc) +void HELPER(sve_fmla_zpzzz_d)(void *vd, void *vn, void *vm, void *va, void *vg, + void *status, uint32_t desc) { - do_fmla_zpzzz_d(env, vg, desc, 0, 0); + do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, 0, 0); } -void HELPER(sve_fmls_zpzzz_d)(CPUARMState *env, void *vg, uint32_t desc) +void HELPER(sve_fmls_zpzzz_d)(void *vd, void *vn, void *vm, void *va, void *vg, + void *status, uint32_t desc) { - do_fmla_zpzzz_d(env, vg, desc, INT64_MIN, 0); + do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, INT64_MIN, 0); } -void HELPER(sve_fnmla_zpzzz_d)(CPUARMState *env, void *vg, uint32_t desc) +void HELPER(sve_fnmla_zpzzz_d)(void *vd, void *vn, void *vm, void *va, void *vg, + void *status, uint32_t desc) { - do_fmla_zpzzz_d(env, vg, desc, INT64_MIN, INT64_MIN); + do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, INT64_MIN, INT64_MIN); } -void HELPER(sve_fnmls_zpzzz_d)(CPUARMState *env, void *vg, uint32_t desc) +void HELPER(sve_fnmls_zpzzz_d)(void *vd, void *vn, void *vm, void *va, void *vg, + void *status, uint32_t desc) { - do_fmla_zpzzz_d(env, vg, desc, 0, INT64_MIN); + do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, 0, INT64_MIN); } /* Two operand floating-point comparison controlled by a predicate. 
@@ -3587,49 +3565,46 @@ void HELPER(sve_fnmls_zpzzz_d)(CPUARMState *env, void *vg, uint32_t desc) * compare operands, since the comparison may have side effects wrt * the FPSR. */ -#define DO_FPCMP_PPZZ(NAME, TYPE, H, OP) \ -void HELPER(NAME)(void *vd, void *vn, void *vm, void *vg, \ - void *status, uint32_t desc) \ -{ \ - intptr_t i = simd_oprsz(desc), j = (i - 1) >> 6; \ - uint64_t *d = vd, *g = vg; \ - do { \ - uint64_t out = 0, pg = g[j]; \ - do { \ - i -= sizeof(TYPE), out <<= sizeof(TYPE); \ - if (likely((pg >> (i & 63)) & 1)) { \ - TYPE nn = *(TYPE *)((char *)vn + H(i)); \ - TYPE mm = *(TYPE *)((char *)vm + H(i)); \ - out |= OP(TYPE, nn, mm, status); \ - } \ - } while (i & 63); \ - d[j--] = out; \ - } while (i > 0); \ -} - -#define DO_FPCMP_PPZZ_H(NAME, OP) \ - DO_FPCMP_PPZZ(NAME##_h, float16, H1_2, OP) -#define DO_FPCMP_PPZZ_S(NAME, OP) \ - DO_FPCMP_PPZZ(NAME##_s, float32, H1_4, OP) -#define DO_FPCMP_PPZZ_D(NAME, OP) \ - DO_FPCMP_PPZZ(NAME##_d, float64, , OP) - -#define DO_FPCMP_PPZZ_ALL(NAME, OP) \ - DO_FPCMP_PPZZ_H(NAME, OP) \ - DO_FPCMP_PPZZ_S(NAME, OP) \ +#define DO_FPCMP_PPZZ(NAME, TYPE, H, OP) \ + void HELPER(NAME)(void *vd, void *vn, void *vm, void *vg, void *status, \ + uint32_t desc) \ + { \ + intptr_t i = simd_oprsz(desc), j = (i - 1) >> 6; \ + uint64_t *d = vd, *g = vg; \ + do { \ + uint64_t out = 0, pg = g[j]; \ + do { \ + i -= sizeof(TYPE), out <<= sizeof(TYPE); \ + if (likely((pg >> (i & 63)) & 1)) { \ + TYPE nn = *(TYPE *)((char *)vn + H(i)); \ + TYPE mm = *(TYPE *)((char *)vm + H(i)); \ + out |= OP(TYPE, nn, mm, status); \ + } \ + } while (i & 63); \ + d[j--] = out; \ + } while (i > 0); \ + } + +#define DO_FPCMP_PPZZ_H(NAME, OP) DO_FPCMP_PPZZ(NAME##_h, float16, H1_2, OP) +#define DO_FPCMP_PPZZ_S(NAME, OP) DO_FPCMP_PPZZ(NAME##_s, float32, H1_4, OP) +#define DO_FPCMP_PPZZ_D(NAME, OP) DO_FPCMP_PPZZ(NAME##_d, float64, , OP) + +#define DO_FPCMP_PPZZ_ALL(NAME, OP) \ + DO_FPCMP_PPZZ_H(NAME, OP) \ + DO_FPCMP_PPZZ_S(NAME, OP) \ DO_FPCMP_PPZZ_D(NAME, OP) -#define DO_FCMGE(TYPE, X, Y, ST) TYPE##_compare(Y, X, ST) <= 0 -#define DO_FCMGT(TYPE, X, Y, ST) TYPE##_compare(Y, X, ST) < 0 -#define DO_FCMLE(TYPE, X, Y, ST) TYPE##_compare(X, Y, ST) <= 0 -#define DO_FCMLT(TYPE, X, Y, ST) TYPE##_compare(X, Y, ST) < 0 -#define DO_FCMEQ(TYPE, X, Y, ST) TYPE##_compare_quiet(X, Y, ST) == 0 -#define DO_FCMNE(TYPE, X, Y, ST) TYPE##_compare_quiet(X, Y, ST) != 0 -#define DO_FCMUO(TYPE, X, Y, ST) \ +#define DO_FCMGE(TYPE, X, Y, ST) TYPE##_compare(Y, X, ST) <= 0 +#define DO_FCMGT(TYPE, X, Y, ST) TYPE##_compare(Y, X, ST) < 0 +#define DO_FCMLE(TYPE, X, Y, ST) TYPE##_compare(X, Y, ST) <= 0 +#define DO_FCMLT(TYPE, X, Y, ST) TYPE##_compare(X, Y, ST) < 0 +#define DO_FCMEQ(TYPE, X, Y, ST) TYPE##_compare_quiet(X, Y, ST) == 0 +#define DO_FCMNE(TYPE, X, Y, ST) TYPE##_compare_quiet(X, Y, ST) != 0 +#define DO_FCMUO(TYPE, X, Y, ST) \ TYPE##_compare_quiet(X, Y, ST) == float_relation_unordered -#define DO_FACGE(TYPE, X, Y, ST) \ +#define DO_FACGE(TYPE, X, Y, ST) \ TYPE##_compare(TYPE##_abs(Y), TYPE##_abs(X), ST) <= 0 -#define DO_FACGT(TYPE, X, Y, ST) \ +#define DO_FACGT(TYPE, X, Y, ST) \ TYPE##_compare(TYPE##_abs(Y), TYPE##_abs(X), ST) < 0 DO_FPCMP_PPZZ_ALL(sve_fcmge, DO_FCMGE) @@ -3649,35 +3624,32 @@ DO_FPCMP_PPZZ_ALL(sve_facgt, DO_FACGT) /* One operand floating-point comparison against zero, controlled * by a predicate. 
*/ -#define DO_FPCMP_PPZ0(NAME, TYPE, H, OP) \ -void HELPER(NAME)(void *vd, void *vn, void *vg, \ - void *status, uint32_t desc) \ -{ \ - intptr_t i = simd_oprsz(desc), j = (i - 1) >> 6; \ - uint64_t *d = vd, *g = vg; \ - do { \ - uint64_t out = 0, pg = g[j]; \ - do { \ - i -= sizeof(TYPE), out <<= sizeof(TYPE); \ - if ((pg >> (i & 63)) & 1) { \ - TYPE nn = *(TYPE *)((char *)vn + H(i)); \ - out |= OP(TYPE, nn, 0, status); \ - } \ - } while (i & 63); \ - d[j--] = out; \ - } while (i > 0); \ -} - -#define DO_FPCMP_PPZ0_H(NAME, OP) \ - DO_FPCMP_PPZ0(NAME##_h, float16, H1_2, OP) -#define DO_FPCMP_PPZ0_S(NAME, OP) \ - DO_FPCMP_PPZ0(NAME##_s, float32, H1_4, OP) -#define DO_FPCMP_PPZ0_D(NAME, OP) \ - DO_FPCMP_PPZ0(NAME##_d, float64, , OP) - -#define DO_FPCMP_PPZ0_ALL(NAME, OP) \ - DO_FPCMP_PPZ0_H(NAME, OP) \ - DO_FPCMP_PPZ0_S(NAME, OP) \ +#define DO_FPCMP_PPZ0(NAME, TYPE, H, OP) \ + void HELPER(NAME)(void *vd, void *vn, void *vg, void *status, \ + uint32_t desc) \ + { \ + intptr_t i = simd_oprsz(desc), j = (i - 1) >> 6; \ + uint64_t *d = vd, *g = vg; \ + do { \ + uint64_t out = 0, pg = g[j]; \ + do { \ + i -= sizeof(TYPE), out <<= sizeof(TYPE); \ + if ((pg >> (i & 63)) & 1) { \ + TYPE nn = *(TYPE *)((char *)vn + H(i)); \ + out |= OP(TYPE, nn, 0, status); \ + } \ + } while (i & 63); \ + d[j--] = out; \ + } while (i > 0); \ + } + +#define DO_FPCMP_PPZ0_H(NAME, OP) DO_FPCMP_PPZ0(NAME##_h, float16, H1_2, OP) +#define DO_FPCMP_PPZ0_S(NAME, OP) DO_FPCMP_PPZ0(NAME##_s, float32, H1_4, OP) +#define DO_FPCMP_PPZ0_D(NAME, OP) DO_FPCMP_PPZ0(NAME##_d, float64, , OP) + +#define DO_FPCMP_PPZ0_ALL(NAME, OP) \ + DO_FPCMP_PPZ0_H(NAME, OP) \ + DO_FPCMP_PPZ0_S(NAME, OP) \ DO_FPCMP_PPZ0_D(NAME, OP) DO_FPCMP_PPZ0_ALL(sve_fcmge0, DO_FCMGE) @@ -3712,9 +3684,8 @@ void HELPER(sve_ftmad_h)(void *vd, void *vn, void *vm, void *vs, uint32_t desc) void HELPER(sve_ftmad_s)(void *vd, void *vn, void *vm, void *vs, uint32_t desc) { static const float32 coeff[16] = { - 0x3f800000, 0xbe2aaaab, 0x3c088886, 0xb95008b9, - 0x36369d6d, 0x00000000, 0x00000000, 0x00000000, - 0x3f800000, 0xbf000000, 0x3d2aaaa6, 0xbab60705, + 0x3f800000, 0xbe2aaaab, 0x3c088886, 0xb95008b9, 0x36369d6d, 0x00000000, + 0x00000000, 0x00000000, 0x3f800000, 0xbf000000, 0x3d2aaaa6, 0xbab60705, 0x37cd37cc, 0x00000000, 0x00000000, 0x00000000, }; intptr_t i, opr_sz = simd_oprsz(desc) / sizeof(float32); @@ -3734,14 +3705,12 @@ void HELPER(sve_ftmad_s)(void *vd, void *vn, void *vm, void *vs, uint32_t desc) void HELPER(sve_ftmad_d)(void *vd, void *vn, void *vm, void *vs, uint32_t desc) { static const float64 coeff[16] = { - 0x3ff0000000000000ull, 0xbfc5555555555543ull, - 0x3f8111111110f30cull, 0xbf2a01a019b92fc6ull, - 0x3ec71de351f3d22bull, 0xbe5ae5e2b60f7b91ull, - 0x3de5d8408868552full, 0x0000000000000000ull, - 0x3ff0000000000000ull, 0xbfe0000000000000ull, - 0x3fa5555555555536ull, 0xbf56c16c16c13a0bull, - 0x3efa01a019b1e8d8ull, 0xbe927e4f7282f468ull, - 0x3e21ee96d2641b13ull, 0xbda8f76380fbb401ull, + 0x3ff0000000000000ull, 0xbfc5555555555543ull, 0x3f8111111110f30cull, + 0xbf2a01a019b92fc6ull, 0x3ec71de351f3d22bull, 0xbe5ae5e2b60f7b91ull, + 0x3de5d8408868552full, 0x0000000000000000ull, 0x3ff0000000000000ull, + 0xbfe0000000000000ull, 0x3fa5555555555536ull, 0xbf56c16c16c13a0bull, + 0x3efa01a019b1e8d8ull, 0xbe927e4f7282f468ull, 0x3e21ee96d2641b13ull, + 0xbda8f76380fbb401ull, }; intptr_t i, opr_sz = simd_oprsz(desc) / sizeof(float64); intptr_t x = simd_data(desc); @@ -3761,8 +3730,8 @@ void HELPER(sve_ftmad_d)(void *vd, void *vn, void *vm, void *vs, uint32_t desc) * FP Complex 
Add */ -void HELPER(sve_fcadd_h)(void *vd, void *vn, void *vm, void *vg, - void *vs, uint32_t desc) +void HELPER(sve_fcadd_h)(void *vd, void *vn, void *vm, void *vg, void *vs, + uint32_t desc) { intptr_t j, i = simd_oprsz(desc); uint64_t *g = vg; @@ -3793,8 +3762,8 @@ void HELPER(sve_fcadd_h)(void *vd, void *vn, void *vm, void *vg, } while (i != 0); } -void HELPER(sve_fcadd_s)(void *vd, void *vn, void *vm, void *vg, - void *vs, uint32_t desc) +void HELPER(sve_fcadd_s)(void *vd, void *vn, void *vm, void *vg, void *vs, + uint32_t desc) { intptr_t j, i = simd_oprsz(desc); uint64_t *g = vg; @@ -3825,8 +3794,8 @@ void HELPER(sve_fcadd_s)(void *vd, void *vn, void *vm, void *vg, } while (i != 0); } -void HELPER(sve_fcadd_d)(void *vd, void *vn, void *vm, void *vg, - void *vs, uint32_t desc) +void HELPER(sve_fcadd_d)(void *vd, void *vn, void *vm, void *vg, void *vs, + uint32_t desc) { intptr_t j, i = simd_oprsz(desc); uint64_t *g = vg; @@ -3861,22 +3830,13 @@ void HELPER(sve_fcadd_d)(void *vd, void *vn, void *vm, void *vg, * FP Complex Multiply */ -QEMU_BUILD_BUG_ON(SIMD_DATA_SHIFT + 22 > 32); - -void HELPER(sve_fcmla_zpzzz_h)(CPUARMState *env, void *vg, uint32_t desc) +void HELPER(sve_fcmla_zpzzz_h)(void *vd, void *vn, void *vm, void *va, void *vg, + void *status, uint32_t desc) { intptr_t j, i = simd_oprsz(desc); - unsigned rd = extract32(desc, SIMD_DATA_SHIFT, 5); - unsigned rn = extract32(desc, SIMD_DATA_SHIFT + 5, 5); - unsigned rm = extract32(desc, SIMD_DATA_SHIFT + 10, 5); - unsigned ra = extract32(desc, SIMD_DATA_SHIFT + 15, 5); - unsigned rot = extract32(desc, SIMD_DATA_SHIFT + 20, 2); + unsigned rot = simd_data(desc); bool flip = rot & 1; float16 neg_imag, neg_real; - void *vd = &env->vfp.zregs[rd]; - void *vn = &env->vfp.zregs[rn]; - void *vm = &env->vfp.zregs[rm]; - void *va = &env->vfp.zregs[ra]; uint64_t *g = vg; neg_imag = float16_set_sign(0, (rot & 2) != 0); @@ -3903,32 +3863,25 @@ void HELPER(sve_fcmla_zpzzz_h)(CPUARMState *env, void *vg, uint32_t desc) if (likely((pg >> (i & 63)) & 1)) { d = *(float16 *)((char *)va + H1_2(i)); - d = float16_muladd(e2, e1, d, 0, &env->vfp.fp_status_f16); + d = float16_muladd(e2, e1, d, 0, status); *(float16 *)((char *)vd + H1_2(i)) = d; } if (likely((pg >> (j & 63)) & 1)) { d = *(float16 *)((char *)va + H1_2(j)); - d = float16_muladd(e4, e3, d, 0, &env->vfp.fp_status_f16); + d = float16_muladd(e4, e3, d, 0, status); *(float16 *)((char *)vd + H1_2(j)) = d; } } while (i & 63); } while (i != 0); } -void HELPER(sve_fcmla_zpzzz_s)(CPUARMState *env, void *vg, uint32_t desc) +void HELPER(sve_fcmla_zpzzz_s)(void *vd, void *vn, void *vm, void *va, void *vg, + void *status, uint32_t desc) { intptr_t j, i = simd_oprsz(desc); - unsigned rd = extract32(desc, SIMD_DATA_SHIFT, 5); - unsigned rn = extract32(desc, SIMD_DATA_SHIFT + 5, 5); - unsigned rm = extract32(desc, SIMD_DATA_SHIFT + 10, 5); - unsigned ra = extract32(desc, SIMD_DATA_SHIFT + 15, 5); - unsigned rot = extract32(desc, SIMD_DATA_SHIFT + 20, 2); + unsigned rot = simd_data(desc); bool flip = rot & 1; float32 neg_imag, neg_real; - void *vd = &env->vfp.zregs[rd]; - void *vn = &env->vfp.zregs[rn]; - void *vm = &env->vfp.zregs[rm]; - void *va = &env->vfp.zregs[ra]; uint64_t *g = vg; neg_imag = float32_set_sign(0, (rot & 2) != 0); @@ -3955,32 +3908,25 @@ void HELPER(sve_fcmla_zpzzz_s)(CPUARMState *env, void *vg, uint32_t desc) if (likely((pg >> (i & 63)) & 1)) { d = *(float32 *)((char *)va + H1_2(i)); - d = float32_muladd(e2, e1, d, 0, &env->vfp.fp_status); + d = float32_muladd(e2, e1, d, 0, status); 
*(float32 *)((char *)vd + H1_2(i)) = d; } if (likely((pg >> (j & 63)) & 1)) { d = *(float32 *)((char *)va + H1_2(j)); - d = float32_muladd(e4, e3, d, 0, &env->vfp.fp_status); + d = float32_muladd(e4, e3, d, 0, status); *(float32 *)((char *)vd + H1_2(j)) = d; } } while (i & 63); } while (i != 0); } -void HELPER(sve_fcmla_zpzzz_d)(CPUARMState *env, void *vg, uint32_t desc) +void HELPER(sve_fcmla_zpzzz_d)(void *vd, void *vn, void *vm, void *va, void *vg, + void *status, uint32_t desc) { intptr_t j, i = simd_oprsz(desc); - unsigned rd = extract32(desc, SIMD_DATA_SHIFT, 5); - unsigned rn = extract32(desc, SIMD_DATA_SHIFT + 5, 5); - unsigned rm = extract32(desc, SIMD_DATA_SHIFT + 10, 5); - unsigned ra = extract32(desc, SIMD_DATA_SHIFT + 15, 5); - unsigned rot = extract32(desc, SIMD_DATA_SHIFT + 20, 2); + unsigned rot = simd_data(desc); bool flip = rot & 1; float64 neg_imag, neg_real; - void *vd = &env->vfp.zregs[rd]; - void *vn = &env->vfp.zregs[rn]; - void *vm = &env->vfp.zregs[rm]; - void *va = &env->vfp.zregs[ra]; uint64_t *g = vg; neg_imag = float64_set_sign(0, (rot & 2) != 0); @@ -4007,12 +3953,12 @@ void HELPER(sve_fcmla_zpzzz_d)(CPUARMState *env, void *vg, uint32_t desc) if (likely((pg >> (i & 63)) & 1)) { d = *(float64 *)((char *)va + H1_2(i)); - d = float64_muladd(e2, e1, d, 0, &env->vfp.fp_status); + d = float64_muladd(e2, e1, d, 0, status); *(float64 *)((char *)vd + H1_2(i)) = d; } if (likely((pg >> (j & 63)) & 1)) { d = *(float64 *)((char *)va + H1_2(j)); - d = float64_muladd(e4, e3, d, 0, &env->vfp.fp_status); + d = float64_muladd(e4, e3, d, 0, status); *(float64 *)((char *)vd + H1_2(j)) = d; } } while (i & 63); @@ -4024,103 +3970,111 @@ void HELPER(sve_fcmla_zpzzz_d)(CPUARMState *env, void *vg, uint32_t desc) */ /* - * Load elements into @vd, controlled by @vg, from @host + @mem_ofs. - * Memory is valid through @host + @mem_max. The register element - * indicies are inferred from @mem_ofs, as modified by the types for - * which the helper is built. Return the @mem_ofs of the first element - * not loaded (which is @mem_max if they are all loaded). - * - * For softmmu, we have fully validated the guest page. For user-only, - * we cannot fully validate without taking the mmap lock, but since we - * know the access is within one host page, if any access is valid they - * all must be valid. However, when @vg is all false, it may be that - * no access is valid. + * Load one element into @vd + @reg_off from @host. + * The controlling predicate is known to be true. */ -typedef intptr_t sve_ld1_host_fn(void *vd, void *vg, void *host, - intptr_t mem_ofs, intptr_t mem_max); +typedef void sve_ldst1_host_fn(void *vd, intptr_t reg_off, void *host); /* * Load one element into @vd + @reg_off from (@env, @vaddr, @ra). * The controlling predicate is known to be true. */ -typedef void sve_ld1_tlb_fn(CPUARMState *env, void *vd, intptr_t reg_off, - target_ulong vaddr, TCGMemOpIdx oi, uintptr_t ra); -typedef sve_ld1_tlb_fn sve_st1_tlb_fn; +typedef void sve_ldst1_tlb_fn(CPUARMState *env, void *vd, intptr_t reg_off, + target_ulong vaddr, uintptr_t retaddr); /* * Generate the above primitives. 
*/ -#define DO_LD_HOST(NAME, H, TYPEE, TYPEM, HOST) \ -static intptr_t sve_##NAME##_host(void *vd, void *vg, void *host, \ - intptr_t mem_off, const intptr_t mem_max) \ -{ \ - intptr_t reg_off = mem_off * (sizeof(TYPEE) / sizeof(TYPEM)); \ - uint64_t *pg = vg; \ - while (mem_off + sizeof(TYPEM) <= mem_max) { \ - TYPEM val = 0; \ - if (likely((pg[reg_off >> 6] >> (reg_off & 63)) & 1)) { \ - val = HOST((char *)host + mem_off); \ - } \ - *(TYPEE *)((char *)vd + H(reg_off)) = val; \ - mem_off += sizeof(TYPEM), reg_off += sizeof(TYPEE); \ - } \ - return mem_off; \ -} - -#define DO_LD_TLB(NAME, H, TYPEE, TYPEM, HOST, MOEND, TLB) \ -static void sve_##NAME##_tlb(CPUARMState *env, void *vd, intptr_t reg_off, \ - target_ulong addr, TCGMemOpIdx oi, uintptr_t ra) \ -{ \ - TYPEM val = TLB(env, addr, oi, ra); \ - *(TYPEE *)((char *)vd + H(reg_off)) = val; \ -} - -#define DO_LD_PRIM_1(NAME, H, TE, TM) \ - DO_LD_HOST(NAME, H, TE, TM, ldub_p) \ - DO_LD_TLB(NAME, H, TE, TM, ldub_p, 0, helper_ret_ldub_mmu) - -DO_LD_PRIM_1(ld1bb, H1, uint8_t, uint8_t) -DO_LD_PRIM_1(ld1bhu, H1_2, uint16_t, uint8_t) -DO_LD_PRIM_1(ld1bhs, H1_2, uint16_t, int8_t) -DO_LD_PRIM_1(ld1bsu, H1_4, uint32_t, uint8_t) -DO_LD_PRIM_1(ld1bss, H1_4, uint32_t, int8_t) -DO_LD_PRIM_1(ld1bdu, , uint64_t, uint8_t) -DO_LD_PRIM_1(ld1bds, , uint64_t, int8_t) - -#define DO_LD_PRIM_2(NAME, end, MOEND, H, TE, TM, PH, PT) \ - DO_LD_HOST(NAME##_##end, H, TE, TM, PH##_##end##_p) \ - DO_LD_TLB(NAME##_##end, H, TE, TM, PH##_##end##_p, \ - MOEND, helper_##end##_##PT##_mmu) - -DO_LD_PRIM_2(ld1hh, le, MO_LE, H1_2, uint16_t, uint16_t, lduw, lduw) -DO_LD_PRIM_2(ld1hsu, le, MO_LE, H1_4, uint32_t, uint16_t, lduw, lduw) -DO_LD_PRIM_2(ld1hss, le, MO_LE, H1_4, uint32_t, int16_t, lduw, lduw) -DO_LD_PRIM_2(ld1hdu, le, MO_LE, , uint64_t, uint16_t, lduw, lduw) -DO_LD_PRIM_2(ld1hds, le, MO_LE, , uint64_t, int16_t, lduw, lduw) +#define DO_LD_HOST(NAME, H, TYPEE, TYPEM, HOST) \ + static void sve_##NAME##_host(void *vd, intptr_t reg_off, void *host) \ + { \ + TYPEM val = HOST(host); \ + *(TYPEE *)((char*)vd + H(reg_off)) = val; \ + } -DO_LD_PRIM_2(ld1ss, le, MO_LE, H1_4, uint32_t, uint32_t, ldl, ldul) -DO_LD_PRIM_2(ld1sdu, le, MO_LE, , uint64_t, uint32_t, ldl, ldul) -DO_LD_PRIM_2(ld1sds, le, MO_LE, , uint64_t, int32_t, ldl, ldul) +#define DO_ST_HOST(NAME, H, TYPEE, TYPEM, HOST) \ + static void sve_##NAME##_host(void *vd, intptr_t reg_off, void *host) \ + { \ + HOST(host, (TYPEM) * (TYPEE *)((char*)vd + H(reg_off))); \ + } -DO_LD_PRIM_2(ld1dd, le, MO_LE, , uint64_t, uint64_t, ldq, ldq) +#define DO_LD_TLB(NAME, H, TYPEE, TYPEM, TLB) \ + static void sve_##NAME##_tlb(CPUARMState *env, void *vd, intptr_t reg_off, \ + target_ulong addr, uintptr_t ra) \ + { \ + *(TYPEE *)((char*)vd + H(reg_off)) = \ + (TYPEM)TLB(env, useronly_clean_ptr(addr), ra); \ + } -DO_LD_PRIM_2(ld1hh, be, MO_BE, H1_2, uint16_t, uint16_t, lduw, lduw) -DO_LD_PRIM_2(ld1hsu, be, MO_BE, H1_4, uint32_t, uint16_t, lduw, lduw) -DO_LD_PRIM_2(ld1hss, be, MO_BE, H1_4, uint32_t, int16_t, lduw, lduw) -DO_LD_PRIM_2(ld1hdu, be, MO_BE, , uint64_t, uint16_t, lduw, lduw) -DO_LD_PRIM_2(ld1hds, be, MO_BE, , uint64_t, int16_t, lduw, lduw) +#define DO_ST_TLB(NAME, H, TYPEE, TYPEM, TLB) \ + static void sve_##NAME##_tlb(CPUARMState *env, void *vd, intptr_t reg_off, \ + target_ulong addr, uintptr_t ra) \ + { \ + TLB(env, useronly_clean_ptr(addr), \ + (TYPEM) * (TYPEE *)((char*)vd + H(reg_off)), ra); \ + } -DO_LD_PRIM_2(ld1ss, be, MO_BE, H1_4, uint32_t, uint32_t, ldl, ldul) -DO_LD_PRIM_2(ld1sdu, be, MO_BE, , uint64_t, uint32_t, ldl, 
ldul) -DO_LD_PRIM_2(ld1sds, be, MO_BE, , uint64_t, int32_t, ldl, ldul) +#define DO_LD_PRIM_1(NAME, H, TE, TM) \ + DO_LD_HOST(NAME, H, TE, TM, ldub_p) \ + DO_LD_TLB(NAME, H, TE, TM, cpu_ldub_data_ra) -DO_LD_PRIM_2(ld1dd, be, MO_BE, , uint64_t, uint64_t, ldq, ldq) +DO_LD_PRIM_1(ld1bb, H1, uint8_t, uint8_t) +DO_LD_PRIM_1(ld1bhu, H1_2, uint16_t, uint8_t) +DO_LD_PRIM_1(ld1bhs, H1_2, uint16_t, int8_t) +DO_LD_PRIM_1(ld1bsu, H1_4, uint32_t, uint8_t) +DO_LD_PRIM_1(ld1bss, H1_4, uint32_t, int8_t) +DO_LD_PRIM_1(ld1bdu, , uint64_t, uint8_t) +DO_LD_PRIM_1(ld1bds, , uint64_t, int8_t) + +#define DO_ST_PRIM_1(NAME, H, TE, TM) \ + DO_ST_HOST(st1##NAME, H, TE, TM, stb_p) \ + DO_ST_TLB(st1##NAME, H, TE, TM, cpu_stb_data_ra) + +DO_ST_PRIM_1(bb, H1, uint8_t, uint8_t) +DO_ST_PRIM_1(bh, H1_2, uint16_t, uint8_t) +DO_ST_PRIM_1(bs, H1_4, uint32_t, uint8_t) +DO_ST_PRIM_1(bd, , uint64_t, uint8_t) + +#define DO_LD_PRIM_2(NAME, H, TE, TM, LD) \ + DO_LD_HOST(ld1##NAME##_be, H, TE, TM, LD##_be_p) \ + DO_LD_HOST(ld1##NAME##_le, H, TE, TM, LD##_le_p) \ + DO_LD_TLB(ld1##NAME##_be, H, TE, TM, cpu_##LD##_be_data_ra) \ + DO_LD_TLB(ld1##NAME##_le, H, TE, TM, cpu_##LD##_le_data_ra) + +#define DO_ST_PRIM_2(NAME, H, TE, TM, ST) \ + DO_ST_HOST(st1##NAME##_be, H, TE, TM, ST##_be_p) \ + DO_ST_HOST(st1##NAME##_le, H, TE, TM, ST##_le_p) \ + DO_ST_TLB(st1##NAME##_be, H, TE, TM, cpu_##ST##_be_data_ra) \ + DO_ST_TLB(st1##NAME##_le, H, TE, TM, cpu_##ST##_le_data_ra) + +DO_LD_PRIM_2(hh, H1_2, uint16_t, uint16_t, lduw) +DO_LD_PRIM_2(hsu, H1_4, uint32_t, uint16_t, lduw) +DO_LD_PRIM_2(hss, H1_4, uint32_t, int16_t, lduw) +DO_LD_PRIM_2(hdu, , uint64_t, uint16_t, lduw) +DO_LD_PRIM_2(hds, , uint64_t, int16_t, lduw) + +DO_ST_PRIM_2(hh, H1_2, uint16_t, uint16_t, stw) +DO_ST_PRIM_2(hs, H1_4, uint32_t, uint16_t, stw) +DO_ST_PRIM_2(hd, , uint64_t, uint16_t, stw) + +DO_LD_PRIM_2(ss, H1_4, uint32_t, uint32_t, ldl) +DO_LD_PRIM_2(sdu, , uint64_t, uint32_t, ldl) +DO_LD_PRIM_2(sds, , uint64_t, int32_t, ldl) + +DO_ST_PRIM_2(ss, H1_4, uint32_t, uint32_t, stl) +DO_ST_PRIM_2(sd, , uint64_t, uint32_t, stl) + +DO_LD_PRIM_2(dd, , uint64_t, uint64_t, ldq) +DO_ST_PRIM_2(dd, , uint64_t, uint64_t, stq) #undef DO_LD_TLB +#undef DO_ST_TLB #undef DO_LD_HOST #undef DO_LD_PRIM_1 +#undef DO_ST_PRIM_1 #undef DO_LD_PRIM_2 +#undef DO_ST_PRIM_2 /* * Skip through a sequence of inactive elements in the guarding predicate @vg, @@ -4157,297 +4111,687 @@ static intptr_t find_next_active(uint64_t *vg, intptr_t reg_off, } /* - * Return the maximum offset <= @mem_max which is still within the page - * referenced by @base + @mem_off. + * Resolve the guest virtual address to info->host and info->flags. + * If @nofault, return false if the page is invalid, otherwise + * exit via page fault exception. */ -static intptr_t max_for_page(struct uc_struct *uc, target_ulong base, intptr_t mem_off, - intptr_t mem_max) -{ - target_ulong addr = base + mem_off; - intptr_t split = -(intptr_t)(addr | TARGET_PAGE_MASK); - return MIN(split, mem_max - mem_off) + mem_off; -} -/* These are normally defined only for CONFIG_USER_ONLY in */ -static inline void set_helper_retaddr(uintptr_t ra) { } -static inline void clear_helper_retaddr(void) { } +typedef struct { + void *host; + int flags; + MemTxAttrs attrs; +} SVEHostPage; -/* - * The result of tlb_vaddr_to_host for user-only is just g2h(x), - * which is always non-null. Elide the useless test. 
- */ -static inline bool test_host_page(void *host) +static bool sve_probe_page(SVEHostPage *info, bool nofault, CPUARMState *env, + target_ulong addr, int mem_off, + MMUAccessType access_type, int mmu_idx, + uintptr_t retaddr) { - return likely(host != NULL); + int flags; + + addr += mem_off; + + /* + * User-only currently always issues with TBI. See the comment + * above useronly_clean_ptr. Usually we clean this top byte away + * during translation, but we can't do that for e.g. vector + imm + * addressing modes. + * + * We currently always enable TBI for user-only, and do not provide + * a way to turn it off. So clean the pointer unconditionally here, + * rather than look it up here, or pass it down from above. + */ + addr = useronly_clean_ptr(addr); + + flags = probe_access_flags(env, addr, access_type, mmu_idx, nofault, + &info->host, retaddr); + info->flags = flags; + + if (flags & TLB_INVALID_MASK) { + g_assert(nofault); + return false; + } + + /* Ensure that info->host[] is relative to addr, not addr + mem_off. */ + info->host = (void*)((char*)(info->host) - mem_off); + + /* + * Find the iotlbentry for addr and return the transaction attributes. + * This *must* be present in the TLB because we just found the mapping. + */ + { + uintptr_t index = tlb_index(env, mmu_idx, addr); + +#ifdef CONFIG_DEBUG_TCG + CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr); + target_ulong comparator = + (access_type == MMU_DATA_LOAD ? entry->addr_read + : tlb_addr_write(entry)); + g_assert(tlb_hit(comparator, addr)); +#endif + + CPUIOTLBEntry *iotlbentry = &env_tlb(env)->d[mmu_idx].iotlb[index]; + info->attrs = iotlbentry->attrs; + } + + return true; } /* - * Common helper for all contiguous one-register predicated loads. + * Analyse contiguous data, protected by a governing predicate. */ -static void sve_ld1_r(CPUARMState *env, void *vg, const target_ulong addr, - uint32_t desc, const uintptr_t retaddr, - const int esz, const int msz, - sve_ld1_host_fn *host_fn, - sve_ld1_tlb_fn *tlb_fn) -{ - const TCGMemOpIdx oi = extract32(desc, SIMD_DATA_SHIFT, MEMOPIDX_SHIFT); - const int mmu_idx = get_mmuidx(oi); - const unsigned rd = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 5); - void *vd = &env->vfp.zregs[rd]; - const int diffsz = esz - msz; - const intptr_t reg_max = simd_oprsz(desc); - const intptr_t mem_max = reg_max >> diffsz; - ARMVectorReg scratch; - void *host; - intptr_t split, reg_off, mem_off; - /* Find the first active element. */ - reg_off = find_next_active(vg, 0, reg_max, esz); - if (unlikely(reg_off == reg_max)) { - /* The entire predicate was false; no load occurs. */ - memset(vd, 0, reg_max); - return; - } - mem_off = reg_off >> diffsz; - set_helper_retaddr(retaddr); +typedef enum { + FAULT_NO, + FAULT_FIRST, + FAULT_ALL, +} SVEContFault; +typedef struct { /* - * If the (remaining) load is entirely within a single page, then: - * For softmmu, and the tlb hits, then no faults will occur; - * For user-only, either the first load will fault or none will. - * We can thus perform the load directly to the destination and - * Vd will be unmodified on any exception path. + * First and last element wholly contained within the two pages. + * mem_off_first[0] and reg_off_first[0] are always set >= 0. + * reg_off_last[0] may be < 0 if the first element crosses pages. + * All of mem_off_first[1], reg_off_first[1] and reg_off_last[1] + * are set >= 0 only if there are complete elements on a second page. + * + * The reg_off_* offsets are relative to the internal vector register. 
+ * The mem_off_first offset is relative to the memory address; the + * two offsets are different when a load operation extends, a store + * operation truncates, or for multi-register operations. */ - split = max_for_page(env->uc, addr, mem_off, mem_max); - if (likely(split == mem_max)) { - host = tlb_vaddr_to_host(env, addr + mem_off, MMU_DATA_LOAD, mmu_idx); - if (test_host_page(host)) { - mem_off = host_fn(vd, vg, (char *)host - mem_off, mem_off, mem_max); - tcg_debug_assert(mem_off == mem_max); - clear_helper_retaddr(); - /* After having taken any fault, zero leading inactive elements. */ - swap_memzero(vd, reg_off); - return; + int16_t mem_off_first[2]; + int16_t reg_off_first[2]; + int16_t reg_off_last[2]; + + /* + * One element that is misaligned and spans both pages, + * or -1 if there is no such active element. + */ + int16_t mem_off_split; + int16_t reg_off_split; + + /* + * The byte offset at which the entire operation crosses a page boundary. + * Set >= 0 if and only if the entire operation spans two pages. + */ + int16_t page_split; + + /* TLB data for the two pages. */ + SVEHostPage page[2]; +} SVEContLdSt; + +/* + * Find first active element on each page, and a loose bound for the + * final element on each page. Identify any single element that spans + * the page boundary. Return true if there are any active elements. + */ +static bool sve_cont_ldst_elements(CPUARMState *env, SVEContLdSt *info, + target_ulong addr, uint64_t *vg, + intptr_t reg_max, int esz, int msize) +{ + uc_engine *uc = env->uc; + const int esize = 1 << esz; + const uint64_t pg_mask = pred_esz_masks[esz]; + intptr_t reg_off_first = -1, reg_off_last = -1, reg_off_split; + intptr_t mem_off_last, mem_off_split; + intptr_t page_split, elt_split; + intptr_t i; + + /* Set all of the element indices to -1, and the TLB data to 0. */ + memset(info, -1, offsetof(SVEContLdSt, page)); + memset(info->page, 0, sizeof(info->page)); + + /* Gross scan over the entire predicate to find bounds. */ + i = 0; + do { + uint64_t pg = vg[i] & pg_mask; + if (pg) { + reg_off_last = i * 64 + 63 - clz64(pg); + if (reg_off_first < 0) { + reg_off_first = i * 64 + ctz64(pg); + } } + } while (++i * 64 < reg_max); + + if (unlikely(reg_off_first < 0)) { + /* No active elements, no pages touched. */ + return false; } + tcg_debug_assert(reg_off_last >= 0 && reg_off_last < reg_max); + + info->reg_off_first[0] = reg_off_first; + info->mem_off_first[0] = (reg_off_first >> esz) * msize; + mem_off_last = (reg_off_last >> esz) * msize; + + page_split = -(addr | TARGET_PAGE_MASK); + if (likely(mem_off_last + msize <= page_split)) { + /* The entire operation fits within a single page. */ + info->reg_off_last[0] = reg_off_last; + return true; + } + + info->page_split = page_split; + elt_split = page_split / msize; + reg_off_split = elt_split << esz; + mem_off_split = elt_split * msize; /* - * Perform the predicated read into a temporary, thus ensuring - * if the load of the last element faults, Vd is not modified. + * This is the last full element on the first page, but it is not + * necessarily active. If there is no full element, i.e. the first + * active element is the one that's split, this value remains -1. + * It is useful as iteration bounds. 
*/ - memset(&scratch, 0, reg_max); - goto start; - while (1) { - reg_off = find_next_active(vg, reg_off, reg_max, esz); - if (reg_off >= reg_max) { - break; - } - mem_off = reg_off >> diffsz; - split = max_for_page(env->uc, addr, mem_off, mem_max); - - start: - if (split - mem_off >= (1ULL << msz)) { - /* At least one whole element on this page. */ - host = tlb_vaddr_to_host(env, addr + mem_off, - MMU_DATA_LOAD, mmu_idx); - if (host) { - mem_off = host_fn(&scratch, vg, (char *)host - mem_off, - mem_off, split); - reg_off = mem_off << diffsz; - continue; + if (elt_split != 0) { + info->reg_off_last[0] = reg_off_split - esize; + } + + /* Determine if an unaligned element spans the pages. */ + if (page_split % msize != 0) { + /* It is helpful to know if the split element is active. */ + if ((vg[reg_off_split >> 6] >> (reg_off_split & 63)) & 1) { + info->reg_off_split = reg_off_split; + info->mem_off_split = mem_off_split; + + if (reg_off_split == reg_off_last) { + /* The page crossing element is last. */ + return true; } } + reg_off_split += esize; + mem_off_split += msize; + } + + /* + * We do want the first active element on the second page, because + * this may affect the address reported in an exception. + */ + reg_off_split = find_next_active(vg, reg_off_split, reg_max, esz); + tcg_debug_assert(reg_off_split <= reg_off_last); + info->reg_off_first[1] = reg_off_split; + info->mem_off_first[1] = (reg_off_split >> esz) * msize; + info->reg_off_last[1] = reg_off_last; + return true; +} + +/* + * Resolve the guest virtual addresses to info->page[]. + * Control the generation of page faults with @fault. Return false if + * there is no work to do, which can only happen with @fault == FAULT_NO. + */ +static bool sve_cont_ldst_pages(SVEContLdSt *info, SVEContFault fault, + CPUARMState *env, target_ulong addr, + MMUAccessType access_type, uintptr_t retaddr) +{ + int mmu_idx = cpu_mmu_index(env, false); + int mem_off = info->mem_off_first[0]; + bool nofault = fault == FAULT_NO; + bool have_work = true; + + if (!sve_probe_page(&info->page[0], nofault, env, addr, mem_off, + access_type, mmu_idx, retaddr)) { + /* No work to be done. */ + return false; + } + + if (likely(info->page_split < 0)) { + /* The entire operation was on the one page. */ + return true; + } + /* + * If the second page is invalid, then we want the fault address to be + * the first byte on that page which is accessed. + */ + if (info->mem_off_split >= 0) { + /* + * There is an element split across the pages. The fault address + * should be the first byte of the second page. + */ + mem_off = info->page_split; /* - * Perform one normal read. This may fault, longjmping out to the - * main loop in order to raise an exception. It may succeed, and - * as a side-effect load the TLB entry for the next round. Finally, - * in the extremely unlikely case we're performing this operation - * on I/O memory, it may succeed but not bring in the TLB entry. - * But even then we have still made forward progress. + * If the split element is also the first active element + * of the vector, then: For first-fault we should continue + * to generate faults for the second page. For no-fault, + * we have work only if the second page is valid. */ - tlb_fn(env, &scratch, reg_off, addr + mem_off, oi, retaddr); - reg_off += 1ULL << esz; + if (info->mem_off_first[0] < info->mem_off_split) { + nofault = FAULT_FIRST; + have_work = false; + } + } else { + /* + * There is no element split across the pages. 
The fault address + * should be the first active element on the second page. + */ + mem_off = info->mem_off_first[1]; + /* + * There must have been one active element on the first page, + * so we're out of first-fault territory. + */ + nofault = fault != FAULT_ALL; } - clear_helper_retaddr(); - memcpy(vd, &scratch, reg_max); + have_work |= sve_probe_page(&info->page[1], nofault, env, addr, mem_off, + access_type, mmu_idx, retaddr); + return have_work; } -#define DO_LD1_1(NAME, ESZ) \ -void HELPER(sve_##NAME##_r)(CPUARMState *env, void *vg, \ - target_ulong addr, uint32_t desc) \ -{ \ - sve_ld1_r(env, vg, addr, desc, GETPC(), ESZ, 0, \ - sve_##NAME##_host, sve_##NAME##_tlb); \ -} - -#define DO_LD1_2(NAME, ESZ, MSZ) \ -void HELPER(sve_##NAME##_le_r)(CPUARMState *env, void *vg, \ - target_ulong addr, uint32_t desc) \ -{ \ - sve_ld1_r(env, vg, addr, desc, GETPC(), ESZ, MSZ, \ - sve_##NAME##_le_host, sve_##NAME##_le_tlb); \ -} \ -void HELPER(sve_##NAME##_be_r)(CPUARMState *env, void *vg, \ - target_ulong addr, uint32_t desc) \ -{ \ - sve_ld1_r(env, vg, addr, desc, GETPC(), ESZ, MSZ, \ - sve_##NAME##_be_host, sve_##NAME##_be_tlb); \ -} - -DO_LD1_1(ld1bb, 0) -DO_LD1_1(ld1bhu, 1) -DO_LD1_1(ld1bhs, 1) -DO_LD1_1(ld1bsu, 2) -DO_LD1_1(ld1bss, 2) -DO_LD1_1(ld1bdu, 3) -DO_LD1_1(ld1bds, 3) - -DO_LD1_2(ld1hh, 1, 1) -DO_LD1_2(ld1hsu, 2, 1) -DO_LD1_2(ld1hss, 2, 1) -DO_LD1_2(ld1hdu, 3, 1) -DO_LD1_2(ld1hds, 3, 1) - -DO_LD1_2(ld1ss, 2, 2) -DO_LD1_2(ld1sdu, 3, 2) -DO_LD1_2(ld1sds, 3, 2) - -DO_LD1_2(ld1dd, 3, 3) +static void sve_cont_ldst_watchpoints(SVEContLdSt *info, CPUARMState *env, + uint64_t *vg, target_ulong addr, + int esize, int msize, int wp_access, + uintptr_t retaddr) +{ + intptr_t mem_off, reg_off, reg_last; + int flags0 = info->page[0].flags; + int flags1 = info->page[1].flags; -#undef DO_LD1_1 -#undef DO_LD1_2 + if (likely(!((flags0 | flags1) & TLB_WATCHPOINT))) { + return; + } -/* - * Common helpers for all contiguous 2,3,4-register predicated loads. - */ -static void sve_ld2_r(CPUARMState *env, void *vg, target_ulong addr, - uint32_t desc, int size, uintptr_t ra, - sve_ld1_tlb_fn *tlb_fn) -{ - const TCGMemOpIdx oi = extract32(desc, SIMD_DATA_SHIFT, MEMOPIDX_SHIFT); - const unsigned rd = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 5); - intptr_t i, oprsz = simd_oprsz(desc); - ARMVectorReg scratch[2] = { 0 }; + /* Indicate that watchpoints are handled. */ + info->page[0].flags = flags0 & ~TLB_WATCHPOINT; + info->page[1].flags = flags1 & ~TLB_WATCHPOINT; + + if (flags0 & TLB_WATCHPOINT) { + mem_off = info->mem_off_first[0]; + reg_off = info->reg_off_first[0]; + reg_last = info->reg_off_last[0]; + + while (reg_off <= reg_last) { + uint64_t pg = vg[reg_off >> 6]; + do { + if ((pg >> (reg_off & 63)) & 1) { + cpu_check_watchpoint(env_cpu(env), addr + mem_off, msize, + info->page[0].attrs, wp_access, + retaddr); + } + reg_off += esize; + mem_off += msize; + } while (reg_off <= reg_last && (reg_off & 63)); + } + } - set_helper_retaddr(ra); - for (i = 0; i < oprsz; ) { - uint16_t pg = *(uint16_t *)((char *)vg + H1_2(i >> 3)); - do { - if (pg & 1) { - tlb_fn(env, &scratch[0], i, addr, oi, ra); - tlb_fn(env, &scratch[1], i, addr + size, oi, ra); - } - i += size, pg >>= size; - addr += 2 * size; - } while (i & 15); + mem_off = info->mem_off_split; + if (mem_off >= 0) { + cpu_check_watchpoint(env_cpu(env), addr + mem_off, msize, + info->page[0].attrs, wp_access, retaddr); } - clear_helper_retaddr(); - /* Wait until all exceptions have been raised to write back. 
*/ - memcpy(&env->vfp.zregs[rd], &scratch[0], oprsz); - memcpy(&env->vfp.zregs[(rd + 1) & 31], &scratch[1], oprsz); + mem_off = info->mem_off_first[1]; + if ((flags1 & TLB_WATCHPOINT) && mem_off >= 0) { + reg_off = info->reg_off_first[1]; + reg_last = info->reg_off_last[1]; + + do { + uint64_t pg = vg[reg_off >> 6]; + do { + if ((pg >> (reg_off & 63)) & 1) { + cpu_check_watchpoint(env_cpu(env), addr + mem_off, msize, + info->page[1].attrs, wp_access, + retaddr); + } + reg_off += esize; + mem_off += msize; + } while (reg_off & 63); + } while (reg_off <= reg_last); + } } -static void sve_ld3_r(CPUARMState *env, void *vg, target_ulong addr, - uint32_t desc, int size, uintptr_t ra, - sve_ld1_tlb_fn *tlb_fn) +typedef uint64_t mte_check_fn(CPUARMState *, uint32_t, uint64_t, uintptr_t); + +static inline QEMU_ALWAYS_INLINE void +sve_cont_ldst_mte_check_int(SVEContLdSt *info, CPUARMState *env, uint64_t *vg, + target_ulong addr, int esize, int msize, + uint32_t mtedesc, uintptr_t ra, mte_check_fn *check) { - const TCGMemOpIdx oi = extract32(desc, SIMD_DATA_SHIFT, MEMOPIDX_SHIFT); - const unsigned rd = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 5); - intptr_t i, oprsz = simd_oprsz(desc); - ARMVectorReg scratch[3] = { 0 }; + intptr_t mem_off, reg_off, reg_last; + + /* Process the page only if MemAttr == Tagged. */ + if (arm_tlb_mte_tagged(&info->page[0].attrs)) { + mem_off = info->mem_off_first[0]; + reg_off = info->reg_off_first[0]; + reg_last = info->reg_off_split; + if (reg_last < 0) { + reg_last = info->reg_off_last[0]; + } - set_helper_retaddr(ra); - for (i = 0; i < oprsz; ) { - uint16_t pg = *(uint16_t *)((char *)vg + H1_2(i >> 3)); do { - if (pg & 1) { - tlb_fn(env, &scratch[0], i, addr, oi, ra); - tlb_fn(env, &scratch[1], i, addr + size, oi, ra); - tlb_fn(env, &scratch[2], i, addr + 2 * size, oi, ra); - } - i += size, pg >>= size; - addr += 3 * size; - } while (i & 15); + uint64_t pg = vg[reg_off >> 6]; + do { + if ((pg >> (reg_off & 63)) & 1) { + check(env, mtedesc, addr, ra); + } + reg_off += esize; + mem_off += msize; + } while (reg_off <= reg_last && (reg_off & 63)); + } while (reg_off <= reg_last); } - clear_helper_retaddr(); - /* Wait until all exceptions have been raised to write back. 
*/ - memcpy(&env->vfp.zregs[rd], &scratch[0], oprsz); - memcpy(&env->vfp.zregs[(rd + 1) & 31], &scratch[1], oprsz); - memcpy(&env->vfp.zregs[(rd + 2) & 31], &scratch[2], oprsz); + mem_off = info->mem_off_first[1]; + if (mem_off >= 0 && arm_tlb_mte_tagged(&info->page[1].attrs)) { + reg_off = info->reg_off_first[1]; + reg_last = info->reg_off_last[1]; + + do { + uint64_t pg = vg[reg_off >> 6]; + do { + if ((pg >> (reg_off & 63)) & 1) { + check(env, mtedesc, addr, ra); + } + reg_off += esize; + mem_off += msize; + } while (reg_off & 63); + } while (reg_off <= reg_last); + } } -static void sve_ld4_r(CPUARMState *env, void *vg, target_ulong addr, - uint32_t desc, int size, uintptr_t ra, - sve_ld1_tlb_fn *tlb_fn) +typedef void sve_cont_ldst_mte_check_fn(SVEContLdSt *info, CPUARMState *env, + uint64_t *vg, target_ulong addr, + int esize, int msize, uint32_t mtedesc, + uintptr_t ra); + +static void sve_cont_ldst_mte_check1(SVEContLdSt *info, CPUARMState *env, + uint64_t *vg, target_ulong addr, int esize, + int msize, uint32_t mtedesc, uintptr_t ra) { - const TCGMemOpIdx oi = extract32(desc, SIMD_DATA_SHIFT, MEMOPIDX_SHIFT); - const unsigned rd = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 5); - intptr_t i, oprsz = simd_oprsz(desc); - ARMVectorReg scratch[4] = { 0 }; + sve_cont_ldst_mte_check_int(info, env, vg, addr, esize, msize, mtedesc, ra, + mte_check1); +} + +static void sve_cont_ldst_mte_checkN(SVEContLdSt *info, CPUARMState *env, + uint64_t *vg, target_ulong addr, int esize, + int msize, uint32_t mtedesc, uintptr_t ra) +{ + sve_cont_ldst_mte_check_int(info, env, vg, addr, esize, msize, mtedesc, ra, + mte_checkN); +} + +/* + * Common helper for all contiguous 1,2,3,4-register predicated stores. + */ +static inline QEMU_ALWAYS_INLINE void +sve_ldN_r(CPUARMState *env, uint64_t *vg, const target_ulong addr, + uint32_t desc, const uintptr_t retaddr, const int esz, const int msz, + const int N, uint32_t mtedesc, sve_ldst1_host_fn *host_fn, + sve_ldst1_tlb_fn *tlb_fn, sve_cont_ldst_mte_check_fn *mte_check_fn) +{ + const unsigned rd = simd_data(desc); + const intptr_t reg_max = simd_oprsz(desc); + intptr_t reg_off, reg_last, mem_off; + SVEContLdSt info; + void *host; + int flags, i; + + /* Find the active elements. */ + if (!sve_cont_ldst_elements(env, &info, addr, vg, reg_max, esz, N << msz)) { + /* The entire predicate was false; no load occurs. */ + for (i = 0; i < N; ++i) { + memset(&env->vfp.zregs[(rd + i) & 31], 0, reg_max); + } + return; + } + + /* Probe the page(s). Exit with exception for any invalid page. */ + sve_cont_ldst_pages(&info, FAULT_ALL, env, addr, MMU_DATA_LOAD, retaddr); + + /* Handle watchpoints for all active elements. */ + sve_cont_ldst_watchpoints(&info, env, vg, addr, 1 << esz, N << msz, + BP_MEM_READ, retaddr); + + /* + * Handle mte checks for all active elements. + * Since TBI must be set for MTE, !mtedesc => !mte_active. + */ + if (mte_check_fn && mtedesc) { + mte_check_fn(&info, env, vg, addr, 1 << esz, N << msz, mtedesc, + retaddr); + } + + flags = info.page[0].flags | info.page[1].flags; + if (unlikely(flags != 0)) { +#ifdef CONFIG_USER_ONLY + g_assert_not_reached(); +#else + /* + * At least one page includes MMIO. + * Any bus operation can fail with cpu_transaction_failed, + * which for ARM will raise SyncExternal. Perform the load + * into scratch memory to preserve register state until the end. 
+ */ + ARMVectorReg scratch[4] = {}; + + mem_off = info.mem_off_first[0]; + reg_off = info.reg_off_first[0]; + reg_last = info.reg_off_last[1]; + if (reg_last < 0) { + reg_last = info.reg_off_split; + if (reg_last < 0) { + reg_last = info.reg_off_last[0]; + } + } - set_helper_retaddr(ra); - for (i = 0; i < oprsz; ) { - uint16_t pg = *(uint16_t *)((char *)vg + H1_2(i >> 3)); do { - if (pg & 1) { - tlb_fn(env, &scratch[0], i, addr, oi, ra); - tlb_fn(env, &scratch[1], i, addr + size, oi, ra); - tlb_fn(env, &scratch[2], i, addr + 2 * size, oi, ra); - tlb_fn(env, &scratch[3], i, addr + 3 * size, oi, ra); + uint64_t pg = vg[reg_off >> 6]; + do { + if ((pg >> (reg_off & 63)) & 1) { + for (i = 0; i < N; ++i) { + tlb_fn(env, &scratch[i], reg_off, + addr + mem_off + (i << msz), retaddr); + } + } + reg_off += 1 << esz; + mem_off += N << msz; + } while (reg_off & 63); + } while (reg_off <= reg_last); + + for (i = 0; i < N; ++i) { + memcpy(&env->vfp.zregs[(rd + i) & 31], &scratch[i], reg_max); + } + return; +#endif + } + + /* The entire operation is in RAM, on valid pages. */ + + for (i = 0; i < N; ++i) { + memset(&env->vfp.zregs[(rd + i) & 31], 0, reg_max); + } + + mem_off = info.mem_off_first[0]; + reg_off = info.reg_off_first[0]; + reg_last = info.reg_off_last[0]; + host = info.page[0].host; + + while (reg_off <= reg_last) { + uint64_t pg = vg[reg_off >> 6]; + do { + if ((pg >> (reg_off & 63)) & 1) { + for (i = 0; i < N; ++i) { + host_fn(&env->vfp.zregs[(rd + i) & 31], reg_off, + (char*)host + mem_off + (i << msz)); + } } - i += size, pg >>= size; - addr += 4 * size; - } while (i & 15); + reg_off += 1 << esz; + mem_off += N << msz; + } while (reg_off <= reg_last && (reg_off & 63)); } - clear_helper_retaddr(); - /* Wait until all exceptions have been raised to write back. */ - memcpy(&env->vfp.zregs[rd], &scratch[0], oprsz); - memcpy(&env->vfp.zregs[(rd + 1) & 31], &scratch[1], oprsz); - memcpy(&env->vfp.zregs[(rd + 2) & 31], &scratch[2], oprsz); - memcpy(&env->vfp.zregs[(rd + 3) & 31], &scratch[3], oprsz); -} + /* + * Use the slow path to manage the cross-page misalignment. + * But we know this is RAM and cannot trap. 
+ */ + mem_off = info.mem_off_split; + if (unlikely(mem_off >= 0)) { + reg_off = info.reg_off_split; + for (i = 0; i < N; ++i) { + tlb_fn(env, &env->vfp.zregs[(rd + i) & 31], reg_off, + addr + mem_off + (i << msz), retaddr); + } + } -#define DO_LDN_1(N) \ -void QEMU_FLATTEN HELPER(sve_ld##N##bb_r) \ - (CPUARMState *env, void *vg, target_ulong addr, uint32_t desc) \ -{ \ - sve_ld##N##_r(env, vg, addr, desc, 1, GETPC(), sve_ld1bb_tlb); \ -} + mem_off = info.mem_off_first[1]; + if (unlikely(mem_off >= 0)) { + reg_off = info.reg_off_first[1]; + reg_last = info.reg_off_last[1]; + host = info.page[1].host; -#define DO_LDN_2(N, SUFF, SIZE) \ -void QEMU_FLATTEN HELPER(sve_ld##N##SUFF##_le_r) \ - (CPUARMState *env, void *vg, target_ulong addr, uint32_t desc) \ -{ \ - sve_ld##N##_r(env, vg, addr, desc, SIZE, GETPC(), \ - sve_ld1##SUFF##_le_tlb); \ -} \ -void QEMU_FLATTEN HELPER(sve_ld##N##SUFF##_be_r) \ - (CPUARMState *env, void *vg, target_ulong addr, uint32_t desc) \ -{ \ - sve_ld##N##_r(env, vg, addr, desc, SIZE, GETPC(), \ - sve_ld1##SUFF##_be_tlb); \ + do { + uint64_t pg = vg[reg_off >> 6]; + do { + if ((pg >> (reg_off & 63)) & 1) { + for (i = 0; i < N; ++i) { + host_fn(&env->vfp.zregs[(rd + i) & 31], reg_off, + (char*)host + mem_off + (i << msz)); + } + } + reg_off += 1 << esz; + mem_off += N << msz; + } while (reg_off & 63); + } while (reg_off <= reg_last); + } } +static inline QEMU_ALWAYS_INLINE void +sve_ldN_r_mte(CPUARMState *env, uint64_t *vg, target_ulong addr, uint32_t desc, + const uintptr_t ra, const int esz, const int msz, const int N, + sve_ldst1_host_fn *host_fn, sve_ldst1_tlb_fn *tlb_fn) +{ + uint32_t mtedesc = desc >> (SIMD_DATA_SHIFT + SVE_MTEDESC_SHIFT); + int bit55 = extract64(addr, 55, 1); + + /* Remove mtedesc from the normal sve descriptor. */ + desc = extract32(desc, 0, SIMD_DATA_SHIFT + SVE_MTEDESC_SHIFT); + + /* Perform gross MTE suppression early. */ + if (!tbi_check(desc, bit55) || + tcma_check(desc, bit55, allocation_tag_from_addr(addr))) { + mtedesc = 0; + } + + sve_ldN_r(env, vg, addr, desc, ra, esz, msz, N, mtedesc, host_fn, tlb_fn, + N == 1 ? 
sve_cont_ldst_mte_check1 : sve_cont_ldst_mte_checkN); +} + +#define DO_LD1_1(NAME, ESZ) \ + void HELPER(sve_##NAME##_r)(CPUARMState * env, void *vg, \ + target_ulong addr, uint32_t desc) \ + { \ + sve_ldN_r(env, vg, addr, desc, GETPC(), ESZ, MO_8, 1, 0, \ + sve_##NAME##_host, sve_##NAME##_tlb, NULL); \ + } \ + void HELPER(sve_##NAME##_r_mte)(CPUARMState * env, void *vg, \ + target_ulong addr, uint32_t desc) \ + { \ + sve_ldN_r_mte(env, vg, addr, desc, GETPC(), ESZ, MO_8, 1, \ + sve_##NAME##_host, sve_##NAME##_tlb); \ + } + +#define DO_LD1_2(NAME, ESZ, MSZ) \ + void HELPER(sve_##NAME##_le_r)(CPUARMState * env, void *vg, \ + target_ulong addr, uint32_t desc) \ + { \ + sve_ldN_r(env, vg, addr, desc, GETPC(), ESZ, MSZ, 1, 0, \ + sve_##NAME##_le_host, sve_##NAME##_le_tlb, NULL); \ + } \ + void HELPER(sve_##NAME##_be_r)(CPUARMState * env, void *vg, \ + target_ulong addr, uint32_t desc) \ + { \ + sve_ldN_r(env, vg, addr, desc, GETPC(), ESZ, MSZ, 1, 0, \ + sve_##NAME##_be_host, sve_##NAME##_be_tlb, NULL); \ + } \ + void HELPER(sve_##NAME##_le_r_mte)(CPUARMState * env, void *vg, \ + target_ulong addr, uint32_t desc) \ + { \ + sve_ldN_r_mte(env, vg, addr, desc, GETPC(), ESZ, MSZ, 1, \ + sve_##NAME##_le_host, sve_##NAME##_le_tlb); \ + } \ + void HELPER(sve_##NAME##_be_r_mte)(CPUARMState * env, void *vg, \ + target_ulong addr, uint32_t desc) \ + { \ + sve_ldN_r_mte(env, vg, addr, desc, GETPC(), ESZ, MSZ, 1, \ + sve_##NAME##_be_host, sve_##NAME##_be_tlb); \ + } + +DO_LD1_1(ld1bb, MO_8) +DO_LD1_1(ld1bhu, MO_16) +DO_LD1_1(ld1bhs, MO_16) +DO_LD1_1(ld1bsu, MO_32) +DO_LD1_1(ld1bss, MO_32) +DO_LD1_1(ld1bdu, MO_64) +DO_LD1_1(ld1bds, MO_64) + +DO_LD1_2(ld1hh, MO_16, MO_16) +DO_LD1_2(ld1hsu, MO_32, MO_16) +DO_LD1_2(ld1hss, MO_32, MO_16) +DO_LD1_2(ld1hdu, MO_64, MO_16) +DO_LD1_2(ld1hds, MO_64, MO_16) + +DO_LD1_2(ld1ss, MO_32, MO_32) +DO_LD1_2(ld1sdu, MO_64, MO_32) +DO_LD1_2(ld1sds, MO_64, MO_32) + +DO_LD1_2(ld1dd, MO_64, MO_64) + +#undef DO_LD1_1 +#undef DO_LD1_2 + +#define DO_LDN_1(N) \ + void HELPER(sve_ld##N##bb_r)(CPUARMState * env, void *vg, \ + target_ulong addr, uint32_t desc) \ + { \ + sve_ldN_r(env, vg, addr, desc, GETPC(), MO_8, MO_8, N, 0, \ + sve_ld1bb_host, sve_ld1bb_tlb, NULL); \ + } \ + void HELPER(sve_ld##N##bb_r_mte)(CPUARMState * env, void *vg, \ + target_ulong addr, uint32_t desc) \ + { \ + sve_ldN_r_mte(env, vg, addr, desc, GETPC(), MO_8, MO_8, N, \ + sve_ld1bb_host, sve_ld1bb_tlb); \ + } + +#define DO_LDN_2(N, SUFF, ESZ) \ + void HELPER(sve_ld##N##SUFF##_le_r)(CPUARMState * env, void *vg, \ + target_ulong addr, uint32_t desc) \ + { \ + sve_ldN_r(env, vg, addr, desc, GETPC(), ESZ, ESZ, N, 0, \ + sve_ld1##SUFF##_le_host, sve_ld1##SUFF##_le_tlb, NULL); \ + } \ + void HELPER(sve_ld##N##SUFF##_be_r)(CPUARMState * env, void *vg, \ + target_ulong addr, uint32_t desc) \ + { \ + sve_ldN_r(env, vg, addr, desc, GETPC(), ESZ, ESZ, N, 0, \ + sve_ld1##SUFF##_be_host, sve_ld1##SUFF##_be_tlb, NULL); \ + } \ + void HELPER(sve_ld##N##SUFF##_le_r_mte)(CPUARMState * env, void *vg, \ + target_ulong addr, uint32_t desc) \ + { \ + sve_ldN_r_mte(env, vg, addr, desc, GETPC(), ESZ, ESZ, N, \ + sve_ld1##SUFF##_le_host, sve_ld1##SUFF##_le_tlb); \ + } \ + void HELPER(sve_ld##N##SUFF##_be_r_mte)(CPUARMState * env, void *vg, \ + target_ulong addr, uint32_t desc) \ + { \ + sve_ldN_r_mte(env, vg, addr, desc, GETPC(), ESZ, ESZ, N, \ + sve_ld1##SUFF##_be_host, sve_ld1##SUFF##_be_tlb); \ + } + DO_LDN_1(2) DO_LDN_1(3) DO_LDN_1(4) -DO_LDN_2(2, hh, 2) -DO_LDN_2(3, hh, 2) -DO_LDN_2(4, hh, 2) +DO_LDN_2(2, hh, MO_16) +DO_LDN_2(3, 
hh, MO_16) +DO_LDN_2(4, hh, MO_16) -DO_LDN_2(2, ss, 4) -DO_LDN_2(3, ss, 4) -DO_LDN_2(4, ss, 4) +DO_LDN_2(2, ss, MO_32) +DO_LDN_2(3, ss, MO_32) +DO_LDN_2(4, ss, MO_32) -DO_LDN_2(2, dd, 8) -DO_LDN_2(3, dd, 8) -DO_LDN_2(4, dd, 8) +DO_LDN_2(2, dd, MO_64) +DO_LDN_2(3, dd, MO_64) +DO_LDN_2(4, dd, MO_64) #undef DO_LDN_1 #undef DO_LDN_2 @@ -4484,385 +4828,524 @@ static void record_fault(CPUARMState *env, uintptr_t i, uintptr_t oprsz) } /* - * Common helper for all contiguous first-fault loads. + * Common helper for all contiguous no-fault and first-fault loads. */ -static void sve_ldff1_r(CPUARMState *env, void *vg, const target_ulong addr, - uint32_t desc, const uintptr_t retaddr, - const int esz, const int msz, - sve_ld1_host_fn *host_fn, - sve_ld1_tlb_fn *tlb_fn) -{ - const TCGMemOpIdx oi = extract32(desc, SIMD_DATA_SHIFT, MEMOPIDX_SHIFT); - const int mmu_idx = get_mmuidx(oi); - const unsigned rd = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 5); +static inline QEMU_ALWAYS_INLINE void +sve_ldnfff1_r(CPUARMState *env, void *vg, const target_ulong addr, + uint32_t desc, const uintptr_t retaddr, uint32_t mtedesc, + const int esz, const int msz, const SVEContFault fault, + sve_ldst1_host_fn *host_fn, sve_ldst1_tlb_fn *tlb_fn) +{ + const unsigned rd = simd_data(desc); void *vd = &env->vfp.zregs[rd]; - const int diffsz = esz - msz; const intptr_t reg_max = simd_oprsz(desc); - const intptr_t mem_max = reg_max >> diffsz; - intptr_t split, reg_off, mem_off; + intptr_t reg_off, mem_off, reg_last; + SVEContLdSt info; + int flags; void *host; - /* Skip to the first active element. */ - reg_off = find_next_active(vg, 0, reg_max, esz); - if (unlikely(reg_off == reg_max)) { + /* Find the active elements. */ + if (!sve_cont_ldst_elements(env, &info, addr, vg, reg_max, esz, 1 << msz)) { /* The entire predicate was false; no load occurs. */ memset(vd, 0, reg_max); return; } - mem_off = reg_off >> diffsz; - set_helper_retaddr(retaddr); + reg_off = info.reg_off_first[0]; + + /* Probe the page(s). */ + if (!sve_cont_ldst_pages(&info, fault, env, addr, MMU_DATA_LOAD, retaddr)) { + /* Fault on first element. */ + tcg_debug_assert(fault == FAULT_NO); + memset(vd, 0, reg_max); + goto do_fault; + } + + mem_off = info.mem_off_first[0]; + flags = info.page[0].flags; /* - * If the (remaining) load is entirely within a single page, then: - * For softmmu, and the tlb hits, then no faults will occur; - * For user-only, either the first load will fault or none will. - * We can thus perform the load directly to the destination and - * Vd will be unmodified on any exception path. + * Disable MTE checking if the Tagged bit is not set. Since TBI must + * be set within MTEDESC for MTE, !mtedesc => !mte_active. */ - split = max_for_page(env->uc, addr, mem_off, mem_max); - if (likely(split == mem_max)) { - host = tlb_vaddr_to_host(env, addr + mem_off, MMU_DATA_LOAD, mmu_idx); - if (test_host_page(host)) { - mem_off = host_fn(vd, vg, (char *)host - mem_off, mem_off, mem_max); - tcg_debug_assert(mem_off == mem_max); - clear_helper_retaddr(); - /* After any fault, zero any leading inactive elements. */ + if (arm_tlb_mte_tagged(&info.page[0].attrs)) { + mtedesc = 0; + } + + if (fault == FAULT_FIRST) { + /* Trapping mte check for the first-fault element. */ + if (mtedesc) { + mte_check1(env, mtedesc, addr + mem_off, retaddr); + } + + /* + * Special handling of the first active element, + * if it crosses a page boundary or is MMIO. 
+ */ + bool is_split = mem_off == info.mem_off_split; + if (unlikely(flags != 0) || unlikely(is_split)) { + /* + * Use the slow path for cross-page handling. + * Might trap for MMIO or watchpoints. + */ + tlb_fn(env, vd, reg_off, addr + mem_off, retaddr); + + /* After any fault, zero the other elements. */ swap_memzero(vd, reg_off); - return; + reg_off += 1 << esz; + mem_off += 1 << msz; + swap_memzero((char*)vd + reg_off, reg_max - reg_off); + + if (is_split) { + goto second_page; + } + } else { + memset(vd, 0, reg_max); + } + } else { + memset(vd, 0, reg_max); + if (unlikely(mem_off == info.mem_off_split)) { + /* The first active element crosses a page boundary. */ + flags |= info.page[1].flags; + if (unlikely(flags & TLB_MMIO)) { + /* Some page is MMIO, see below. */ + goto do_fault; + } + if (unlikely(flags & TLB_WATCHPOINT) && + (cpu_watchpoint_address_matches(env_cpu(env), addr + mem_off, + 1 << msz) & + BP_MEM_READ)) { + /* Watchpoint hit, see below. */ + goto do_fault; + } + if (mtedesc && !mte_probe1(env, mtedesc, addr + mem_off)) { + goto do_fault; + } + /* + * Use the slow path for cross-page handling. + * This is RAM, without a watchpoint, and will not trap. + */ + tlb_fn(env, vd, reg_off, addr + mem_off, retaddr); + goto second_page; } } /* - * Perform one normal read, which will fault or not. - * But it is likely to bring the page into the tlb. + * From this point on, all memory operations are MemSingleNF. + * + * Per the MemSingleNF pseudocode, a no-fault load from Device memory + * must not actually hit the bus -- it returns (UNKNOWN, FAULT) instead. + * + * Unfortuately we do not have access to the memory attributes from the + * PTE to tell Device memory from Normal memory. So we make a mostly + * correct check, and indicate (UNKNOWN, FAULT) for any MMIO. + * This gives the right answer for the common cases of "Normal memory, + * backed by host RAM" and "Device memory, backed by MMIO". + * The architecture allows us to suppress an NF load and return + * (UNKNOWN, FAULT) for any reason, so our behaviour for the corner + * case of "Normal memory, backed by MMIO" is permitted. The case we + * get wrong is "Device memory, backed by host RAM", for which we + * should return (UNKNOWN, FAULT) for but do not. + * + * Similarly, CPU_BP breakpoints would raise exceptions, and so + * return (UNKNOWN, FAULT). For simplicity, we consider gdb and + * architectural breakpoints the same. */ - tlb_fn(env, vd, reg_off, addr + mem_off, oi, retaddr); - - /* After any fault, zero any leading predicated false elts. */ - swap_memzero(vd, reg_off); - mem_off += 1ULL << msz; - reg_off += 1ULL << esz; - - /* Try again to read the balance of the page. */ - split = max_for_page(env->uc, addr, mem_off - 1, mem_max); - if (split >= (1ULL << msz)) { - host = tlb_vaddr_to_host(env, addr + mem_off, MMU_DATA_LOAD, mmu_idx); - if (host) { - mem_off = host_fn(vd, vg, (char *)host - mem_off, mem_off, split); - reg_off = mem_off << diffsz; - } + if (unlikely(flags & TLB_MMIO)) { + goto do_fault; } - clear_helper_retaddr(); - record_fault(env, reg_off, reg_max); -} + reg_last = info.reg_off_last[0]; + host = info.page[0].host; -/* - * Common helper for all contiguous no-fault loads. 
- */ -static void sve_ldnf1_r(CPUARMState *env, void *vg, const target_ulong addr, - uint32_t desc, const int esz, const int msz, - sve_ld1_host_fn *host_fn) -{ - const unsigned rd = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 5); - void *vd = &env->vfp.zregs[rd]; - const int diffsz = esz - msz; - const intptr_t reg_max = simd_oprsz(desc); - const intptr_t mem_max = reg_max >> diffsz; - const int mmu_idx = cpu_mmu_index(env, false); - intptr_t split, reg_off, mem_off; - void *host; + do { + uint64_t pg = *(uint64_t *)((char*)vg + (reg_off >> 3)); + do { + if ((pg >> (reg_off & 63)) & 1) { + if (unlikely(flags & TLB_WATCHPOINT) && + (cpu_watchpoint_address_matches(env_cpu(env), + addr + mem_off, 1 << msz) & + BP_MEM_READ)) { + goto do_fault; + } + if (mtedesc && !mte_probe1(env, mtedesc, addr + mem_off)) { + goto do_fault; + } + host_fn(vd, reg_off, (char*)host + mem_off); + } + reg_off += 1 << esz; + mem_off += 1 << msz; + } while (reg_off <= reg_last && (reg_off & 63)); + } while (reg_off <= reg_last); - /* There will be no fault, so we may modify in advance. */ - memset(vd, 0, reg_max); + /* + * MemSingleNF is allowed to fail for any reason. We have special + * code above to handle the first element crossing a page boundary. + * As an implementation choice, decline to handle a cross-page element + * in any other position. + */ + reg_off = info.reg_off_split; + if (reg_off >= 0) { + goto do_fault; + } - /* Skip to the first active element. */ - reg_off = find_next_active(vg, 0, reg_max, esz); - if (unlikely(reg_off == reg_max)) { - /* The entire predicate was false; no load occurs. */ +second_page: + reg_off = info.reg_off_first[1]; + if (likely(reg_off < 0)) { + /* No active elements on the second page. All done. */ return; } - mem_off = reg_off >> diffsz; /* - * If the address is not in the TLB, we have no way to bring the - * entry into the TLB without also risking a fault. Note that - * the corollary is that we never load from an address not in RAM. - * - * This last is out of spec, in a weird corner case. - * Per the MemNF/MemSingleNF pseudocode, a NF load from Device memory - * must not actually hit the bus -- it returns UNKNOWN data instead. - * But if you map non-RAM with Normal memory attributes and do a NF - * load then it should access the bus. (Nobody ought actually do this - * in the real world, obviously.) - * - * Then there are the annoying special cases with watchpoints... - * TODO: Add a form of non-faulting loads using cc->tlb_fill(probe=true). + * MemSingleNF is allowed to fail for any reason. As an implementation + * choice, decline to handle elements on the second page. This should + * be low frequency as the guest walks through memory -- the next + * iteration of the guest's loop should be aligned on the page boundary, + * and then all following iterations will stay aligned. 
*/ - host = tlb_vaddr_to_host(env, addr + mem_off, MMU_DATA_LOAD, mmu_idx); - split = max_for_page(env->uc, addr, mem_off, mem_max); - if (host && split >= (1ULL << msz)) { - mem_off = host_fn(vd, vg, (char *)host - mem_off, mem_off, split); - reg_off = mem_off << diffsz; - } +do_fault: record_fault(env, reg_off, reg_max); } -#define DO_LDFF1_LDNF1_1(PART, ESZ) \ -void HELPER(sve_ldff1##PART##_r)(CPUARMState *env, void *vg, \ - target_ulong addr, uint32_t desc) \ -{ \ - sve_ldff1_r(env, vg, addr, desc, GETPC(), ESZ, 0, \ - sve_ld1##PART##_host, sve_ld1##PART##_tlb); \ -} \ -void HELPER(sve_ldnf1##PART##_r)(CPUARMState *env, void *vg, \ - target_ulong addr, uint32_t desc) \ -{ \ - sve_ldnf1_r(env, vg, addr, desc, ESZ, 0, sve_ld1##PART##_host); \ -} - -#define DO_LDFF1_LDNF1_2(PART, ESZ, MSZ) \ -void HELPER(sve_ldff1##PART##_le_r)(CPUARMState *env, void *vg, \ - target_ulong addr, uint32_t desc) \ -{ \ - sve_ldff1_r(env, vg, addr, desc, GETPC(), ESZ, MSZ, \ - sve_ld1##PART##_le_host, sve_ld1##PART##_le_tlb); \ -} \ -void HELPER(sve_ldnf1##PART##_le_r)(CPUARMState *env, void *vg, \ - target_ulong addr, uint32_t desc) \ -{ \ - sve_ldnf1_r(env, vg, addr, desc, ESZ, MSZ, sve_ld1##PART##_le_host); \ -} \ -void HELPER(sve_ldff1##PART##_be_r)(CPUARMState *env, void *vg, \ - target_ulong addr, uint32_t desc) \ -{ \ - sve_ldff1_r(env, vg, addr, desc, GETPC(), ESZ, MSZ, \ - sve_ld1##PART##_be_host, sve_ld1##PART##_be_tlb); \ -} \ -void HELPER(sve_ldnf1##PART##_be_r)(CPUARMState *env, void *vg, \ - target_ulong addr, uint32_t desc) \ -{ \ - sve_ldnf1_r(env, vg, addr, desc, ESZ, MSZ, sve_ld1##PART##_be_host); \ -} - -DO_LDFF1_LDNF1_1(bb, 0) -DO_LDFF1_LDNF1_1(bhu, 1) -DO_LDFF1_LDNF1_1(bhs, 1) -DO_LDFF1_LDNF1_1(bsu, 2) -DO_LDFF1_LDNF1_1(bss, 2) -DO_LDFF1_LDNF1_1(bdu, 3) -DO_LDFF1_LDNF1_1(bds, 3) - -DO_LDFF1_LDNF1_2(hh, 1, 1) -DO_LDFF1_LDNF1_2(hsu, 2, 1) -DO_LDFF1_LDNF1_2(hss, 2, 1) -DO_LDFF1_LDNF1_2(hdu, 3, 1) -DO_LDFF1_LDNF1_2(hds, 3, 1) - -DO_LDFF1_LDNF1_2(ss, 2, 2) -DO_LDFF1_LDNF1_2(sdu, 3, 2) -DO_LDFF1_LDNF1_2(sds, 3, 2) - -DO_LDFF1_LDNF1_2(dd, 3, 3) - -#undef DO_LDFF1_LDNF1_1 -#undef DO_LDFF1_LDNF1_2 - -/* - * Store contiguous data, protected by a governing predicate. - */ +static inline QEMU_ALWAYS_INLINE void +sve_ldnfff1_r_mte(CPUARMState *env, void *vg, target_ulong addr, uint32_t desc, + const uintptr_t retaddr, const int esz, const int msz, + const SVEContFault fault, sve_ldst1_host_fn *host_fn, + sve_ldst1_tlb_fn *tlb_fn) +{ + uint32_t mtedesc = desc >> (SIMD_DATA_SHIFT + SVE_MTEDESC_SHIFT); + int bit55 = extract64(addr, 55, 1); -#define DO_ST_TLB(NAME, H, TYPEM, HOST, MOEND, TLB) \ -static void sve_##NAME##_tlb(CPUARMState *env, void *vd, intptr_t reg_off, \ - target_ulong addr, TCGMemOpIdx oi, uintptr_t ra) \ -{ \ - TLB(env, addr, *(TYPEM *)((char *)vd + H(reg_off)), oi, ra); \ -} + /* Remove mtedesc from the normal sve descriptor. */ + desc = extract32(desc, 0, SIMD_DATA_SHIFT + SVE_MTEDESC_SHIFT); -DO_ST_TLB(st1bb, H1, uint8_t, stb_p, 0, helper_ret_stb_mmu) -DO_ST_TLB(st1bh, H1_2, uint16_t, stb_p, 0, helper_ret_stb_mmu) -DO_ST_TLB(st1bs, H1_4, uint32_t, stb_p, 0, helper_ret_stb_mmu) -DO_ST_TLB(st1bd, , uint64_t, stb_p, 0, helper_ret_stb_mmu) + /* Perform gross MTE suppression early. 
*/ + if (!tbi_check(desc, bit55) || + tcma_check(desc, bit55, allocation_tag_from_addr(addr))) { + mtedesc = 0; + } -DO_ST_TLB(st1hh_le, H1_2, uint16_t, stw_le_p, MO_LE, helper_le_stw_mmu) -DO_ST_TLB(st1hs_le, H1_4, uint32_t, stw_le_p, MO_LE, helper_le_stw_mmu) -DO_ST_TLB(st1hd_le, , uint64_t, stw_le_p, MO_LE, helper_le_stw_mmu) + sve_ldnfff1_r(env, vg, addr, desc, retaddr, mtedesc, esz, msz, fault, + host_fn, tlb_fn); +} + +#define DO_LDFF1_LDNF1_1(PART, ESZ) \ + void HELPER(sve_ldff1##PART##_r)(CPUARMState * env, void *vg, \ + target_ulong addr, uint32_t desc) \ + { \ + sve_ldnfff1_r(env, vg, addr, desc, GETPC(), 0, ESZ, MO_8, FAULT_FIRST, \ + sve_ld1##PART##_host, sve_ld1##PART##_tlb); \ + } \ + void HELPER(sve_ldnf1##PART##_r)(CPUARMState * env, void *vg, \ + target_ulong addr, uint32_t desc) \ + { \ + sve_ldnfff1_r(env, vg, addr, desc, GETPC(), 0, ESZ, MO_8, FAULT_NO, \ + sve_ld1##PART##_host, sve_ld1##PART##_tlb); \ + } \ + void HELPER(sve_ldff1##PART##_r_mte)(CPUARMState * env, void *vg, \ + target_ulong addr, uint32_t desc) \ + { \ + sve_ldnfff1_r_mte(env, vg, addr, desc, GETPC(), ESZ, MO_8, \ + FAULT_FIRST, sve_ld1##PART##_host, \ + sve_ld1##PART##_tlb); \ + } \ + void HELPER(sve_ldnf1##PART##_r_mte)(CPUARMState * env, void *vg, \ + target_ulong addr, uint32_t desc) \ + { \ + sve_ldnfff1_r_mte(env, vg, addr, desc, GETPC(), ESZ, MO_8, FAULT_NO, \ + sve_ld1##PART##_host, sve_ld1##PART##_tlb); \ + } -DO_ST_TLB(st1ss_le, H1_4, uint32_t, stl_le_p, MO_LE, helper_le_stl_mmu) -DO_ST_TLB(st1sd_le, , uint64_t, stl_le_p, MO_LE, helper_le_stl_mmu) +#define DO_LDFF1_LDNF1_2(PART, ESZ, MSZ) \ + void HELPER(sve_ldff1##PART##_le_r)(CPUARMState * env, void *vg, \ + target_ulong addr, uint32_t desc) \ + { \ + sve_ldnfff1_r(env, vg, addr, desc, GETPC(), 0, ESZ, MSZ, FAULT_FIRST, \ + sve_ld1##PART##_le_host, sve_ld1##PART##_le_tlb); \ + } \ + void HELPER(sve_ldnf1##PART##_le_r)(CPUARMState * env, void *vg, \ + target_ulong addr, uint32_t desc) \ + { \ + sve_ldnfff1_r(env, vg, addr, desc, GETPC(), 0, ESZ, MSZ, FAULT_NO, \ + sve_ld1##PART##_le_host, sve_ld1##PART##_le_tlb); \ + } \ + void HELPER(sve_ldff1##PART##_be_r)(CPUARMState * env, void *vg, \ + target_ulong addr, uint32_t desc) \ + { \ + sve_ldnfff1_r(env, vg, addr, desc, GETPC(), 0, ESZ, MSZ, FAULT_FIRST, \ + sve_ld1##PART##_be_host, sve_ld1##PART##_be_tlb); \ + } \ + void HELPER(sve_ldnf1##PART##_be_r)(CPUARMState * env, void *vg, \ + target_ulong addr, uint32_t desc) \ + { \ + sve_ldnfff1_r(env, vg, addr, desc, GETPC(), 0, ESZ, MSZ, FAULT_NO, \ + sve_ld1##PART##_be_host, sve_ld1##PART##_be_tlb); \ + } \ + void HELPER(sve_ldff1##PART##_le_r_mte)(CPUARMState * env, void *vg, \ + target_ulong addr, uint32_t desc) \ + { \ + sve_ldnfff1_r_mte(env, vg, addr, desc, GETPC(), ESZ, MSZ, FAULT_FIRST, \ + sve_ld1##PART##_le_host, sve_ld1##PART##_le_tlb); \ + } \ + void HELPER(sve_ldnf1##PART##_le_r_mte)(CPUARMState * env, void *vg, \ + target_ulong addr, uint32_t desc) \ + { \ + sve_ldnfff1_r_mte(env, vg, addr, desc, GETPC(), ESZ, MSZ, FAULT_NO, \ + sve_ld1##PART##_le_host, sve_ld1##PART##_le_tlb); \ + } \ + void HELPER(sve_ldff1##PART##_be_r_mte)(CPUARMState * env, void *vg, \ + target_ulong addr, uint32_t desc) \ + { \ + sve_ldnfff1_r_mte(env, vg, addr, desc, GETPC(), ESZ, MSZ, FAULT_FIRST, \ + sve_ld1##PART##_be_host, sve_ld1##PART##_be_tlb); \ + } \ + void HELPER(sve_ldnf1##PART##_be_r_mte)(CPUARMState * env, void *vg, \ + target_ulong addr, uint32_t desc) \ + { \ + sve_ldnfff1_r_mte(env, vg, addr, desc, GETPC(), ESZ, MSZ, FAULT_NO, \ + 
sve_ld1##PART##_be_host, sve_ld1##PART##_be_tlb); \ + } -DO_ST_TLB(st1dd_le, , uint64_t, stq_le_p, MO_LE, helper_le_stq_mmu) +DO_LDFF1_LDNF1_1(bb, MO_8) +DO_LDFF1_LDNF1_1(bhu, MO_16) +DO_LDFF1_LDNF1_1(bhs, MO_16) +DO_LDFF1_LDNF1_1(bsu, MO_32) +DO_LDFF1_LDNF1_1(bss, MO_32) +DO_LDFF1_LDNF1_1(bdu, MO_64) +DO_LDFF1_LDNF1_1(bds, MO_64) -DO_ST_TLB(st1hh_be, H1_2, uint16_t, stw_be_p, MO_BE, helper_be_stw_mmu) -DO_ST_TLB(st1hs_be, H1_4, uint32_t, stw_be_p, MO_BE, helper_be_stw_mmu) -DO_ST_TLB(st1hd_be, , uint64_t, stw_be_p, MO_BE, helper_be_stw_mmu) +DO_LDFF1_LDNF1_2(hh, MO_16, MO_16) +DO_LDFF1_LDNF1_2(hsu, MO_32, MO_16) +DO_LDFF1_LDNF1_2(hss, MO_32, MO_16) +DO_LDFF1_LDNF1_2(hdu, MO_64, MO_16) +DO_LDFF1_LDNF1_2(hds, MO_64, MO_16) -DO_ST_TLB(st1ss_be, H1_4, uint32_t, stl_be_p, MO_BE, helper_be_stl_mmu) -DO_ST_TLB(st1sd_be, , uint64_t, stl_be_p, MO_BE, helper_be_stl_mmu) +DO_LDFF1_LDNF1_2(ss, MO_32, MO_32) +DO_LDFF1_LDNF1_2(sdu, MO_64, MO_32) +DO_LDFF1_LDNF1_2(sds, MO_64, MO_32) -DO_ST_TLB(st1dd_be, , uint64_t, stq_be_p, MO_BE, helper_be_stq_mmu) +DO_LDFF1_LDNF1_2(dd, MO_64, MO_64) -#undef DO_ST_TLB +#undef DO_LDFF1_LDNF1_1 +#undef DO_LDFF1_LDNF1_2 /* - * Common helpers for all contiguous 1,2,3,4-register predicated stores. + * Common helper for all contiguous 1,2,3,4-register predicated stores. */ -static void sve_st1_r(CPUARMState *env, void *vg, target_ulong addr, - uint32_t desc, const uintptr_t ra, - const int esize, const int msize, - sve_st1_tlb_fn *tlb_fn) + +static inline QEMU_ALWAYS_INLINE void +sve_stN_r(CPUARMState *env, uint64_t *vg, target_ulong addr, uint32_t desc, + const uintptr_t retaddr, const int esz, const int msz, const int N, + uint32_t mtedesc, sve_ldst1_host_fn *host_fn, + sve_ldst1_tlb_fn *tlb_fn, sve_cont_ldst_mte_check_fn *mte_check_fn) { - const TCGMemOpIdx oi = extract32(desc, SIMD_DATA_SHIFT, MEMOPIDX_SHIFT); - const unsigned rd = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 5); - intptr_t i, oprsz = simd_oprsz(desc); - void *vd = &env->vfp.zregs[rd]; + const unsigned rd = simd_data(desc); + const intptr_t reg_max = simd_oprsz(desc); + intptr_t reg_off, reg_last, mem_off; + SVEContLdSt info; + void *host; + int i, flags; - set_helper_retaddr(ra); - for (i = 0; i < oprsz; ) { - uint16_t pg = *(uint16_t *)((char *)vg + H1_2(i >> 3)); - do { - if (pg & 1) { - tlb_fn(env, vd, i, addr, oi, ra); + /* Find the active elements. */ + if (!sve_cont_ldst_elements(env, &info, addr, vg, reg_max, esz, N << msz)) { + /* The entire predicate was false; no store occurs. */ + return; + } + + /* Probe the page(s). Exit with exception for any invalid page. */ + sve_cont_ldst_pages(&info, FAULT_ALL, env, addr, MMU_DATA_STORE, retaddr); + + /* Handle watchpoints for all active elements. */ + sve_cont_ldst_watchpoints(&info, env, vg, addr, 1 << esz, N << msz, + BP_MEM_WRITE, retaddr); + + /* + * Handle mte checks for all active elements. + * Since TBI must be set for MTE, !mtedesc => !mte_active. + */ + if (mte_check_fn && mtedesc) { + mte_check_fn(&info, env, vg, addr, 1 << esz, N << msz, mtedesc, + retaddr); + } + + flags = info.page[0].flags | info.page[1].flags; + if (unlikely(flags != 0)) { +#ifdef CONFIG_USER_ONLY + g_assert_not_reached(); +#else + /* + * At least one page includes MMIO. + * Any bus operation can fail with cpu_transaction_failed, + * which for ARM will raise SyncExternal. We cannot avoid + * this fault and will leave with the store incomplete. 
+ */ + mem_off = info.mem_off_first[0]; + reg_off = info.reg_off_first[0]; + reg_last = info.reg_off_last[1]; + if (reg_last < 0) { + reg_last = info.reg_off_split; + if (reg_last < 0) { + reg_last = info.reg_off_last[0]; } - i += esize, pg >>= esize; - addr += msize; - } while (i & 15); + } + + do { + uint64_t pg = vg[reg_off >> 6]; + do { + if ((pg >> (reg_off & 63)) & 1) { + for (i = 0; i < N; ++i) { + tlb_fn(env, &env->vfp.zregs[(rd + i) & 31], reg_off, + addr + mem_off + (i << msz), retaddr); + } + } + reg_off += 1 << esz; + mem_off += N << msz; + } while (reg_off & 63); + } while (reg_off <= reg_last); + return; +#endif } - clear_helper_retaddr(); -} -static void sve_st2_r(CPUARMState *env, void *vg, target_ulong addr, - uint32_t desc, const uintptr_t ra, - const int esize, const int msize, - sve_st1_tlb_fn *tlb_fn) -{ - const TCGMemOpIdx oi = extract32(desc, SIMD_DATA_SHIFT, MEMOPIDX_SHIFT); - const unsigned rd = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 5); - intptr_t i, oprsz = simd_oprsz(desc); - void *d1 = &env->vfp.zregs[rd]; - void *d2 = &env->vfp.zregs[(rd + 1) & 31]; + mem_off = info.mem_off_first[0]; + reg_off = info.reg_off_first[0]; + reg_last = info.reg_off_last[0]; + host = info.page[0].host; - set_helper_retaddr(ra); - for (i = 0; i < oprsz; ) { - uint16_t pg = *(uint16_t *)((char *)vg + H1_2(i >> 3)); + while (reg_off <= reg_last) { + uint64_t pg = vg[reg_off >> 6]; do { - if (pg & 1) { - tlb_fn(env, d1, i, addr, oi, ra); - tlb_fn(env, d2, i, addr + msize, oi, ra); + if ((pg >> (reg_off & 63)) & 1) { + for (i = 0; i < N; ++i) { + host_fn(&env->vfp.zregs[(rd + i) & 31], reg_off, + (char*)host + mem_off + (i << msz)); + } } - i += esize, pg >>= esize; - addr += 2 * msize; - } while (i & 15); + reg_off += 1 << esz; + mem_off += N << msz; + } while (reg_off <= reg_last && (reg_off & 63)); } - clear_helper_retaddr(); -} -static void sve_st3_r(CPUARMState *env, void *vg, target_ulong addr, - uint32_t desc, const uintptr_t ra, - const int esize, const int msize, - sve_st1_tlb_fn *tlb_fn) -{ - const TCGMemOpIdx oi = extract32(desc, SIMD_DATA_SHIFT, MEMOPIDX_SHIFT); - const unsigned rd = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 5); - intptr_t i, oprsz = simd_oprsz(desc); - void *d1 = &env->vfp.zregs[rd]; - void *d2 = &env->vfp.zregs[(rd + 1) & 31]; - void *d3 = &env->vfp.zregs[(rd + 2) & 31]; + /* + * Use the slow path to manage the cross-page misalignment. + * But we know this is RAM and cannot trap. 
+ */ + mem_off = info.mem_off_split; + if (unlikely(mem_off >= 0)) { + reg_off = info.reg_off_split; + for (i = 0; i < N; ++i) { + tlb_fn(env, &env->vfp.zregs[(rd + i) & 31], reg_off, + addr + mem_off + (i << msz), retaddr); + } + } + + mem_off = info.mem_off_first[1]; + if (unlikely(mem_off >= 0)) { + reg_off = info.reg_off_first[1]; + reg_last = info.reg_off_last[1]; + host = info.page[1].host; - set_helper_retaddr(ra); - for (i = 0; i < oprsz; ) { - uint16_t pg = *(uint16_t *)((char *)vg + H1_2(i >> 3)); do { - if (pg & 1) { - tlb_fn(env, d1, i, addr, oi, ra); - tlb_fn(env, d2, i, addr + msize, oi, ra); - tlb_fn(env, d3, i, addr + 2 * msize, oi, ra); - } - i += esize, pg >>= esize; - addr += 3 * msize; - } while (i & 15); + uint64_t pg = vg[reg_off >> 6]; + do { + if ((pg >> (reg_off & 63)) & 1) { + for (i = 0; i < N; ++i) { + host_fn(&env->vfp.zregs[(rd + i) & 31], reg_off, + (char*)host + mem_off + (i << msz)); + } + } + reg_off += 1 << esz; + mem_off += N << msz; + } while (reg_off & 63); + } while (reg_off <= reg_last); } - clear_helper_retaddr(); } -static void sve_st4_r(CPUARMState *env, void *vg, target_ulong addr, - uint32_t desc, const uintptr_t ra, - const int esize, const int msize, - sve_st1_tlb_fn *tlb_fn) +static inline QEMU_ALWAYS_INLINE void +sve_stN_r_mte(CPUARMState *env, uint64_t *vg, target_ulong addr, uint32_t desc, + const uintptr_t ra, const int esz, const int msz, const int N, + sve_ldst1_host_fn *host_fn, sve_ldst1_tlb_fn *tlb_fn) { - const TCGMemOpIdx oi = extract32(desc, SIMD_DATA_SHIFT, MEMOPIDX_SHIFT); - const unsigned rd = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 5); - intptr_t i, oprsz = simd_oprsz(desc); - void *d1 = &env->vfp.zregs[rd]; - void *d2 = &env->vfp.zregs[(rd + 1) & 31]; - void *d3 = &env->vfp.zregs[(rd + 2) & 31]; - void *d4 = &env->vfp.zregs[(rd + 3) & 31]; + uint32_t mtedesc = desc >> (SIMD_DATA_SHIFT + SVE_MTEDESC_SHIFT); + int bit55 = extract64(addr, 55, 1); - set_helper_retaddr(ra); - for (i = 0; i < oprsz; ) { - uint16_t pg = *(uint16_t *)((char *)vg + H1_2(i >> 3)); - do { - if (pg & 1) { - tlb_fn(env, d1, i, addr, oi, ra); - tlb_fn(env, d2, i, addr + msize, oi, ra); - tlb_fn(env, d3, i, addr + 2 * msize, oi, ra); - tlb_fn(env, d4, i, addr + 3 * msize, oi, ra); - } - i += esize, pg >>= esize; - addr += 4 * msize; - } while (i & 15); + /* Remove mtedesc from the normal sve descriptor. */ + desc = extract32(desc, 0, SIMD_DATA_SHIFT + SVE_MTEDESC_SHIFT); + + /* Perform gross MTE suppression early. */ + if (!tbi_check(desc, bit55) || + tcma_check(desc, bit55, allocation_tag_from_addr(addr))) { + mtedesc = 0; + } + + sve_stN_r(env, vg, addr, desc, ra, esz, msz, N, mtedesc, host_fn, tlb_fn, + N == 1 ? 
sve_cont_ldst_mte_check1 : sve_cont_ldst_mte_checkN); +} + +#define DO_STN_1(N, NAME, ESZ) \ + void HELPER(sve_st##N##NAME##_r)(CPUARMState * env, void *vg, \ + target_ulong addr, uint32_t desc) \ + { \ + sve_stN_r(env, vg, addr, desc, GETPC(), ESZ, MO_8, N, 0, \ + sve_st1##NAME##_host, sve_st1##NAME##_tlb, NULL); \ + } \ + void HELPER(sve_st##N##NAME##_r_mte)(CPUARMState * env, void *vg, \ + target_ulong addr, uint32_t desc) \ + { \ + sve_stN_r_mte(env, vg, addr, desc, GETPC(), ESZ, MO_8, N, \ + sve_st1##NAME##_host, sve_st1##NAME##_tlb); \ + } + +#define DO_STN_2(N, NAME, ESZ, MSZ) \ + void HELPER(sve_st##N##NAME##_le_r)(CPUARMState * env, void *vg, \ + target_ulong addr, uint32_t desc) \ + { \ + sve_stN_r(env, vg, addr, desc, GETPC(), ESZ, MSZ, N, 0, \ + sve_st1##NAME##_le_host, sve_st1##NAME##_le_tlb, NULL); \ + } \ + void HELPER(sve_st##N##NAME##_be_r)(CPUARMState * env, void *vg, \ + target_ulong addr, uint32_t desc) \ + { \ + sve_stN_r(env, vg, addr, desc, GETPC(), ESZ, MSZ, N, 0, \ + sve_st1##NAME##_be_host, sve_st1##NAME##_be_tlb, NULL); \ + } \ + void HELPER(sve_st##N##NAME##_le_r_mte)(CPUARMState * env, void *vg, \ + target_ulong addr, uint32_t desc) \ + { \ + sve_stN_r_mte(env, vg, addr, desc, GETPC(), ESZ, MSZ, N, \ + sve_st1##NAME##_le_host, sve_st1##NAME##_le_tlb); \ + } \ + void HELPER(sve_st##N##NAME##_be_r_mte)(CPUARMState * env, void *vg, \ + target_ulong addr, uint32_t desc) \ + { \ + sve_stN_r_mte(env, vg, addr, desc, GETPC(), ESZ, MSZ, N, \ + sve_st1##NAME##_be_host, sve_st1##NAME##_be_tlb); \ } - clear_helper_retaddr(); -} - -#define DO_STN_1(N, NAME, ESIZE) \ -void QEMU_FLATTEN HELPER(sve_st##N##NAME##_r) \ - (CPUARMState *env, void *vg, target_ulong addr, uint32_t desc) \ -{ \ - sve_st##N##_r(env, vg, addr, desc, GETPC(), ESIZE, 1, \ - sve_st1##NAME##_tlb); \ -} - -#define DO_STN_2(N, NAME, ESIZE, MSIZE) \ -void QEMU_FLATTEN HELPER(sve_st##N##NAME##_le_r) \ - (CPUARMState *env, void *vg, target_ulong addr, uint32_t desc) \ -{ \ - sve_st##N##_r(env, vg, addr, desc, GETPC(), ESIZE, MSIZE, \ - sve_st1##NAME##_le_tlb); \ -} \ -void QEMU_FLATTEN HELPER(sve_st##N##NAME##_be_r) \ - (CPUARMState *env, void *vg, target_ulong addr, uint32_t desc) \ -{ \ - sve_st##N##_r(env, vg, addr, desc, GETPC(), ESIZE, MSIZE, \ - sve_st1##NAME##_be_tlb); \ -} - -DO_STN_1(1, bb, 1) -DO_STN_1(1, bh, 2) -DO_STN_1(1, bs, 4) -DO_STN_1(1, bd, 8) -DO_STN_1(2, bb, 1) -DO_STN_1(3, bb, 1) -DO_STN_1(4, bb, 1) - -DO_STN_2(1, hh, 2, 2) -DO_STN_2(1, hs, 4, 2) -DO_STN_2(1, hd, 8, 2) -DO_STN_2(2, hh, 2, 2) -DO_STN_2(3, hh, 2, 2) -DO_STN_2(4, hh, 2, 2) - -DO_STN_2(1, ss, 4, 4) -DO_STN_2(1, sd, 8, 4) -DO_STN_2(2, ss, 4, 4) -DO_STN_2(3, ss, 4, 4) -DO_STN_2(4, ss, 4, 4) - -DO_STN_2(1, dd, 8, 8) -DO_STN_2(2, dd, 8, 8) -DO_STN_2(3, dd, 8, 8) -DO_STN_2(4, dd, 8, 8) + +DO_STN_1(1, bb, MO_8) +DO_STN_1(1, bh, MO_16) +DO_STN_1(1, bs, MO_32) +DO_STN_1(1, bd, MO_64) +DO_STN_1(2, bb, MO_8) +DO_STN_1(3, bb, MO_8) +DO_STN_1(4, bb, MO_8) + +DO_STN_2(1, hh, MO_16, MO_16) +DO_STN_2(1, hs, MO_32, MO_16) +DO_STN_2(1, hd, MO_64, MO_16) +DO_STN_2(2, hh, MO_16, MO_16) +DO_STN_2(3, hh, MO_16, MO_16) +DO_STN_2(4, hh, MO_16, MO_16) + +DO_STN_2(1, ss, MO_32, MO_32) +DO_STN_2(1, sd, MO_64, MO_32) +DO_STN_2(2, ss, MO_32, MO_32) +DO_STN_2(3, ss, MO_32, MO_32) +DO_STN_2(4, ss, MO_32, MO_32) + +DO_STN_2(1, dd, MO_64, MO_64) +DO_STN_2(2, dd, MO_64, MO_64) +DO_STN_2(3, dd, MO_64, MO_64) +DO_STN_2(4, dd, MO_64, MO_64) #undef DO_STN_1 #undef DO_STN_2 @@ -4878,497 +5361,578 @@ typedef target_ulong zreg_off_fn(void *reg, intptr_t reg_ofs); 
static target_ulong off_zsu_s(void *reg, intptr_t reg_ofs) { - return *(uint32_t *)((char *)reg + H1_4(reg_ofs)); + return *(uint32_t *)((char*)reg + H1_4(reg_ofs)); } static target_ulong off_zss_s(void *reg, intptr_t reg_ofs) { - return *(int32_t *)((char *)reg + H1_4(reg_ofs)); + return *(int32_t *)((char*)reg + H1_4(reg_ofs)); } static target_ulong off_zsu_d(void *reg, intptr_t reg_ofs) { - return (uint32_t)*(uint64_t *)((char *)reg + reg_ofs); + return (uint32_t)*(uint64_t *)((char*)reg + reg_ofs); } static target_ulong off_zss_d(void *reg, intptr_t reg_ofs) { - return (int32_t)*(uint64_t *)((char *)reg + reg_ofs); + return (int32_t)*(uint64_t *)((char*)reg + reg_ofs); } static target_ulong off_zd_d(void *reg, intptr_t reg_ofs) { - return *(uint64_t *)((char *)reg + reg_ofs); + return *(uint64_t *)((char*)reg + reg_ofs); } -static void sve_ld1_zs(CPUARMState *env, void *vd, void *vg, void *vm, - target_ulong base, uint32_t desc, uintptr_t ra, - zreg_off_fn *off_fn, sve_ld1_tlb_fn *tlb_fn) +static inline QEMU_ALWAYS_INLINE void +sve_ld1_z(CPUARMState *env, void *vd, uint64_t *vg, void *vm, target_ulong base, + uint32_t desc, uintptr_t retaddr, uint32_t mtedesc, int esize, + int msize, zreg_off_fn *off_fn, sve_ldst1_host_fn *host_fn, + sve_ldst1_tlb_fn *tlb_fn) { - const TCGMemOpIdx oi = extract32(desc, SIMD_DATA_SHIFT, MEMOPIDX_SHIFT); - const int scale = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 2); - intptr_t i, oprsz = simd_oprsz(desc); - ARMVectorReg scratch = { 0 }; + uc_engine *uc = env->uc; + const int mmu_idx = cpu_mmu_index(env, false); + const intptr_t reg_max = simd_oprsz(desc); + const int scale = simd_data(desc); + ARMVectorReg scratch; + intptr_t reg_off; + SVEHostPage info, info2; - set_helper_retaddr(ra); - for (i = 0; i < oprsz; ) { - uint16_t pg = *(uint16_t *)((char *)vg + H1_2(i >> 3)); + memset(&scratch, 0, reg_max); + reg_off = 0; + do { + uint64_t pg = vg[reg_off >> 6]; do { if (likely(pg & 1)) { - target_ulong off = off_fn(vm, i); - tlb_fn(env, &scratch, i, base + (off << scale), oi, ra); + target_ulong addr = base + (off_fn(vm, reg_off) << scale); + target_ulong in_page = -(addr | TARGET_PAGE_MASK); + + sve_probe_page(&info, false, env, addr, 0, MMU_DATA_LOAD, + mmu_idx, retaddr); + + if (likely(in_page >= msize)) { + if (unlikely(info.flags & TLB_WATCHPOINT)) { + cpu_check_watchpoint(env_cpu(env), addr, msize, + info.attrs, BP_MEM_READ, retaddr); + } + if (mtedesc && arm_tlb_mte_tagged(&info.attrs)) { + mte_check1(env, mtedesc, addr, retaddr); + } + host_fn(&scratch, reg_off, info.host); + } else { + /* Element crosses the page boundary. */ + sve_probe_page(&info2, false, env, addr + in_page, 0, + MMU_DATA_LOAD, mmu_idx, retaddr); + if (unlikely((info.flags | info2.flags) & TLB_WATCHPOINT)) { + cpu_check_watchpoint(env_cpu(env), addr, msize, + info.attrs, BP_MEM_READ, retaddr); + } + if (mtedesc && arm_tlb_mte_tagged(&info.attrs)) { + mte_check1(env, mtedesc, addr, retaddr); + } + tlb_fn(env, &scratch, reg_off, addr, retaddr); + } } - i += 4, pg >>= 4; - } while (i & 15); - } - clear_helper_retaddr(); + reg_off += esize; + pg >>= esize; + } while (reg_off & 63); + } while (reg_off < reg_max); /* Wait until all exceptions have been raised to write back. 
*/ - memcpy(vd, &scratch, oprsz); + memcpy(vd, &scratch, reg_max); } -static void sve_ld1_zd(CPUARMState *env, void *vd, void *vg, void *vm, - target_ulong base, uint32_t desc, uintptr_t ra, - zreg_off_fn *off_fn, sve_ld1_tlb_fn *tlb_fn) +static inline QEMU_ALWAYS_INLINE void +sve_ld1_z_mte(CPUARMState *env, void *vd, uint64_t *vg, void *vm, + target_ulong base, uint32_t desc, uintptr_t retaddr, int esize, + int msize, zreg_off_fn *off_fn, sve_ldst1_host_fn *host_fn, + sve_ldst1_tlb_fn *tlb_fn) { - const TCGMemOpIdx oi = extract32(desc, SIMD_DATA_SHIFT, MEMOPIDX_SHIFT); - const int scale = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 2); - intptr_t i, oprsz = simd_oprsz(desc) / 8; - ARMVectorReg scratch = { 0 }; + uint32_t mtedesc = desc >> (SIMD_DATA_SHIFT + SVE_MTEDESC_SHIFT); + /* Remove mtedesc from the normal sve descriptor. */ + desc = extract32(desc, 0, SIMD_DATA_SHIFT + SVE_MTEDESC_SHIFT); - set_helper_retaddr(ra); - for (i = 0; i < oprsz; i++) { - uint8_t pg = *(uint8_t *)((char *)vg + H1(i)); - if (likely(pg & 1)) { - target_ulong off = off_fn(vm, i * 8); - tlb_fn(env, &scratch, i * 8, base + (off << scale), oi, ra); - } + /* + * ??? TODO: For the 32-bit offset extractions, base + ofs cannot + * offset base entirely over the address space hole to change the + * pointer tag, or change the bit55 selector. So we could here + * examine TBI + TCMA like we do for sve_ldN_r_mte(). + */ + sve_ld1_z(env, vd, vg, vm, base, desc, retaddr, mtedesc, esize, msize, + off_fn, host_fn, tlb_fn); +} + +#define DO_LD1_ZPZ_S(MEM, OFS, MSZ) \ + void HELPER(sve_ld##MEM##_##OFS)(CPUARMState * env, void *vd, void *vg, \ + void *vm, target_ulong base, \ + uint32_t desc) \ + { \ + sve_ld1_z(env, vd, vg, vm, base, desc, GETPC(), 0, 4, 1 << (MSZ), \ + off_##OFS##_s, sve_ld1##MEM##_host, sve_ld1##MEM##_tlb); \ + } \ + void HELPER(sve_ld##MEM##_##OFS##_mte)(CPUARMState * env, void *vd, \ + void *vg, void *vm, \ + target_ulong base, uint32_t desc) \ + { \ + sve_ld1_z_mte(env, vd, vg, vm, base, desc, GETPC(), 4, 1 << (MSZ), \ + off_##OFS##_s, sve_ld1##MEM##_host, sve_ld1##MEM##_tlb); \ } - clear_helper_retaddr(); - /* Wait until all exceptions have been raised to write back. 
*/ - memcpy(vd, &scratch, oprsz * 8); -} - -#define DO_LD1_ZPZ_S(MEM, OFS) \ -void QEMU_FLATTEN HELPER(sve_ld##MEM##_##OFS) \ - (CPUARMState *env, void *vd, void *vg, void *vm, \ - target_ulong base, uint32_t desc) \ -{ \ - sve_ld1_zs(env, vd, vg, vm, base, desc, GETPC(), \ - off_##OFS##_s, sve_ld1##MEM##_tlb); \ -} - -#define DO_LD1_ZPZ_D(MEM, OFS) \ -void QEMU_FLATTEN HELPER(sve_ld##MEM##_##OFS) \ - (CPUARMState *env, void *vd, void *vg, void *vm, \ - target_ulong base, uint32_t desc) \ -{ \ - sve_ld1_zd(env, vd, vg, vm, base, desc, GETPC(), \ - off_##OFS##_d, sve_ld1##MEM##_tlb); \ -} - -DO_LD1_ZPZ_S(bsu, zsu) -DO_LD1_ZPZ_S(bsu, zss) -DO_LD1_ZPZ_D(bdu, zsu) -DO_LD1_ZPZ_D(bdu, zss) -DO_LD1_ZPZ_D(bdu, zd) - -DO_LD1_ZPZ_S(bss, zsu) -DO_LD1_ZPZ_S(bss, zss) -DO_LD1_ZPZ_D(bds, zsu) -DO_LD1_ZPZ_D(bds, zss) -DO_LD1_ZPZ_D(bds, zd) - -DO_LD1_ZPZ_S(hsu_le, zsu) -DO_LD1_ZPZ_S(hsu_le, zss) -DO_LD1_ZPZ_D(hdu_le, zsu) -DO_LD1_ZPZ_D(hdu_le, zss) -DO_LD1_ZPZ_D(hdu_le, zd) - -DO_LD1_ZPZ_S(hsu_be, zsu) -DO_LD1_ZPZ_S(hsu_be, zss) -DO_LD1_ZPZ_D(hdu_be, zsu) -DO_LD1_ZPZ_D(hdu_be, zss) -DO_LD1_ZPZ_D(hdu_be, zd) - -DO_LD1_ZPZ_S(hss_le, zsu) -DO_LD1_ZPZ_S(hss_le, zss) -DO_LD1_ZPZ_D(hds_le, zsu) -DO_LD1_ZPZ_D(hds_le, zss) -DO_LD1_ZPZ_D(hds_le, zd) - -DO_LD1_ZPZ_S(hss_be, zsu) -DO_LD1_ZPZ_S(hss_be, zss) -DO_LD1_ZPZ_D(hds_be, zsu) -DO_LD1_ZPZ_D(hds_be, zss) -DO_LD1_ZPZ_D(hds_be, zd) - -DO_LD1_ZPZ_S(ss_le, zsu) -DO_LD1_ZPZ_S(ss_le, zss) -DO_LD1_ZPZ_D(sdu_le, zsu) -DO_LD1_ZPZ_D(sdu_le, zss) -DO_LD1_ZPZ_D(sdu_le, zd) - -DO_LD1_ZPZ_S(ss_be, zsu) -DO_LD1_ZPZ_S(ss_be, zss) -DO_LD1_ZPZ_D(sdu_be, zsu) -DO_LD1_ZPZ_D(sdu_be, zss) -DO_LD1_ZPZ_D(sdu_be, zd) - -DO_LD1_ZPZ_D(sds_le, zsu) -DO_LD1_ZPZ_D(sds_le, zss) -DO_LD1_ZPZ_D(sds_le, zd) - -DO_LD1_ZPZ_D(sds_be, zsu) -DO_LD1_ZPZ_D(sds_be, zss) -DO_LD1_ZPZ_D(sds_be, zd) - -DO_LD1_ZPZ_D(dd_le, zsu) -DO_LD1_ZPZ_D(dd_le, zss) -DO_LD1_ZPZ_D(dd_le, zd) - -DO_LD1_ZPZ_D(dd_be, zsu) -DO_LD1_ZPZ_D(dd_be, zss) -DO_LD1_ZPZ_D(dd_be, zd) +#define DO_LD1_ZPZ_D(MEM, OFS, MSZ) \ + void HELPER(sve_ld##MEM##_##OFS)(CPUARMState * env, void *vd, void *vg, \ + void *vm, target_ulong base, \ + uint32_t desc) \ + { \ + sve_ld1_z(env, vd, vg, vm, base, desc, GETPC(), 0, 8, 1 << (MSZ), \ + off_##OFS##_d, sve_ld1##MEM##_host, sve_ld1##MEM##_tlb); \ + } \ + void HELPER(sve_ld##MEM##_##OFS##_mte)(CPUARMState * env, void *vd, \ + void *vg, void *vm, \ + target_ulong base, uint32_t desc) \ + { \ + sve_ld1_z_mte(env, vd, vg, vm, base, desc, GETPC(), 8, 1 << (MSZ), \ + off_##OFS##_d, sve_ld1##MEM##_host, sve_ld1##MEM##_tlb); \ + } + +DO_LD1_ZPZ_S(bsu, zsu, MO_8) +DO_LD1_ZPZ_S(bsu, zss, MO_8) +DO_LD1_ZPZ_D(bdu, zsu, MO_8) +DO_LD1_ZPZ_D(bdu, zss, MO_8) +DO_LD1_ZPZ_D(bdu, zd, MO_8) + +DO_LD1_ZPZ_S(bss, zsu, MO_8) +DO_LD1_ZPZ_S(bss, zss, MO_8) +DO_LD1_ZPZ_D(bds, zsu, MO_8) +DO_LD1_ZPZ_D(bds, zss, MO_8) +DO_LD1_ZPZ_D(bds, zd, MO_8) + +DO_LD1_ZPZ_S(hsu_le, zsu, MO_16) +DO_LD1_ZPZ_S(hsu_le, zss, MO_16) +DO_LD1_ZPZ_D(hdu_le, zsu, MO_16) +DO_LD1_ZPZ_D(hdu_le, zss, MO_16) +DO_LD1_ZPZ_D(hdu_le, zd, MO_16) + +DO_LD1_ZPZ_S(hsu_be, zsu, MO_16) +DO_LD1_ZPZ_S(hsu_be, zss, MO_16) +DO_LD1_ZPZ_D(hdu_be, zsu, MO_16) +DO_LD1_ZPZ_D(hdu_be, zss, MO_16) +DO_LD1_ZPZ_D(hdu_be, zd, MO_16) + +DO_LD1_ZPZ_S(hss_le, zsu, MO_16) +DO_LD1_ZPZ_S(hss_le, zss, MO_16) +DO_LD1_ZPZ_D(hds_le, zsu, MO_16) +DO_LD1_ZPZ_D(hds_le, zss, MO_16) +DO_LD1_ZPZ_D(hds_le, zd, MO_16) + +DO_LD1_ZPZ_S(hss_be, zsu, MO_16) +DO_LD1_ZPZ_S(hss_be, zss, MO_16) +DO_LD1_ZPZ_D(hds_be, zsu, MO_16) +DO_LD1_ZPZ_D(hds_be, zss, MO_16) +DO_LD1_ZPZ_D(hds_be, zd, MO_16) + 
+DO_LD1_ZPZ_S(ss_le, zsu, MO_32) +DO_LD1_ZPZ_S(ss_le, zss, MO_32) +DO_LD1_ZPZ_D(sdu_le, zsu, MO_32) +DO_LD1_ZPZ_D(sdu_le, zss, MO_32) +DO_LD1_ZPZ_D(sdu_le, zd, MO_32) + +DO_LD1_ZPZ_S(ss_be, zsu, MO_32) +DO_LD1_ZPZ_S(ss_be, zss, MO_32) +DO_LD1_ZPZ_D(sdu_be, zsu, MO_32) +DO_LD1_ZPZ_D(sdu_be, zss, MO_32) +DO_LD1_ZPZ_D(sdu_be, zd, MO_32) + +DO_LD1_ZPZ_D(sds_le, zsu, MO_32) +DO_LD1_ZPZ_D(sds_le, zss, MO_32) +DO_LD1_ZPZ_D(sds_le, zd, MO_32) + +DO_LD1_ZPZ_D(sds_be, zsu, MO_32) +DO_LD1_ZPZ_D(sds_be, zss, MO_32) +DO_LD1_ZPZ_D(sds_be, zd, MO_32) + +DO_LD1_ZPZ_D(dd_le, zsu, MO_64) +DO_LD1_ZPZ_D(dd_le, zss, MO_64) +DO_LD1_ZPZ_D(dd_le, zd, MO_64) + +DO_LD1_ZPZ_D(dd_be, zsu, MO_64) +DO_LD1_ZPZ_D(dd_be, zss, MO_64) +DO_LD1_ZPZ_D(dd_be, zd, MO_64) #undef DO_LD1_ZPZ_S #undef DO_LD1_ZPZ_D /* First fault loads with a vector index. */ -/* Load one element into VD+REG_OFF from (ENV,VADDR) without faulting. - * The controlling predicate is known to be true. Return true if the - * load was successful. - */ -typedef bool sve_ld1_nf_fn(CPUARMState *env, void *vd, intptr_t reg_off, - target_ulong vaddr, int mmu_idx); - -#ifdef _MSC_VER -#define DO_LD_NF(NAME, H, TYPEE, TYPEM, HOST) \ -static bool sve_ld##NAME##_nf(CPUARMState *env, void *vd, intptr_t reg_off, \ - target_ulong addr, int mmu_idx) \ -{ \ - struct uc_struct *uc = env->uc; \ - target_ulong next_page = 0ULL - (addr | TARGET_PAGE_MASK); \ - if (likely(next_page - addr >= sizeof(TYPEM))) { \ - void *host = tlb_vaddr_to_host(env, addr, MMU_DATA_LOAD, mmu_idx); \ - if (likely(host)) { \ - TYPEM val = HOST(host); \ - *(TYPEE *)((char *)vd + H(reg_off)) = val; \ - return true; \ - } \ - } \ - return false; \ -} -#else -#define DO_LD_NF(NAME, H, TYPEE, TYPEM, HOST) \ -static bool sve_ld##NAME##_nf(CPUARMState *env, void *vd, intptr_t reg_off, \ - target_ulong addr, int mmu_idx) \ -{ \ - struct uc_struct *uc = env->uc; \ - target_ulong next_page = -(addr | TARGET_PAGE_MASK); \ - if (likely(next_page - addr >= sizeof(TYPEM))) { \ - void *host = tlb_vaddr_to_host(env, addr, MMU_DATA_LOAD, mmu_idx); \ - if (likely(host)) { \ - TYPEM val = HOST(host); \ - *(TYPEE *)((char *)vd + H(reg_off)) = val; \ - return true; \ - } \ - } \ - return false; \ -} -#endif - -DO_LD_NF(bsu, H1_4, uint32_t, uint8_t, ldub_p) -DO_LD_NF(bss, H1_4, uint32_t, int8_t, ldsb_p) -DO_LD_NF(bdu, , uint64_t, uint8_t, ldub_p) -DO_LD_NF(bds, , uint64_t, int8_t, ldsb_p) - -DO_LD_NF(hsu_le, H1_4, uint32_t, uint16_t, lduw_le_p) -DO_LD_NF(hss_le, H1_4, uint32_t, int16_t, ldsw_le_p) -DO_LD_NF(hsu_be, H1_4, uint32_t, uint16_t, lduw_be_p) -DO_LD_NF(hss_be, H1_4, uint32_t, int16_t, ldsw_be_p) -DO_LD_NF(hdu_le, , uint64_t, uint16_t, lduw_le_p) -DO_LD_NF(hds_le, , uint64_t, int16_t, ldsw_le_p) -DO_LD_NF(hdu_be, , uint64_t, uint16_t, lduw_be_p) -DO_LD_NF(hds_be, , uint64_t, int16_t, ldsw_be_p) - -DO_LD_NF(ss_le, H1_4, uint32_t, uint32_t, ldl_le_p) -DO_LD_NF(ss_be, H1_4, uint32_t, uint32_t, ldl_be_p) -DO_LD_NF(sdu_le, , uint64_t, uint32_t, ldl_le_p) -DO_LD_NF(sds_le, , uint64_t, int32_t, ldl_le_p) -DO_LD_NF(sdu_be, , uint64_t, uint32_t, ldl_be_p) -DO_LD_NF(sds_be, , uint64_t, int32_t, ldl_be_p) - -DO_LD_NF(dd_le, , uint64_t, uint64_t, ldq_le_p) -DO_LD_NF(dd_be, , uint64_t, uint64_t, ldq_be_p) - /* - * Common helper for all gather first-faulting loads. + * Common helpers for all gather first-faulting loads. 
*/ -static inline void sve_ldff1_zs(CPUARMState *env, void *vd, void *vg, void *vm, - target_ulong base, uint32_t desc, uintptr_t ra, - zreg_off_fn *off_fn, sve_ld1_tlb_fn *tlb_fn, - sve_ld1_nf_fn *nonfault_fn) + +static inline QEMU_ALWAYS_INLINE void +sve_ldff1_z(CPUARMState *env, void *vd, uint64_t *vg, void *vm, + target_ulong base, uint32_t desc, uintptr_t retaddr, + uint32_t mtedesc, const int esz, const int msz, zreg_off_fn *off_fn, + sve_ldst1_host_fn *host_fn, sve_ldst1_tlb_fn *tlb_fn) { - const TCGMemOpIdx oi = extract32(desc, SIMD_DATA_SHIFT, MEMOPIDX_SHIFT); - const int mmu_idx = get_mmuidx(oi); - const int scale = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 2); - intptr_t reg_off, reg_max = simd_oprsz(desc); - target_ulong addr; + uc_engine *uc = env->uc; + const int mmu_idx = cpu_mmu_index(env, false); + const intptr_t reg_max = simd_oprsz(desc); + const int scale = simd_data(desc); + const int esize = 1 << esz; + const int msize = 1 << msz; + intptr_t reg_off; + SVEHostPage info; + target_ulong addr, in_page; /* Skip to the first true predicate. */ - reg_off = find_next_active(vg, 0, reg_max, MO_32); - if (likely(reg_off < reg_max)) { - /* Perform one normal read, which will fault or not. */ - set_helper_retaddr(ra); - addr = off_fn(vm, reg_off); - addr = base + (addr << scale); - tlb_fn(env, vd, reg_off, addr, oi, ra); + reg_off = find_next_active(vg, 0, reg_max, esz); + if (unlikely(reg_off >= reg_max)) { + /* The entire predicate was false; no load occurs. */ + memset(vd, 0, reg_max); + return; + } - /* The rest of the reads will be non-faulting. */ - clear_helper_retaddr(); + /* + * Probe the first element, allowing faults. + */ + addr = base + (off_fn(vm, reg_off) << scale); + if (mtedesc) { + mte_check1(env, mtedesc, addr, retaddr); } + tlb_fn(env, vd, reg_off, addr, retaddr); - /* After any fault, zero the leading predicated false elements. */ + /* After any fault, zero the other elements. */ swap_memzero(vd, reg_off); + reg_off += esize; + swap_memzero((char*)vd + reg_off, reg_max - reg_off); - while (likely((reg_off += 4) < reg_max)) { - uint64_t pg = *(uint64_t *)((char *)vg + (reg_off >> 6) * 8); - if (likely((pg >> (reg_off & 63)) & 1)) { - addr = off_fn(vm, reg_off); - addr = base + (addr << scale); - if (!nonfault_fn(env, vd, reg_off, addr, mmu_idx)) { - record_fault(env, reg_off, reg_max); - break; + /* + * Probe the remaining elements, not allowing faults. + */ + while (reg_off < reg_max) { + uint64_t pg = vg[reg_off >> 6]; + do { + if (likely((pg >> (reg_off & 63)) & 1)) { + addr = base + (off_fn(vm, reg_off) << scale); + in_page = -(addr | TARGET_PAGE_MASK); + + if (unlikely(in_page < msize)) { + /* Stop if the element crosses a page boundary. 
*/ + goto fault; + } + + sve_probe_page(&info, true, env, addr, 0, MMU_DATA_LOAD, + mmu_idx, retaddr); + if (unlikely(info.flags & (TLB_INVALID_MASK | TLB_MMIO))) { + goto fault; + } + if (unlikely(info.flags & TLB_WATCHPOINT) && + (cpu_watchpoint_address_matches(env_cpu(env), addr, msize) & + BP_MEM_READ)) { + goto fault; + } + if (mtedesc && arm_tlb_mte_tagged(&info.attrs) && + !mte_probe1(env, mtedesc, addr)) { + goto fault; + } + + host_fn(vd, reg_off, info.host); } - } else { - *(uint32_t *)((char *)vd + H1_4(reg_off)) = 0; - } + reg_off += esize; + } while (reg_off & 63); } + return; + +fault: + record_fault(env, reg_off, reg_max); } -static inline void sve_ldff1_zd(CPUARMState *env, void *vd, void *vg, void *vm, - target_ulong base, uint32_t desc, uintptr_t ra, - zreg_off_fn *off_fn, sve_ld1_tlb_fn *tlb_fn, - sve_ld1_nf_fn *nonfault_fn) +static inline QEMU_ALWAYS_INLINE void +sve_ldff1_z_mte(CPUARMState *env, void *vd, uint64_t *vg, void *vm, + target_ulong base, uint32_t desc, uintptr_t retaddr, + const int esz, const int msz, zreg_off_fn *off_fn, + sve_ldst1_host_fn *host_fn, sve_ldst1_tlb_fn *tlb_fn) { - const TCGMemOpIdx oi = extract32(desc, SIMD_DATA_SHIFT, MEMOPIDX_SHIFT); - const int mmu_idx = get_mmuidx(oi); - const int scale = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 2); - intptr_t reg_off, reg_max = simd_oprsz(desc); - target_ulong addr; - - /* Skip to the first true predicate. */ - reg_off = find_next_active(vg, 0, reg_max, MO_64); - if (likely(reg_off < reg_max)) { - /* Perform one normal read, which will fault or not. */ - set_helper_retaddr(ra); - addr = off_fn(vm, reg_off); - addr = base + (addr << scale); - tlb_fn(env, vd, reg_off, addr, oi, ra); + uint32_t mtedesc = desc >> (SIMD_DATA_SHIFT + SVE_MTEDESC_SHIFT); + /* Remove mtedesc from the normal sve descriptor. */ + desc = extract32(desc, 0, SIMD_DATA_SHIFT + SVE_MTEDESC_SHIFT); - /* The rest of the reads will be non-faulting. */ - clear_helper_retaddr(); + /* + * ??? TODO: For the 32-bit offset extractions, base + ofs cannot + * offset base entirely over the address space hole to change the + * pointer tag, or change the bit55 selector. So we could here + * examine TBI + TCMA like we do for sve_ldN_r_mte(). + */ + sve_ldff1_z(env, vd, vg, vm, base, desc, retaddr, mtedesc, esz, msz, off_fn, + host_fn, tlb_fn); +} + +#define DO_LDFF1_ZPZ_S(MEM, OFS, MSZ) \ + void HELPER(sve_ldff##MEM##_##OFS)(CPUARMState * env, void *vd, void *vg, \ + void *vm, target_ulong base, \ + uint32_t desc) \ + { \ + sve_ldff1_z(env, vd, vg, vm, base, desc, GETPC(), 0, MO_32, MSZ, \ + off_##OFS##_s, sve_ld1##MEM##_host, sve_ld1##MEM##_tlb); \ + } \ + void HELPER(sve_ldff##MEM##_##OFS##_mte)(CPUARMState * env, void *vd, \ + void *vg, void *vm, \ + target_ulong base, uint32_t desc) \ + { \ + sve_ldff1_z_mte(env, vd, vg, vm, base, desc, GETPC(), MO_32, MSZ, \ + off_##OFS##_s, sve_ld1##MEM##_host, \ + sve_ld1##MEM##_tlb); \ } - /* After any fault, zero the leading predicated false elements. 
*/ - swap_memzero(vd, reg_off); - - while (likely((reg_off += 8) < reg_max)) { - uint8_t pg = *(uint8_t *)((char *)vg + H1(reg_off >> 3)); - if (likely(pg & 1)) { - addr = off_fn(vm, reg_off); - addr = base + (addr << scale); - if (!nonfault_fn(env, vd, reg_off, addr, mmu_idx)) { - record_fault(env, reg_off, reg_max); - break; - } - } else { - *(uint64_t *)((char *)vd + reg_off) = 0; - } +#define DO_LDFF1_ZPZ_D(MEM, OFS, MSZ) \ + void HELPER(sve_ldff##MEM##_##OFS)(CPUARMState * env, void *vd, void *vg, \ + void *vm, target_ulong base, \ + uint32_t desc) \ + { \ + sve_ldff1_z(env, vd, vg, vm, base, desc, GETPC(), 0, MO_64, MSZ, \ + off_##OFS##_d, sve_ld1##MEM##_host, sve_ld1##MEM##_tlb); \ + } \ + void HELPER(sve_ldff##MEM##_##OFS##_mte)(CPUARMState * env, void *vd, \ + void *vg, void *vm, \ + target_ulong base, uint32_t desc) \ + { \ + sve_ldff1_z_mte(env, vd, vg, vm, base, desc, GETPC(), MO_64, MSZ, \ + off_##OFS##_d, sve_ld1##MEM##_host, \ + sve_ld1##MEM##_tlb); \ } -} -#define DO_LDFF1_ZPZ_S(MEM, OFS) \ -void HELPER(sve_ldff##MEM##_##OFS) \ - (CPUARMState *env, void *vd, void *vg, void *vm, \ - target_ulong base, uint32_t desc) \ -{ \ - sve_ldff1_zs(env, vd, vg, vm, base, desc, GETPC(), \ - off_##OFS##_s, sve_ld1##MEM##_tlb, sve_ld##MEM##_nf); \ -} - -#define DO_LDFF1_ZPZ_D(MEM, OFS) \ -void HELPER(sve_ldff##MEM##_##OFS) \ - (CPUARMState *env, void *vd, void *vg, void *vm, \ - target_ulong base, uint32_t desc) \ -{ \ - sve_ldff1_zd(env, vd, vg, vm, base, desc, GETPC(), \ - off_##OFS##_d, sve_ld1##MEM##_tlb, sve_ld##MEM##_nf); \ -} - -DO_LDFF1_ZPZ_S(bsu, zsu) -DO_LDFF1_ZPZ_S(bsu, zss) -DO_LDFF1_ZPZ_D(bdu, zsu) -DO_LDFF1_ZPZ_D(bdu, zss) -DO_LDFF1_ZPZ_D(bdu, zd) - -DO_LDFF1_ZPZ_S(bss, zsu) -DO_LDFF1_ZPZ_S(bss, zss) -DO_LDFF1_ZPZ_D(bds, zsu) -DO_LDFF1_ZPZ_D(bds, zss) -DO_LDFF1_ZPZ_D(bds, zd) - -DO_LDFF1_ZPZ_S(hsu_le, zsu) -DO_LDFF1_ZPZ_S(hsu_le, zss) -DO_LDFF1_ZPZ_D(hdu_le, zsu) -DO_LDFF1_ZPZ_D(hdu_le, zss) -DO_LDFF1_ZPZ_D(hdu_le, zd) - -DO_LDFF1_ZPZ_S(hsu_be, zsu) -DO_LDFF1_ZPZ_S(hsu_be, zss) -DO_LDFF1_ZPZ_D(hdu_be, zsu) -DO_LDFF1_ZPZ_D(hdu_be, zss) -DO_LDFF1_ZPZ_D(hdu_be, zd) - -DO_LDFF1_ZPZ_S(hss_le, zsu) -DO_LDFF1_ZPZ_S(hss_le, zss) -DO_LDFF1_ZPZ_D(hds_le, zsu) -DO_LDFF1_ZPZ_D(hds_le, zss) -DO_LDFF1_ZPZ_D(hds_le, zd) - -DO_LDFF1_ZPZ_S(hss_be, zsu) -DO_LDFF1_ZPZ_S(hss_be, zss) -DO_LDFF1_ZPZ_D(hds_be, zsu) -DO_LDFF1_ZPZ_D(hds_be, zss) -DO_LDFF1_ZPZ_D(hds_be, zd) - -DO_LDFF1_ZPZ_S(ss_le, zsu) -DO_LDFF1_ZPZ_S(ss_le, zss) -DO_LDFF1_ZPZ_D(sdu_le, zsu) -DO_LDFF1_ZPZ_D(sdu_le, zss) -DO_LDFF1_ZPZ_D(sdu_le, zd) - -DO_LDFF1_ZPZ_S(ss_be, zsu) -DO_LDFF1_ZPZ_S(ss_be, zss) -DO_LDFF1_ZPZ_D(sdu_be, zsu) -DO_LDFF1_ZPZ_D(sdu_be, zss) -DO_LDFF1_ZPZ_D(sdu_be, zd) - -DO_LDFF1_ZPZ_D(sds_le, zsu) -DO_LDFF1_ZPZ_D(sds_le, zss) -DO_LDFF1_ZPZ_D(sds_le, zd) - -DO_LDFF1_ZPZ_D(sds_be, zsu) -DO_LDFF1_ZPZ_D(sds_be, zss) -DO_LDFF1_ZPZ_D(sds_be, zd) - -DO_LDFF1_ZPZ_D(dd_le, zsu) -DO_LDFF1_ZPZ_D(dd_le, zss) -DO_LDFF1_ZPZ_D(dd_le, zd) - -DO_LDFF1_ZPZ_D(dd_be, zsu) -DO_LDFF1_ZPZ_D(dd_be, zss) -DO_LDFF1_ZPZ_D(dd_be, zd) +DO_LDFF1_ZPZ_S(bsu, zsu, MO_8) +DO_LDFF1_ZPZ_S(bsu, zss, MO_8) +DO_LDFF1_ZPZ_D(bdu, zsu, MO_8) +DO_LDFF1_ZPZ_D(bdu, zss, MO_8) +DO_LDFF1_ZPZ_D(bdu, zd, MO_8) + +DO_LDFF1_ZPZ_S(bss, zsu, MO_8) +DO_LDFF1_ZPZ_S(bss, zss, MO_8) +DO_LDFF1_ZPZ_D(bds, zsu, MO_8) +DO_LDFF1_ZPZ_D(bds, zss, MO_8) +DO_LDFF1_ZPZ_D(bds, zd, MO_8) + +DO_LDFF1_ZPZ_S(hsu_le, zsu, MO_16) +DO_LDFF1_ZPZ_S(hsu_le, zss, MO_16) +DO_LDFF1_ZPZ_D(hdu_le, zsu, MO_16) +DO_LDFF1_ZPZ_D(hdu_le, zss, MO_16) +DO_LDFF1_ZPZ_D(hdu_le, zd, MO_16) + 
+DO_LDFF1_ZPZ_S(hsu_be, zsu, MO_16) +DO_LDFF1_ZPZ_S(hsu_be, zss, MO_16) +DO_LDFF1_ZPZ_D(hdu_be, zsu, MO_16) +DO_LDFF1_ZPZ_D(hdu_be, zss, MO_16) +DO_LDFF1_ZPZ_D(hdu_be, zd, MO_16) + +DO_LDFF1_ZPZ_S(hss_le, zsu, MO_16) +DO_LDFF1_ZPZ_S(hss_le, zss, MO_16) +DO_LDFF1_ZPZ_D(hds_le, zsu, MO_16) +DO_LDFF1_ZPZ_D(hds_le, zss, MO_16) +DO_LDFF1_ZPZ_D(hds_le, zd, MO_16) + +DO_LDFF1_ZPZ_S(hss_be, zsu, MO_16) +DO_LDFF1_ZPZ_S(hss_be, zss, MO_16) +DO_LDFF1_ZPZ_D(hds_be, zsu, MO_16) +DO_LDFF1_ZPZ_D(hds_be, zss, MO_16) +DO_LDFF1_ZPZ_D(hds_be, zd, MO_16) + +DO_LDFF1_ZPZ_S(ss_le, zsu, MO_32) +DO_LDFF1_ZPZ_S(ss_le, zss, MO_32) +DO_LDFF1_ZPZ_D(sdu_le, zsu, MO_32) +DO_LDFF1_ZPZ_D(sdu_le, zss, MO_32) +DO_LDFF1_ZPZ_D(sdu_le, zd, MO_32) + +DO_LDFF1_ZPZ_S(ss_be, zsu, MO_32) +DO_LDFF1_ZPZ_S(ss_be, zss, MO_32) +DO_LDFF1_ZPZ_D(sdu_be, zsu, MO_32) +DO_LDFF1_ZPZ_D(sdu_be, zss, MO_32) +DO_LDFF1_ZPZ_D(sdu_be, zd, MO_32) + +DO_LDFF1_ZPZ_D(sds_le, zsu, MO_32) +DO_LDFF1_ZPZ_D(sds_le, zss, MO_32) +DO_LDFF1_ZPZ_D(sds_le, zd, MO_32) + +DO_LDFF1_ZPZ_D(sds_be, zsu, MO_32) +DO_LDFF1_ZPZ_D(sds_be, zss, MO_32) +DO_LDFF1_ZPZ_D(sds_be, zd, MO_32) + +DO_LDFF1_ZPZ_D(dd_le, zsu, MO_64) +DO_LDFF1_ZPZ_D(dd_le, zss, MO_64) +DO_LDFF1_ZPZ_D(dd_le, zd, MO_64) + +DO_LDFF1_ZPZ_D(dd_be, zsu, MO_64) +DO_LDFF1_ZPZ_D(dd_be, zss, MO_64) +DO_LDFF1_ZPZ_D(dd_be, zd, MO_64) /* Stores with a vector index. */ -static void sve_st1_zs(CPUARMState *env, void *vd, void *vg, void *vm, - target_ulong base, uint32_t desc, uintptr_t ra, - zreg_off_fn *off_fn, sve_ld1_tlb_fn *tlb_fn) +static inline QEMU_ALWAYS_INLINE void +sve_st1_z(CPUARMState *env, void *vd, uint64_t *vg, void *vm, target_ulong base, + uint32_t desc, uintptr_t retaddr, uint32_t mtedesc, int esize, + int msize, zreg_off_fn *off_fn, sve_ldst1_host_fn *host_fn, + sve_ldst1_tlb_fn *tlb_fn) { - const TCGMemOpIdx oi = extract32(desc, SIMD_DATA_SHIFT, MEMOPIDX_SHIFT); - const int scale = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 2); - intptr_t i, oprsz = simd_oprsz(desc); + uc_engine *uc = env->uc; + const int mmu_idx = cpu_mmu_index(env, false); + const intptr_t reg_max = simd_oprsz(desc); + const int scale = simd_data(desc); + void *host[ARM_MAX_VQ * 4]; + intptr_t reg_off, i; + SVEHostPage info, info2; - set_helper_retaddr(ra); - for (i = 0; i < oprsz; ) { - uint16_t pg = *(uint16_t *)((char *)vg + H1_2(i >> 3)); + /* + * Probe all of the elements for host addresses and flags. + */ + i = reg_off = 0; + do { + uint64_t pg = vg[reg_off >> 6]; do { - if (likely(pg & 1)) { - target_ulong off = off_fn(vm, i); - tlb_fn(env, vd, i, base + (off << scale), oi, ra); + target_ulong addr = base + (off_fn(vm, reg_off) << scale); + target_ulong in_page = -(addr | TARGET_PAGE_MASK); + + host[i] = NULL; + if (likely((pg >> (reg_off & 63)) & 1)) { + if (likely(in_page >= msize)) { + sve_probe_page(&info, false, env, addr, 0, MMU_DATA_STORE, + mmu_idx, retaddr); + host[i] = info.host; + } else { + /* + * Element crosses the page boundary. + * Probe both pages, but do not record the host address, + * so that we use the slow path. 
+ */ + sve_probe_page(&info, false, env, addr, 0, MMU_DATA_STORE, + mmu_idx, retaddr); + sve_probe_page(&info2, false, env, addr + in_page, 0, + MMU_DATA_STORE, mmu_idx, retaddr); + info.flags |= info2.flags; + } + + if (unlikely(info.flags & TLB_WATCHPOINT)) { + cpu_check_watchpoint(env_cpu(env), addr, msize, info.attrs, + BP_MEM_WRITE, retaddr); + } + + if (mtedesc && arm_tlb_mte_tagged(&info.attrs)) { + mte_check1(env, mtedesc, addr, retaddr); + } } - i += 4, pg >>= 4; - } while (i & 15); - } - clear_helper_retaddr(); + i += 1; + reg_off += esize; + } while (reg_off & 63); + } while (reg_off < reg_max); + + /* + * Now that we have recognized all exceptions except SyncExternal + * (from TLB_MMIO), which we cannot avoid, perform all of the stores. + * + * Note for the common case of an element in RAM, not crossing a page + * boundary, we have stored the host address in host[]. This doubles + * as a first-level check against the predicate, since only enabled + * elements have non-null host addresses. + */ + i = reg_off = 0; + do { + void *h = host[i]; + if (likely(h != NULL)) { + host_fn(vd, reg_off, h); + } else if ((vg[reg_off >> 6] >> (reg_off & 63)) & 1) { + target_ulong addr = base + (off_fn(vm, reg_off) << scale); + tlb_fn(env, vd, reg_off, addr, retaddr); + } + i += 1; + reg_off += esize; + } while (reg_off < reg_max); } -static void sve_st1_zd(CPUARMState *env, void *vd, void *vg, void *vm, - target_ulong base, uint32_t desc, uintptr_t ra, - zreg_off_fn *off_fn, sve_ld1_tlb_fn *tlb_fn) +static inline QEMU_ALWAYS_INLINE void +sve_st1_z_mte(CPUARMState *env, void *vd, uint64_t *vg, void *vm, + target_ulong base, uint32_t desc, uintptr_t retaddr, int esize, + int msize, zreg_off_fn *off_fn, sve_ldst1_host_fn *host_fn, + sve_ldst1_tlb_fn *tlb_fn) { - const TCGMemOpIdx oi = extract32(desc, SIMD_DATA_SHIFT, MEMOPIDX_SHIFT); - const int scale = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 2); - intptr_t i, oprsz = simd_oprsz(desc) / 8; + uint32_t mtedesc = desc >> (SIMD_DATA_SHIFT + SVE_MTEDESC_SHIFT); + /* Remove mtedesc from the normal sve descriptor. */ + desc = extract32(desc, 0, SIMD_DATA_SHIFT + SVE_MTEDESC_SHIFT); - set_helper_retaddr(ra); - for (i = 0; i < oprsz; i++) { - uint8_t pg = *(uint8_t *)((char *)vg + H1(i)); - if (likely(pg & 1)) { - target_ulong off = off_fn(vm, i * 8); - tlb_fn(env, vd, i * 8, base + (off << scale), oi, ra); - } + /* + * ??? TODO: For the 32-bit offset extractions, base + ofs cannot + * offset base entirely over the address space hole to change the + * pointer tag, or change the bit55 selector. So we could here + * examine TBI + TCMA like we do for sve_ldN_r_mte(). 
+ */ + sve_st1_z(env, vd, vg, vm, base, desc, retaddr, mtedesc, esize, msize, + off_fn, host_fn, tlb_fn); +} + +#define DO_ST1_ZPZ_S(MEM, OFS, MSZ) \ + void HELPER(sve_st##MEM##_##OFS)(CPUARMState * env, void *vd, void *vg, \ + void *vm, target_ulong base, \ + uint32_t desc) \ + { \ + sve_st1_z(env, vd, vg, vm, base, desc, GETPC(), 0, 4, 1 << MSZ, \ + off_##OFS##_s, sve_st1##MEM##_host, sve_st1##MEM##_tlb); \ + } \ + void HELPER(sve_st##MEM##_##OFS##_mte)(CPUARMState * env, void *vd, \ + void *vg, void *vm, \ + target_ulong base, uint32_t desc) \ + { \ + sve_st1_z_mte(env, vd, vg, vm, base, desc, GETPC(), 4, 1 << MSZ, \ + off_##OFS##_s, sve_st1##MEM##_host, sve_st1##MEM##_tlb); \ } - clear_helper_retaddr(); -} - -#define DO_ST1_ZPZ_S(MEM, OFS) \ -void QEMU_FLATTEN HELPER(sve_st##MEM##_##OFS) \ - (CPUARMState *env, void *vd, void *vg, void *vm, \ - target_ulong base, uint32_t desc) \ -{ \ - sve_st1_zs(env, vd, vg, vm, base, desc, GETPC(), \ - off_##OFS##_s, sve_st1##MEM##_tlb); \ -} - -#define DO_ST1_ZPZ_D(MEM, OFS) \ -void QEMU_FLATTEN HELPER(sve_st##MEM##_##OFS) \ - (CPUARMState *env, void *vd, void *vg, void *vm, \ - target_ulong base, uint32_t desc) \ -{ \ - sve_st1_zd(env, vd, vg, vm, base, desc, GETPC(), \ - off_##OFS##_d, sve_st1##MEM##_tlb); \ -} - -DO_ST1_ZPZ_S(bs, zsu) -DO_ST1_ZPZ_S(hs_le, zsu) -DO_ST1_ZPZ_S(hs_be, zsu) -DO_ST1_ZPZ_S(ss_le, zsu) -DO_ST1_ZPZ_S(ss_be, zsu) - -DO_ST1_ZPZ_S(bs, zss) -DO_ST1_ZPZ_S(hs_le, zss) -DO_ST1_ZPZ_S(hs_be, zss) -DO_ST1_ZPZ_S(ss_le, zss) -DO_ST1_ZPZ_S(ss_be, zss) - -DO_ST1_ZPZ_D(bd, zsu) -DO_ST1_ZPZ_D(hd_le, zsu) -DO_ST1_ZPZ_D(hd_be, zsu) -DO_ST1_ZPZ_D(sd_le, zsu) -DO_ST1_ZPZ_D(sd_be, zsu) -DO_ST1_ZPZ_D(dd_le, zsu) -DO_ST1_ZPZ_D(dd_be, zsu) - -DO_ST1_ZPZ_D(bd, zss) -DO_ST1_ZPZ_D(hd_le, zss) -DO_ST1_ZPZ_D(hd_be, zss) -DO_ST1_ZPZ_D(sd_le, zss) -DO_ST1_ZPZ_D(sd_be, zss) -DO_ST1_ZPZ_D(dd_le, zss) -DO_ST1_ZPZ_D(dd_be, zss) - -DO_ST1_ZPZ_D(bd, zd) -DO_ST1_ZPZ_D(hd_le, zd) -DO_ST1_ZPZ_D(hd_be, zd) -DO_ST1_ZPZ_D(sd_le, zd) -DO_ST1_ZPZ_D(sd_be, zd) -DO_ST1_ZPZ_D(dd_le, zd) -DO_ST1_ZPZ_D(dd_be, zd) + +#define DO_ST1_ZPZ_D(MEM, OFS, MSZ) \ + void HELPER(sve_st##MEM##_##OFS)(CPUARMState * env, void *vd, void *vg, \ + void *vm, target_ulong base, \ + uint32_t desc) \ + { \ + sve_st1_z(env, vd, vg, vm, base, desc, GETPC(), 0, 8, 1 << MSZ, \ + off_##OFS##_d, sve_st1##MEM##_host, sve_st1##MEM##_tlb); \ + } \ + void HELPER(sve_st##MEM##_##OFS##_mte)(CPUARMState * env, void *vd, \ + void *vg, void *vm, \ + target_ulong base, uint32_t desc) \ + { \ + sve_st1_z_mte(env, vd, vg, vm, base, desc, GETPC(), 8, 1 << MSZ, \ + off_##OFS##_d, sve_st1##MEM##_host, sve_st1##MEM##_tlb); \ + } + +DO_ST1_ZPZ_S(bs, zsu, MO_8) +DO_ST1_ZPZ_S(hs_le, zsu, MO_16) +DO_ST1_ZPZ_S(hs_be, zsu, MO_16) +DO_ST1_ZPZ_S(ss_le, zsu, MO_32) +DO_ST1_ZPZ_S(ss_be, zsu, MO_32) + +DO_ST1_ZPZ_S(bs, zss, MO_8) +DO_ST1_ZPZ_S(hs_le, zss, MO_16) +DO_ST1_ZPZ_S(hs_be, zss, MO_16) +DO_ST1_ZPZ_S(ss_le, zss, MO_32) +DO_ST1_ZPZ_S(ss_be, zss, MO_32) + +DO_ST1_ZPZ_D(bd, zsu, MO_8) +DO_ST1_ZPZ_D(hd_le, zsu, MO_16) +DO_ST1_ZPZ_D(hd_be, zsu, MO_16) +DO_ST1_ZPZ_D(sd_le, zsu, MO_32) +DO_ST1_ZPZ_D(sd_be, zsu, MO_32) +DO_ST1_ZPZ_D(dd_le, zsu, MO_64) +DO_ST1_ZPZ_D(dd_be, zsu, MO_64) + +DO_ST1_ZPZ_D(bd, zss, MO_8) +DO_ST1_ZPZ_D(hd_le, zss, MO_16) +DO_ST1_ZPZ_D(hd_be, zss, MO_16) +DO_ST1_ZPZ_D(sd_le, zss, MO_32) +DO_ST1_ZPZ_D(sd_be, zss, MO_32) +DO_ST1_ZPZ_D(dd_le, zss, MO_64) +DO_ST1_ZPZ_D(dd_be, zss, MO_64) + +DO_ST1_ZPZ_D(bd, zd, MO_8) +DO_ST1_ZPZ_D(hd_le, zd, MO_16) +DO_ST1_ZPZ_D(hd_be, zd, MO_16) +DO_ST1_ZPZ_D(sd_le, zd, 
MO_32) +DO_ST1_ZPZ_D(sd_be, zd, MO_32) +DO_ST1_ZPZ_D(dd_le, zd, MO_64) +DO_ST1_ZPZ_D(dd_be, zd, MO_64) #undef DO_ST1_ZPZ_S #undef DO_ST1_ZPZ_D diff --git a/qemu/target/arm/tlb_helper.c b/qemu/target/arm/tlb_helper.c index e19d6c17a3..c3335f75ac 100644 --- a/qemu/target/arm/tlb_helper.c +++ b/qemu/target/arm/tlb_helper.c @@ -31,7 +31,7 @@ static inline uint32_t merge_syn_data_abort(uint32_t template_syn, * ISV field. */ if (!(template_syn & ARM_EL_ISV) || target_el != 2 || s1ptw) { - syn = syn_data_abort_no_iss(same_el, + syn = syn_data_abort_no_iss(same_el, 0, ea, 0, s1ptw, is_write, fsc); } else { /* @@ -154,6 +154,7 @@ bool arm_cpu_tlb_fill(CPUState *cs, vaddr address, int size, int prot, ret; MemTxAttrs attrs = { 0 }; ARMMMUFaultInfo fi = { 0 }; + ARMCacheAttrs cacheattrs = {}; /* * Walk the page table and (if the mapping exists) add the page @@ -163,7 +164,8 @@ bool arm_cpu_tlb_fill(CPUState *cs, vaddr address, int size, */ ret = get_phys_addr(&cpu->env, address, access_type, core_to_arm_mmu_idx(&cpu->env, mmu_idx), - &phys_addr, &attrs, &prot, &page_size, &fi, NULL); + &phys_addr, &attrs, &prot, &page_size, + &fi, &cacheattrs); if (likely(!ret)) { /* * Map a single [sub]page. Regions smaller than our declared diff --git a/qemu/target/arm/translate-a64.c b/qemu/target/arm/translate-a64.c index 922976536e..9d8cc18836 100644 --- a/qemu/target/arm/translate-a64.c +++ b/qemu/target/arm/translate-a64.c @@ -38,11 +38,9 @@ #include "kvm-consts.h" static const char *regnames[] = { - "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7", - "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", - "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23", - "x24", "x25", "x26", "x27", "x28", "x29", "lr", "sp" -}; + "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", + "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x18", "x19", "x20", "x21", + "x22", "x23", "x24", "x25", "x26", "x27", "x28", "x29", "lr", "sp"}; enum a64_shift_type { A64_SHIFT_TYPE_LSL = 0, @@ -62,40 +60,23 @@ typedef struct AArch64DecodeTable { AArch64DecodeFn *disas_fn; } AArch64DecodeTable; -/* Function prototype for gen_ functions for calling Neon helpers */ -typedef void NeonGenOneOpEnvFn(TCGContext *, TCGv_i32, TCGv_ptr, TCGv_i32); -typedef void NeonGenTwoOpFn(TCGContext *, TCGv_i32, TCGv_i32, TCGv_i32); -typedef void NeonGenTwoOpEnvFn(TCGContext *, TCGv_i32, TCGv_ptr, TCGv_i32, TCGv_i32); -typedef void NeonGenTwo64OpFn(TCGContext *, TCGv_i64, TCGv_i64, TCGv_i64); -typedef void NeonGenTwo64OpEnvFn(TCGContext *, TCGv_i64, TCGv_ptr, TCGv_i64, TCGv_i64); -typedef void NeonGenNarrowFn(TCGContext *, TCGv_i32, TCGv_i64); -typedef void NeonGenNarrowEnvFn(TCGContext *, TCGv_i32, TCGv_ptr, TCGv_i64); -typedef void NeonGenWidenFn(TCGContext *, TCGv_i64, TCGv_i32); -typedef void NeonGenTwoSingleOPFn(TCGContext *, TCGv_i32, TCGv_i32, TCGv_i32, TCGv_ptr); -typedef void NeonGenTwoDoubleOPFn(TCGContext *, TCGv_i64, TCGv_i64, TCGv_i64, TCGv_ptr); -typedef void NeonGenOneOpFn(TCGContext *, TCGv_i64, TCGv_i64); -typedef void CryptoTwoOpFn(TCGContext *, TCGv_ptr, TCGv_ptr); -typedef void CryptoThreeOpIntFn(TCGContext *, TCGv_ptr, TCGv_ptr, TCGv_i32); -typedef void CryptoThreeOpFn(TCGContext *, TCGv_ptr, TCGv_ptr, TCGv_ptr); -typedef void AtomicThreeOpFn(TCGContext *, TCGv_i64, TCGv_i64, TCGv_i64, TCGArg, MemOp); - /* initialize TCG globals. 
*/ void a64_translate_init(struct uc_struct *uc) { int i; TCGContext *tcg_ctx = uc->tcg_ctx; - tcg_ctx->cpu_pc_arm64 = tcg_global_mem_new_i64(tcg_ctx, tcg_ctx->cpu_env, - offsetof(CPUARMState, pc), - "pc"); + tcg_ctx->cpu_pc_arm64 = tcg_global_mem_new_i64( + tcg_ctx, tcg_ctx->cpu_env, offsetof(CPUARMState, pc), "pc"); for (i = 0; i < 32; i++) { - tcg_ctx->cpu_X[i] = tcg_global_mem_new_i64(tcg_ctx, tcg_ctx->cpu_env, - offsetof(CPUARMState, xregs[i]), - regnames[i]); + tcg_ctx->cpu_X[i] = tcg_global_mem_new_i64( + tcg_ctx, tcg_ctx->cpu_env, offsetof(CPUARMState, xregs[i]), + regnames[i]); } - tcg_ctx->cpu_exclusive_high = tcg_global_mem_new_i64(tcg_ctx, tcg_ctx->cpu_env, - offsetof(CPUARMState, exclusive_high), "exclusive_high"); + tcg_ctx->cpu_exclusive_high = tcg_global_mem_new_i64( + tcg_ctx, tcg_ctx->cpu_env, offsetof(CPUARMState, exclusive_high), + "exclusive_high"); } /* @@ -140,7 +121,8 @@ static void reset_btype(DisasContext *s) TCGContext *tcg_ctx = s->uc->tcg_ctx; if (s->btype != 0) { TCGv_i32 zero = tcg_const_i32(tcg_ctx, 0); - tcg_gen_st_i32(tcg_ctx, zero, tcg_ctx->cpu_env, offsetof(CPUARMState, btype)); + tcg_gen_st_i32(tcg_ctx, zero, tcg_ctx->cpu_env, + offsetof(CPUARMState, btype)); tcg_temp_free_i32(tcg_ctx, zero); s->btype = 0; } @@ -155,7 +137,8 @@ static void set_btype(DisasContext *s, int val) tcg_debug_assert(val >= 1 && val <= 3); tcg_val = tcg_const_i32(tcg_ctx, val); - tcg_gen_st_i32(tcg_ctx, tcg_val, tcg_ctx->cpu_env, offsetof(CPUARMState, btype)); + tcg_gen_st_i32(tcg_ctx, tcg_val, tcg_ctx->cpu_env, + offsetof(CPUARMState, btype)); tcg_temp_free_i32(tcg_ctx, tcg_val); s->btype = -1; } @@ -178,8 +161,8 @@ void gen_a64_set_pc_im(TCGContext *tcg_ctx, uint64_t val) * * Here We have concatenated TBI{1,0} into tbi. */ -static void gen_top_byte_ignore(DisasContext *s, TCGv_i64 dst, - TCGv_i64 src, int tbi) +static void gen_top_byte_ignore(DisasContext *s, TCGv_i64 dst, TCGv_i64 src, + int tbi) { TCGContext *tcg_ctx = s->uc->tcg_ctx; if (tbi == 0) { @@ -217,25 +200,118 @@ static void gen_a64_set_pc(DisasContext *s, TCGv_i64 src) } /* - * Return a "clean" address for ADDR according to TBID. - * This is always a fresh temporary, as we need to be able to - * increment this independently of a dirty write-back address. + * Handle MTE and/or TBI. + * + * For TBI, ideally, we would do nothing. Proper behaviour on fault is + * for the tag to be present in the FAR_ELx register. But for user-only + * mode we do not have a TLB with which to implement this, so we must + * remove the top byte now. + * + * Always return a fresh temporary that we can increment independently + * of the write-back address. */ -static TCGv_i64 clean_data_tbi(DisasContext *s, TCGv_i64 addr) +TCGv_i64 clean_data_tbi(DisasContext *s, TCGv_i64 addr) { TCGContext *tcg_ctx = s->uc->tcg_ctx; TCGv_i64 clean = new_tmp_a64(s); - /* - * In order to get the correct value in the FAR_ELx register, - * we must present the memory subsystem with the "dirty" address - * including the TBI. In system mode we can make this work via - * the TLB, dropping the TBI during translation. But for user-only - * mode we don't have that option, and must remove the top byte now. - */ tcg_gen_mov_i64(tcg_ctx, clean, addr); return clean; } +/* Insert a zero tag into src, with the result at dst. 
*/ +static void gen_address_with_allocation_tag0(TCGContext *tcg_ctx, TCGv_i64 dst, + TCGv_i64 src) +{ + tcg_gen_andi_i64(tcg_ctx, dst, src, ~MAKE_64BIT_MASK(56, 4)); +} + +static void gen_probe_access(DisasContext *s, TCGv_i64 ptr, MMUAccessType acc, + int log2_size) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + + TCGv_i32 t_acc = tcg_const_i32(tcg_ctx, acc); + TCGv_i32 t_idx = tcg_const_i32(tcg_ctx, get_mem_index(s)); + TCGv_i32 t_size = tcg_const_i32(tcg_ctx, 1 << log2_size); + + glue(gen_helper_probe_access, UNICORN_ARCH_POSTFIX)(tcg_ctx, tcg_ctx->cpu_env, ptr, t_acc, t_idx, + t_size); + tcg_temp_free_i32(tcg_ctx, t_acc); + tcg_temp_free_i32(tcg_ctx, t_idx); + tcg_temp_free_i32(tcg_ctx, t_size); +} + +/* + * For MTE, check a single logical or atomic access. This probes a single + * address, the exact one specified. The size and alignment of the access + * is not relevant to MTE, per se, but watchpoints do require the size, + * and we want to recognize those before making any other changes to state. + */ +static TCGv_i64 gen_mte_check1_mmuidx(DisasContext *s, TCGv_i64 addr, + bool is_write, bool tag_checked, + int log2_size, bool is_unpriv, + int core_idx) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + + if (tag_checked && s->mte_active[is_unpriv]) { + TCGv_i32 tcg_desc; + TCGv_i64 ret; + int desc = 0; + + FIELD_DP32(desc, MTEDESC, MIDX, core_idx, desc); + FIELD_DP32(desc, MTEDESC, TBI, s->tbid, desc); + FIELD_DP32(desc, MTEDESC, TCMA, s->tcma, desc); + FIELD_DP32(desc, MTEDESC, WRITE, is_write, desc); + FIELD_DP32(desc, MTEDESC, ESIZE, 1 << log2_size, desc); + tcg_desc = tcg_const_i32(tcg_ctx, desc); + + ret = new_tmp_a64(s); + gen_helper_mte_check1(tcg_ctx, ret, tcg_ctx->cpu_env, tcg_desc, addr); + tcg_temp_free_i32(tcg_ctx, tcg_desc); + + return ret; + } + return clean_data_tbi(s, addr); +} + +TCGv_i64 gen_mte_check1(DisasContext *s, TCGv_i64 addr, bool is_write, + bool tag_checked, int log2_size) +{ + return gen_mte_check1_mmuidx(s, addr, is_write, tag_checked, log2_size, + false, get_mem_index(s)); +} + +/* + * For MTE, check multiple logical sequential accesses. + */ +TCGv_i64 gen_mte_checkN(DisasContext *s, TCGv_i64 addr, bool is_write, + bool tag_checked, int log2_esize, int total_size) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + + if (tag_checked && s->mte_active[0] && total_size != (1 << log2_esize)) { + TCGv_i32 tcg_desc; + TCGv_i64 ret; + int desc = 0; + + FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s), desc); + FIELD_DP32(desc, MTEDESC, TBI, s->tbid, desc); + FIELD_DP32(desc, MTEDESC, TCMA, s->tcma, desc); + FIELD_DP32(desc, MTEDESC, WRITE, is_write, desc); + FIELD_DP32(desc, MTEDESC, ESIZE, 1 << log2_esize, desc); + FIELD_DP32(desc, MTEDESC, TSIZE, total_size, desc); + tcg_desc = tcg_const_i32(tcg_ctx, desc); + + ret = new_tmp_a64(s); + gen_helper_mte_checkN(tcg_ctx, ret, tcg_ctx->cpu_env, tcg_desc, addr); + tcg_temp_free_i32(tcg_ctx, tcg_desc); + + return ret; + } + return gen_mte_check1(s, addr, is_write, tag_checked, log2_esize); +} + typedef struct DisasCompare64 { TCGCond cond; TCGv_i64 value; @@ -248,7 +324,7 @@ static void a64_test_cc(TCGContext *tcg_ctx, DisasCompare64 *c64, int cc) arm_test_cc(tcg_ctx, &c32, cc); /* Sign-extend the 32-bit value so that the GE/LT comparisons work - * properly. The NE/EQ comparisons are also fine with this choice. */ + * properly. The NE/EQ comparisons are also fine with this choice. 
*/ c64->cond = c32.cond; c64->value = tcg_temp_new_i64(tcg_ctx); tcg_gen_ext_i32_i64(tcg_ctx, c64->value, c32.value); @@ -390,6 +466,13 @@ TCGv_i64 new_tmp_a64(DisasContext *s) return s->tmp_a64[s->tmp_a64_count++] = tcg_temp_new_i64(tcg_ctx); } +TCGv_i64 new_tmp_a64_local(DisasContext *s) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + assert(s->tmp_a64_count < TMP_A64_MAX); + return s->tmp_a64[s->tmp_a64_count++] = tcg_temp_local_new_i64(tcg_ctx); +} + TCGv_i64 new_tmp_a64_zero(DisasContext *s) { TCGContext *tcg_ctx = s->uc->tcg_ctx; @@ -505,7 +588,8 @@ static TCGv_i32 read_fp_hreg(DisasContext *s, int reg) TCGContext *tcg_ctx = s->uc->tcg_ctx; TCGv_i32 v = tcg_temp_new_i32(tcg_ctx); - tcg_gen_ld16u_i32(tcg_ctx, v, tcg_ctx->cpu_env, fp_reg_offset(s, reg, MO_16)); + tcg_gen_ld16u_i32(tcg_ctx, v, tcg_ctx->cpu_env, + fp_reg_offset(s, reg, MO_16)); return v; } @@ -518,14 +602,10 @@ static void clear_vec_high(DisasContext *s, bool is_q, int rd) unsigned ofs = fp_reg_offset(s, rd, MO_64); unsigned vsz = vec_full_reg_size(s); - if (!is_q) { - TCGv_i64 tcg_zero = tcg_const_i64(tcg_ctx, 0); - tcg_gen_st_i64(tcg_ctx, tcg_zero, tcg_ctx->cpu_env, ofs + 8); - tcg_temp_free_i64(tcg_ctx, tcg_zero); - } - if (vsz > 16) { - tcg_gen_gvec_dup8i(tcg_ctx, ofs + 16, vsz - 16, vsz - 16, 0); - } + TCGv_i64 tcg_zero = tcg_const_i64(tcg_ctx, 0); + tcg_temp_free_i64(tcg_ctx, tcg_zero); + /* Nop move, with side effect of clearing the tail. */ + tcg_gen_gvec_mov(tcg_ctx, MO_64, ofs, ofs, is_q ? 16 : 8, vsz); } void write_fp_dreg(DisasContext *s, int reg, TCGv_i64 v) @@ -571,8 +651,8 @@ static void gen_gvec_fn2(DisasContext *s, bool is_q, int rd, int rn, GVecGen2Fn *gvec_fn, int vece) { TCGContext *tcg_ctx = s->uc->tcg_ctx; - gvec_fn(tcg_ctx, vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn), - is_q ? 16 : 8, vec_full_reg_size(s)); + gvec_fn(tcg_ctx, vece, vec_full_reg_offset(s, rd), + vec_full_reg_offset(s, rn), is_q ? 16 : 8, vec_full_reg_size(s)); } /* Expand a 2-operand + immediate AdvSIMD vector operation using @@ -582,8 +662,9 @@ static void gen_gvec_fn2i(DisasContext *s, bool is_q, int rd, int rn, int64_t imm, GVecGen2iFn *gvec_fn, int vece) { TCGContext *tcg_ctx = s->uc->tcg_ctx; - gvec_fn(tcg_ctx, vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn), - imm, is_q ? 16 : 8, vec_full_reg_size(s)); + gvec_fn(tcg_ctx, vece, vec_full_reg_offset(s, rd), + vec_full_reg_offset(s, rn), imm, is_q ? 16 : 8, + vec_full_reg_size(s)); } /* Expand a 3-operand AdvSIMD vector operation using an expander function. */ @@ -591,8 +672,9 @@ static void gen_gvec_fn3(DisasContext *s, bool is_q, int rd, int rn, int rm, GVecGen3Fn *gvec_fn, int vece) { TCGContext *tcg_ctx = s->uc->tcg_ctx; - gvec_fn(tcg_ctx, vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn), - vec_full_reg_offset(s, rm), is_q ? 16 : 8, vec_full_reg_size(s)); + gvec_fn(tcg_ctx, vece, vec_full_reg_offset(s, rd), + vec_full_reg_offset(s, rn), vec_full_reg_offset(s, rm), + is_q ? 16 : 8, vec_full_reg_size(s)); } /* Expand a 4-operand AdvSIMD vector operation using an expander function. */ @@ -600,56 +682,31 @@ static void gen_gvec_fn4(DisasContext *s, bool is_q, int rd, int rn, int rm, int rx, GVecGen4Fn *gvec_fn, int vece) { TCGContext *tcg_ctx = s->uc->tcg_ctx; - gvec_fn(tcg_ctx, vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn), - vec_full_reg_offset(s, rm), vec_full_reg_offset(s, rx), - is_q ? 16 : 8, vec_full_reg_size(s)); -} - -/* Expand a 2-operand + immediate AdvSIMD vector operation using - * an op descriptor. 
- */ -static void gen_gvec_op2i(DisasContext *s, bool is_q, int rd, - int rn, int64_t imm, const GVecGen2i *gvec_op) -{ - TCGContext *tcg_ctx = s->uc->tcg_ctx; - tcg_gen_gvec_2i(tcg_ctx, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn), - is_q ? 16 : 8, vec_full_reg_size(s), imm, gvec_op); + gvec_fn(tcg_ctx, vece, vec_full_reg_offset(s, rd), + vec_full_reg_offset(s, rn), vec_full_reg_offset(s, rm), + vec_full_reg_offset(s, rx), is_q ? 16 : 8, vec_full_reg_size(s)); } -/* Expand a 3-operand AdvSIMD vector operation using an op descriptor. */ -static void gen_gvec_op3(DisasContext *s, bool is_q, int rd, - int rn, int rm, const GVecGen3 *gvec_op) +/* Expand a 2-operand operation using an out-of-line helper. */ +static void gen_gvec_op2_ool(DisasContext *s, bool is_q, int rd, int rn, + int data, gen_helper_gvec_2 *fn) { TCGContext *tcg_ctx = s->uc->tcg_ctx; - tcg_gen_gvec_3(tcg_ctx, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn), - vec_full_reg_offset(s, rm), is_q ? 16 : 8, - vec_full_reg_size(s), gvec_op); + tcg_gen_gvec_2_ool(tcg_ctx, vec_full_reg_offset(s, rd), + vec_full_reg_offset(s, rn), is_q ? 16 : 8, + vec_full_reg_size(s), data, fn); } /* Expand a 3-operand operation using an out-of-line helper. */ -static void gen_gvec_op3_ool(DisasContext *s, bool is_q, int rd, - int rn, int rm, int data, gen_helper_gvec_3 *fn) +static void gen_gvec_op3_ool(DisasContext *s, bool is_q, int rd, int rn, int rm, + int data, gen_helper_gvec_3 *fn) { TCGContext *tcg_ctx = s->uc->tcg_ctx; tcg_gen_gvec_3_ool(tcg_ctx, vec_full_reg_offset(s, rd), - vec_full_reg_offset(s, rn), - vec_full_reg_offset(s, rm), + vec_full_reg_offset(s, rn), vec_full_reg_offset(s, rm), is_q ? 16 : 8, vec_full_reg_size(s), data, fn); } -/* Expand a 3-operand + env pointer operation using - * an out-of-line helper. - */ -static void gen_gvec_op3_env(DisasContext *s, bool is_q, int rd, - int rn, int rm, gen_helper_gvec_3_ptr *fn) -{ - TCGContext *tcg_ctx = s->uc->tcg_ctx; - tcg_gen_gvec_3_ptr(tcg_ctx, vec_full_reg_offset(s, rd), - vec_full_reg_offset(s, rn), - vec_full_reg_offset(s, rm), tcg_ctx->cpu_env, - is_q ? 16 : 8, vec_full_reg_size(s), 0, fn); -} - /* Expand a 3-operand + fpstatus pointer + simd data value operation using * an out-of-line helper. */ @@ -660,9 +717,8 @@ static void gen_gvec_op3_fpst(DisasContext *s, bool is_q, int rd, int rn, TCGContext *tcg_ctx = s->uc->tcg_ctx; TCGv_ptr fpst = get_fpstatus_ptr(tcg_ctx, is_fp16); tcg_gen_gvec_3_ptr(tcg_ctx, vec_full_reg_offset(s, rd), - vec_full_reg_offset(s, rn), - vec_full_reg_offset(s, rm), fpst, - is_q ? 16 : 8, vec_full_reg_size(s), data, fn); + vec_full_reg_offset(s, rn), vec_full_reg_offset(s, rm), + fpst, is_q ? 
16 : 8, vec_full_reg_size(s), data, fn); tcg_temp_free_ptr(tcg_ctx, fpst); } @@ -689,7 +745,8 @@ static inline void gen_logic_CC(TCGContext *tcg_ctx, int sf, TCGv_i64 result) } /* dest = T0 + T1; compute C, N, V and Z flags */ -static void gen_add_CC(TCGContext *tcg_ctx, int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1) +static void gen_add_CC(TCGContext *tcg_ctx, int sf, TCGv_i64 dest, TCGv_i64 t0, + TCGv_i64 t1) { if (sf) { TCGv_i64 result, flag, tmp; @@ -722,7 +779,8 @@ static void gen_add_CC(TCGContext *tcg_ctx, int sf, TCGv_i64 dest, TCGv_i64 t0, tcg_gen_movi_i32(tcg_ctx, tmp, 0); tcg_gen_extrl_i64_i32(tcg_ctx, t0_32, t0); tcg_gen_extrl_i64_i32(tcg_ctx, t1_32, t1); - tcg_gen_add2_i32(tcg_ctx, tcg_ctx->cpu_NF, tcg_ctx->cpu_CF, t0_32, tmp, t1_32, tmp); + tcg_gen_add2_i32(tcg_ctx, tcg_ctx->cpu_NF, tcg_ctx->cpu_CF, t0_32, tmp, + t1_32, tmp); tcg_gen_mov_i32(tcg_ctx, tcg_ctx->cpu_ZF, tcg_ctx->cpu_NF); tcg_gen_xor_i32(tcg_ctx, tcg_ctx->cpu_VF, tcg_ctx->cpu_NF, t0_32); tcg_gen_xor_i32(tcg_ctx, tmp, t0_32, t1_32); @@ -736,7 +794,8 @@ static void gen_add_CC(TCGContext *tcg_ctx, int sf, TCGv_i64 dest, TCGv_i64 t0, } /* dest = T0 - T1; compute C, N, V and Z flags */ -static void gen_sub_CC(TCGContext *tcg_ctx, int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1) +static void gen_sub_CC(TCGContext *tcg_ctx, int sf, TCGv_i64 dest, TCGv_i64 t0, + TCGv_i64 t1) { if (sf) { /* 64 bit arithmetic */ @@ -770,7 +829,8 @@ static void gen_sub_CC(TCGContext *tcg_ctx, int sf, TCGv_i64 dest, TCGv_i64 t0, tcg_gen_extrl_i64_i32(tcg_ctx, t1_32, t1); tcg_gen_sub_i32(tcg_ctx, tcg_ctx->cpu_NF, t0_32, t1_32); tcg_gen_mov_i32(tcg_ctx, tcg_ctx->cpu_ZF, tcg_ctx->cpu_NF); - tcg_gen_setcond_i32(tcg_ctx, TCG_COND_GEU, tcg_ctx->cpu_CF, t0_32, t1_32); + tcg_gen_setcond_i32(tcg_ctx, TCG_COND_GEU, tcg_ctx->cpu_CF, t0_32, + t1_32); tcg_gen_xor_i32(tcg_ctx, tcg_ctx->cpu_VF, tcg_ctx->cpu_NF, t0_32); tmp = tcg_temp_new_i32(tcg_ctx); tcg_gen_xor_i32(tcg_ctx, tmp, t0_32, t1_32); @@ -783,7 +843,8 @@ static void gen_sub_CC(TCGContext *tcg_ctx, int sf, TCGv_i64 dest, TCGv_i64 t0, } /* dest = T0 + T1 + CF; do not compute flags. */ -static void gen_adc(TCGContext *tcg_ctx, int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1) +static void gen_adc(TCGContext *tcg_ctx, int sf, TCGv_i64 dest, TCGv_i64 t0, + TCGv_i64 t1) { TCGv_i64 flag = tcg_temp_new_i64(tcg_ctx); tcg_gen_extu_i32_i64(tcg_ctx, flag, tcg_ctx->cpu_CF); @@ -797,7 +858,8 @@ static void gen_adc(TCGContext *tcg_ctx, int sf, TCGv_i64 dest, TCGv_i64 t0, TCG } /* dest = T0 + T1 + CF; compute C, N, V and Z flags. 
*/ -static void gen_adc_CC(TCGContext *tcg_ctx, int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1) +static void gen_adc_CC(TCGContext *tcg_ctx, int sf, TCGv_i64 dest, TCGv_i64 t0, + TCGv_i64 t1) { if (sf) { TCGv_i64 result, cf_64, vf_64, tmp; @@ -831,8 +893,10 @@ static void gen_adc_CC(TCGContext *tcg_ctx, int sf, TCGv_i64 dest, TCGv_i64 t0, tcg_gen_extrl_i64_i32(tcg_ctx, t0_32, t0); tcg_gen_extrl_i64_i32(tcg_ctx, t1_32, t1); - tcg_gen_add2_i32(tcg_ctx, tcg_ctx->cpu_NF, tcg_ctx->cpu_CF, t0_32, tmp, tcg_ctx->cpu_CF, tmp); - tcg_gen_add2_i32(tcg_ctx, tcg_ctx->cpu_NF, tcg_ctx->cpu_CF, tcg_ctx->cpu_NF, tcg_ctx->cpu_CF, t1_32, tmp); + tcg_gen_add2_i32(tcg_ctx, tcg_ctx->cpu_NF, tcg_ctx->cpu_CF, t0_32, tmp, + tcg_ctx->cpu_CF, tmp); + tcg_gen_add2_i32(tcg_ctx, tcg_ctx->cpu_NF, tcg_ctx->cpu_CF, + tcg_ctx->cpu_NF, tcg_ctx->cpu_CF, t1_32, tmp); tcg_gen_mov_i32(tcg_ctx, tcg_ctx->cpu_ZF, tcg_ctx->cpu_NF); tcg_gen_xor_i32(tcg_ctx, tcg_ctx->cpu_VF, tcg_ctx->cpu_NF, t0_32); @@ -855,9 +919,8 @@ static void gen_adc_CC(TCGContext *tcg_ctx, int sf, TCGv_i64 dest, TCGv_i64 t0, */ static void do_gpr_st_memidx(DisasContext *s, TCGv_i64 source, TCGv_i64 tcg_addr, int size, int memidx, - bool iss_valid, - unsigned int iss_srt, - bool iss_sf, bool iss_ar) + bool iss_valid, unsigned int iss_srt, bool iss_sf, + bool iss_ar) { TCGContext *tcg_ctx = s->uc->tcg_ctx; g_assert(size <= 3); @@ -866,36 +929,27 @@ static void do_gpr_st_memidx(DisasContext *s, TCGv_i64 source, if (iss_valid) { uint32_t syn; - syn = syn_data_abort_with_iss(0, - size, - false, - iss_srt, - iss_sf, - iss_ar, + syn = syn_data_abort_with_iss(0, size, false, iss_srt, iss_sf, iss_ar, 0, 0, 0, 0, 0, false); disas_set_insn_syndrome(s, syn); } } -static void do_gpr_st(DisasContext *s, TCGv_i64 source, - TCGv_i64 tcg_addr, int size, - bool iss_valid, - unsigned int iss_srt, +static void do_gpr_st(DisasContext *s, TCGv_i64 source, TCGv_i64 tcg_addr, + int size, bool iss_valid, unsigned int iss_srt, bool iss_sf, bool iss_ar) { - do_gpr_st_memidx(s, source, tcg_addr, size, get_mem_index(s), - iss_valid, iss_srt, iss_sf, iss_ar); + do_gpr_st_memidx(s, source, tcg_addr, size, get_mem_index(s), iss_valid, + iss_srt, iss_sf, iss_ar); } /* * Load from memory to GPR register */ -static void do_gpr_ld_memidx(DisasContext *s, - TCGv_i64 dest, TCGv_i64 tcg_addr, - int size, bool is_signed, - bool extend, int memidx, - bool iss_valid, unsigned int iss_srt, - bool iss_sf, bool iss_ar) +static void do_gpr_ld_memidx(DisasContext *s, TCGv_i64 dest, TCGv_i64 tcg_addr, + int size, bool is_signed, bool extend, int memidx, + bool iss_valid, unsigned int iss_srt, bool iss_sf, + bool iss_ar) { TCGContext *tcg_ctx = s->uc->tcg_ctx; MemOp memop = s->be_data + size; @@ -916,26 +970,18 @@ static void do_gpr_ld_memidx(DisasContext *s, if (iss_valid) { uint32_t syn; - syn = syn_data_abort_with_iss(0, - size, - is_signed, - iss_srt, - iss_sf, - iss_ar, - 0, 0, 0, 0, 0, false); + syn = syn_data_abort_with_iss(0, size, is_signed, iss_srt, iss_sf, + iss_ar, 0, 0, 0, 0, 0, false); disas_set_insn_syndrome(s, syn); } } -static void do_gpr_ld(DisasContext *s, - TCGv_i64 dest, TCGv_i64 tcg_addr, - int size, bool is_signed, bool extend, - bool iss_valid, unsigned int iss_srt, - bool iss_sf, bool iss_ar) +static void do_gpr_ld(DisasContext *s, TCGv_i64 dest, TCGv_i64 tcg_addr, + int size, bool is_signed, bool extend, bool iss_valid, + unsigned int iss_srt, bool iss_sf, bool iss_ar) { do_gpr_ld_memidx(s, dest, tcg_addr, size, is_signed, extend, - get_mem_index(s), - iss_valid, iss_srt, iss_sf, 
iss_ar); + get_mem_index(s), iss_valid, iss_srt, iss_sf, iss_ar); } /* @@ -946,7 +992,8 @@ static void do_fp_st(DisasContext *s, int srcidx, TCGv_i64 tcg_addr, int size) TCGContext *tcg_ctx = s->uc->tcg_ctx; /* This writes the bottom N bits of a 128 bit wide vector to memory */ TCGv_i64 tmp = tcg_temp_new_i64(tcg_ctx); - tcg_gen_ld_i64(tcg_ctx, tmp, tcg_ctx->cpu_env, fp_reg_offset(s, srcidx, MO_64)); + tcg_gen_ld_i64(tcg_ctx, tmp, tcg_ctx->cpu_env, + fp_reg_offset(s, srcidx, MO_64)); if (size < 4) { tcg_gen_qemu_st_i64(tcg_ctx, tmp, tcg_addr, get_mem_index(s), s->be_data + size); @@ -955,11 +1002,12 @@ static void do_fp_st(DisasContext *s, int srcidx, TCGv_i64 tcg_addr, int size) TCGv_i64 tcg_hiaddr = tcg_temp_new_i64(tcg_ctx); tcg_gen_addi_i64(tcg_ctx, tcg_hiaddr, tcg_addr, 8); - tcg_gen_qemu_st_i64(tcg_ctx, tmp, be ? tcg_hiaddr : tcg_addr, get_mem_index(s), - s->be_data | MO_Q); - tcg_gen_ld_i64(tcg_ctx, tmp, tcg_ctx->cpu_env, fp_reg_hi_offset(s, srcidx)); - tcg_gen_qemu_st_i64(tcg_ctx, tmp, be ? tcg_addr : tcg_hiaddr, get_mem_index(s), - s->be_data | MO_Q); + tcg_gen_qemu_st_i64(tcg_ctx, tmp, be ? tcg_hiaddr : tcg_addr, + get_mem_index(s), s->be_data | MO_Q); + tcg_gen_ld_i64(tcg_ctx, tmp, tcg_ctx->cpu_env, + fp_reg_hi_offset(s, srcidx)); + tcg_gen_qemu_st_i64(tcg_ctx, tmp, be ? tcg_addr : tcg_hiaddr, + get_mem_index(s), s->be_data | MO_Q); tcg_temp_free_i64(tcg_ctx, tcg_hiaddr); } @@ -974,11 +1022,10 @@ static void do_fp_ld(DisasContext *s, int destidx, TCGv_i64 tcg_addr, int size) TCGContext *tcg_ctx = s->uc->tcg_ctx; /* This always zero-extends and writes to a full 128 bit wide vector */ TCGv_i64 tmplo = tcg_temp_new_i64(tcg_ctx); - TCGv_i64 tmphi; + TCGv_i64 tmphi = NULL; if (size < 4) { MemOp memop = s->be_data + size; - tmphi = tcg_const_i64(tcg_ctx, 0); tcg_gen_qemu_ld_i64(tcg_ctx, tmplo, tcg_addr, get_mem_index(s), memop); } else { bool be = s->be_data == MO_BE; @@ -988,20 +1035,24 @@ static void do_fp_ld(DisasContext *s, int destidx, TCGv_i64 tcg_addr, int size) tcg_hiaddr = tcg_temp_new_i64(tcg_ctx); tcg_gen_addi_i64(tcg_ctx, tcg_hiaddr, tcg_addr, 8); - tcg_gen_qemu_ld_i64(tcg_ctx, tmplo, be ? tcg_hiaddr : tcg_addr, get_mem_index(s), - s->be_data | MO_Q); - tcg_gen_qemu_ld_i64(tcg_ctx, tmphi, be ? tcg_addr : tcg_hiaddr, get_mem_index(s), - s->be_data | MO_Q); + tcg_gen_qemu_ld_i64(tcg_ctx, tmplo, be ? tcg_hiaddr : tcg_addr, + get_mem_index(s), s->be_data | MO_Q); + tcg_gen_qemu_ld_i64(tcg_ctx, tmphi, be ? 
tcg_addr : tcg_hiaddr, + get_mem_index(s), s->be_data | MO_Q); tcg_temp_free_i64(tcg_ctx, tcg_hiaddr); } - tcg_gen_st_i64(tcg_ctx, tmplo, tcg_ctx->cpu_env, fp_reg_offset(s, destidx, MO_64)); - tcg_gen_st_i64(tcg_ctx, tmphi, tcg_ctx->cpu_env, fp_reg_hi_offset(s, destidx)); + tcg_gen_st_i64(tcg_ctx, tmplo, tcg_ctx->cpu_env, + fp_reg_offset(s, destidx, MO_64)); tcg_temp_free_i64(tcg_ctx, tmplo); - tcg_temp_free_i64(tcg_ctx, tmphi); - clear_vec_high(s, true, destidx); + if (tmphi) { + tcg_gen_st_i64(tcg_ctx, tmphi, tcg_ctx->cpu_env, + fp_reg_hi_offset(s, destidx)); + tcg_temp_free_i64(tcg_ctx, tmphi); + } + clear_vec_high(s, tmphi != NULL, destidx); } /* @@ -1032,17 +1083,17 @@ static void read_vec_element(DisasContext *s, TCGv_i64 tcg_dest, int srcidx, case MO_32: tcg_gen_ld32u_i64(tcg_ctx, tcg_dest, tcg_ctx->cpu_env, vect_off); break; - case MO_8|MO_SIGN: + case MO_8 | MO_SIGN: tcg_gen_ld8s_i64(tcg_ctx, tcg_dest, tcg_ctx->cpu_env, vect_off); break; - case MO_16|MO_SIGN: + case MO_16 | MO_SIGN: tcg_gen_ld16s_i64(tcg_ctx, tcg_dest, tcg_ctx->cpu_env, vect_off); break; - case MO_32|MO_SIGN: + case MO_32 | MO_SIGN: tcg_gen_ld32s_i64(tcg_ctx, tcg_dest, tcg_ctx->cpu_env, vect_off); break; case MO_64: - case MO_64|MO_SIGN: + case MO_64 | MO_SIGN: tcg_gen_ld_i64(tcg_ctx, tcg_dest, tcg_ctx->cpu_env, vect_off); break; default: @@ -1062,14 +1113,14 @@ static void read_vec_element_i32(DisasContext *s, TCGv_i32 tcg_dest, int srcidx, case MO_16: tcg_gen_ld16u_i32(tcg_ctx, tcg_dest, tcg_ctx->cpu_env, vect_off); break; - case MO_8|MO_SIGN: + case MO_8 | MO_SIGN: tcg_gen_ld8s_i32(tcg_ctx, tcg_dest, tcg_ctx->cpu_env, vect_off); break; - case MO_16|MO_SIGN: + case MO_16 | MO_SIGN: tcg_gen_ld16s_i32(tcg_ctx, tcg_dest, tcg_ctx->cpu_env, vect_off); break; case MO_32: - case MO_32|MO_SIGN: + case MO_32 | MO_SIGN: tcg_gen_ld_i32(tcg_ctx, tcg_dest, tcg_ctx->cpu_env, vect_off); break; default: @@ -1129,7 +1180,8 @@ static void do_vec_st(DisasContext *s, int srcidx, int element, TCGv_i64 tcg_tmp = tcg_temp_new_i64(tcg_ctx); read_vec_element(s, tcg_tmp, srcidx, element, size); - tcg_gen_qemu_st_i64(tcg_ctx, tcg_tmp, tcg_addr, get_mem_index(s), endian | size); + tcg_gen_qemu_st_i64(tcg_ctx, tcg_tmp, tcg_addr, get_mem_index(s), + endian | size); tcg_temp_free_i64(tcg_ctx, tcg_tmp); } @@ -1141,7 +1193,8 @@ static void do_vec_ld(DisasContext *s, int destidx, int element, TCGContext *tcg_ctx = s->uc->tcg_ctx; TCGv_i64 tcg_tmp = tcg_temp_new_i64(tcg_ctx); - tcg_gen_qemu_ld_i64(tcg_ctx, tcg_tmp, tcg_addr, get_mem_index(s), endian | size); + tcg_gen_qemu_ld_i64(tcg_ctx, tcg_tmp, tcg_addr, get_mem_index(s), + endian | size); write_vec_element(s, tcg_tmp, destidx, element, size); tcg_temp_free_i64(tcg_ctx, tcg_tmp); @@ -1186,8 +1239,8 @@ bool sve_access_check(DisasContext *s) * optional shift. You will likely want to pass a temporary for the * destination register. See DecodeRegExtend() in the ARM ARM. */ -static void ext_and_shift_reg(TCGContext *tcg_ctx, TCGv_i64 tcg_out, TCGv_i64 tcg_in, - int option, unsigned int shift) +static void ext_and_shift_reg(TCGContext *tcg_ctx, TCGv_i64 tcg_out, + TCGv_i64 tcg_in, int option, unsigned int shift) { int extsize = extract32(option, 0, 2); bool is_signed = extract32(option, 2, 1); @@ -1319,8 +1372,8 @@ static void disas_comp_b_imm(DisasContext *s, uint32_t insn) label_match = gen_new_label(tcg_ctx); reset_btype(s); - tcg_gen_brcondi_i64(tcg_ctx, op ? TCG_COND_NE : TCG_COND_EQ, - tcg_cmp, 0, label_match); + tcg_gen_brcondi_i64(tcg_ctx, op ? 
TCG_COND_NE : TCG_COND_EQ, tcg_cmp, 0, + label_match); gen_goto_tb(s, 0, s->base.pc_next); gen_set_label(tcg_ctx, label_match); @@ -1351,8 +1404,8 @@ static void disas_test_b_imm(DisasContext *s, uint32_t insn) label_match = gen_new_label(tcg_ctx); reset_btype(s); - tcg_gen_brcondi_i64(tcg_ctx, op ? TCG_COND_NE : TCG_COND_EQ, - tcg_cmp, 0, label_match); + tcg_gen_brcondi_i64(tcg_ctx, op ? TCG_COND_NE : TCG_COND_EQ, tcg_cmp, 0, + label_match); tcg_temp_free_i64(tcg_ctx, tcg_cmp); gen_goto_tb(s, 0, s->base.pc_next); gen_set_label(tcg_ctx, label_match); @@ -1393,8 +1446,8 @@ static void disas_cond_b_imm(DisasContext *s, uint32_t insn) } /* HINT instruction group, including various allocated HINTs */ -static void handle_hint(DisasContext *s, uint32_t insn, - unsigned int op1, unsigned int op2, unsigned int crm) +static void handle_hint(DisasContext *s, uint32_t insn, unsigned int op1, + unsigned int op2, unsigned int crm) { TCGContext *tcg_ctx = s->uc->tcg_ctx; unsigned int selector = crm << 3 | op2; @@ -1431,71 +1484,80 @@ static void handle_hint(DisasContext *s, uint32_t insn, break; case 7: // 0b00111: /* XPACLRI */ if (s->pauth_active) { - gen_helper_xpaci(tcg_ctx, tcg_ctx->cpu_X[30], tcg_ctx->cpu_env, tcg_ctx->cpu_X[30]); + gen_helper_xpaci(tcg_ctx, tcg_ctx->cpu_X[30], tcg_ctx->cpu_env, + tcg_ctx->cpu_X[30]); } break; case 8: // 0b01000: /* PACIA1716 */ if (s->pauth_active) { - gen_helper_pacia(tcg_ctx, tcg_ctx->cpu_X[17], tcg_ctx->cpu_env, tcg_ctx->cpu_X[17], tcg_ctx->cpu_X[16]); + gen_helper_pacia(tcg_ctx, tcg_ctx->cpu_X[17], tcg_ctx->cpu_env, + tcg_ctx->cpu_X[17], tcg_ctx->cpu_X[16]); } break; case 0xa: // 0b01010: /* PACIB1716 */ if (s->pauth_active) { - gen_helper_pacib(tcg_ctx, tcg_ctx->cpu_X[17], tcg_ctx->cpu_env, tcg_ctx->cpu_X[17], tcg_ctx->cpu_X[16]); + gen_helper_pacib(tcg_ctx, tcg_ctx->cpu_X[17], tcg_ctx->cpu_env, + tcg_ctx->cpu_X[17], tcg_ctx->cpu_X[16]); } break; case 0xc: // 0b01100: /* AUTIA1716 */ if (s->pauth_active) { - gen_helper_autia(tcg_ctx, tcg_ctx->cpu_X[17], tcg_ctx->cpu_env, tcg_ctx->cpu_X[17], tcg_ctx->cpu_X[16]); + gen_helper_autia(tcg_ctx, tcg_ctx->cpu_X[17], tcg_ctx->cpu_env, + tcg_ctx->cpu_X[17], tcg_ctx->cpu_X[16]); } break; case 0xe: // 0b01110: /* AUTIB1716 */ if (s->pauth_active) { - gen_helper_autib(tcg_ctx, tcg_ctx->cpu_X[17], tcg_ctx->cpu_env, tcg_ctx->cpu_X[17], tcg_ctx->cpu_X[16]); + gen_helper_autib(tcg_ctx, tcg_ctx->cpu_X[17], tcg_ctx->cpu_env, + tcg_ctx->cpu_X[17], tcg_ctx->cpu_X[16]); } break; case 0x18: // 0b11000: /* PACIAZ */ if (s->pauth_active) { - gen_helper_pacia(tcg_ctx, tcg_ctx->cpu_X[30], tcg_ctx->cpu_env, tcg_ctx->cpu_X[30], - new_tmp_a64_zero(s)); + gen_helper_pacia(tcg_ctx, tcg_ctx->cpu_X[30], tcg_ctx->cpu_env, + tcg_ctx->cpu_X[30], new_tmp_a64_zero(s)); } break; case 0x19: // 0b11001: /* PACIASP */ if (s->pauth_active) { - gen_helper_pacia(tcg_ctx, tcg_ctx->cpu_X[30], tcg_ctx->cpu_env, tcg_ctx->cpu_X[30], tcg_ctx->cpu_X[31]); + gen_helper_pacia(tcg_ctx, tcg_ctx->cpu_X[30], tcg_ctx->cpu_env, + tcg_ctx->cpu_X[30], tcg_ctx->cpu_X[31]); } break; case 0x1a: // 0b11010: /* PACIBZ */ if (s->pauth_active) { - gen_helper_pacib(tcg_ctx, tcg_ctx->cpu_X[30], tcg_ctx->cpu_env, tcg_ctx->cpu_X[30], - new_tmp_a64_zero(s)); + gen_helper_pacib(tcg_ctx, tcg_ctx->cpu_X[30], tcg_ctx->cpu_env, + tcg_ctx->cpu_X[30], new_tmp_a64_zero(s)); } break; case 0x1b: // 0b11011: /* PACIBSP */ if (s->pauth_active) { - gen_helper_pacib(tcg_ctx, tcg_ctx->cpu_X[30], tcg_ctx->cpu_env, tcg_ctx->cpu_X[30], tcg_ctx->cpu_X[31]); + gen_helper_pacib(tcg_ctx, 
tcg_ctx->cpu_X[30], tcg_ctx->cpu_env, + tcg_ctx->cpu_X[30], tcg_ctx->cpu_X[31]); } break; case 0x1c: // 0b11100: /* AUTIAZ */ if (s->pauth_active) { - gen_helper_autia(tcg_ctx, tcg_ctx->cpu_X[30], tcg_ctx->cpu_env, tcg_ctx->cpu_X[30], - new_tmp_a64_zero(s)); + gen_helper_autia(tcg_ctx, tcg_ctx->cpu_X[30], tcg_ctx->cpu_env, + tcg_ctx->cpu_X[30], new_tmp_a64_zero(s)); } break; case 0x1d: // 0b11101: /* AUTIASP */ if (s->pauth_active) { - gen_helper_autia(tcg_ctx, tcg_ctx->cpu_X[30], tcg_ctx->cpu_env, tcg_ctx->cpu_X[30], tcg_ctx->cpu_X[31]); + gen_helper_autia(tcg_ctx, tcg_ctx->cpu_X[30], tcg_ctx->cpu_env, + tcg_ctx->cpu_X[30], tcg_ctx->cpu_X[31]); } break; case 0x1e: // 0b11110: /* AUTIBZ */ if (s->pauth_active) { - gen_helper_autib(tcg_ctx, tcg_ctx->cpu_X[30], tcg_ctx->cpu_env, tcg_ctx->cpu_X[30], - new_tmp_a64_zero(s)); + gen_helper_autib(tcg_ctx, tcg_ctx->cpu_X[30], tcg_ctx->cpu_env, + tcg_ctx->cpu_X[30], new_tmp_a64_zero(s)); } break; case 0x1f: // 0b11111: /* AUTIBSP */ if (s->pauth_active) { - gen_helper_autib(tcg_ctx, tcg_ctx->cpu_X[30], tcg_ctx->cpu_env, tcg_ctx->cpu_X[30], tcg_ctx->cpu_X[31]); + gen_helper_autib(tcg_ctx, tcg_ctx->cpu_X[30], tcg_ctx->cpu_env, + tcg_ctx->cpu_X[30], tcg_ctx->cpu_X[31]); } break; default: @@ -1511,8 +1573,8 @@ static void gen_clrex(DisasContext *s, uint32_t insn) } /* CLREX, DSB, DMB, ISB */ -static void handle_sync(DisasContext *s, uint32_t insn, - unsigned int op1, unsigned int op2, unsigned int crm) +static void handle_sync(DisasContext *s, uint32_t insn, unsigned int op1, + unsigned int op2, unsigned int crm) { TCGContext *tcg_ctx = s->uc->tcg_ctx; TCGBar bar; @@ -1601,19 +1663,22 @@ static void gen_xaflag(TCGContext *tcg_ctx) static void gen_axflag(TCGContext *tcg_ctx) { - tcg_gen_sari_i32(tcg_ctx, tcg_ctx->cpu_VF, tcg_ctx->cpu_VF, 31); /* V ? -1 : 0 */ - tcg_gen_andc_i32(tcg_ctx, tcg_ctx->cpu_CF, tcg_ctx->cpu_CF, tcg_ctx->cpu_VF); /* C & !V */ + tcg_gen_sari_i32(tcg_ctx, tcg_ctx->cpu_VF, tcg_ctx->cpu_VF, + 31); /* V ? -1 : 0 */ + tcg_gen_andc_i32(tcg_ctx, tcg_ctx->cpu_CF, tcg_ctx->cpu_CF, + tcg_ctx->cpu_VF); /* C & !V */ /* !(Z | V) -> !(!ZF | V) -> ZF & !V -> ZF & ~VF */ - tcg_gen_andc_i32(tcg_ctx, tcg_ctx->cpu_ZF, tcg_ctx->cpu_ZF, tcg_ctx->cpu_VF); + tcg_gen_andc_i32(tcg_ctx, tcg_ctx->cpu_ZF, tcg_ctx->cpu_ZF, + tcg_ctx->cpu_VF); tcg_gen_movi_i32(tcg_ctx, tcg_ctx->cpu_NF, 0); tcg_gen_movi_i32(tcg_ctx, tcg_ctx->cpu_VF, 0); } /* MSR (immediate) - move immediate to processor state field */ -static void handle_msr_i(DisasContext *s, uint32_t insn, - unsigned int op1, unsigned int op2, unsigned int crm) +static void handle_msr_i(DisasContext *s, uint32_t insn, unsigned int op1, + unsigned int op2, unsigned int crm) { TCGContext *tcg_ctx = s->uc->tcg_ctx; TCGv_i32 t1; @@ -1695,7 +1760,28 @@ static void handle_msr_i(DisasContext *s, uint32_t insn, gen_helper_msr_i_daifclear(tcg_ctx, tcg_ctx->cpu_env, t1); tcg_temp_free_i32(tcg_ctx, t1); /* For DAIFClear, exit the cpu loop to re-evaluate pending IRQs. */ - s->base.is_jmp = DISAS_UPDATE; + s->base.is_jmp = DISAS_UPDATE_EXIT; + break; + + case 0x1c: /* TCO */ + if (dc_isar_feature(aa64_mte, s)) { + /* Full MTE is enabled -- set the TCO bit as directed. */ + if (crm & 1) { + set_pstate_bits(tcg_ctx, PSTATE_TCO); + } else { + clear_pstate_bits(tcg_ctx, PSTATE_TCO); + } + t1 = tcg_const_i32(tcg_ctx, s->current_el); + gen_helper_rebuild_hflags_a64(tcg_ctx, tcg_ctx->cpu_env, t1); + tcg_temp_free_i32(tcg_ctx, t1); + /* Many factors, including TCO, go into MTE_ACTIVE. 
*/ + s->base.is_jmp = DISAS_UPDATE_NOCHAIN; + } else if (dc_isar_feature(aa64_mte_insn_reg, s)) { + /* Only "instructions accessible at EL0" -- PSTATE.TCO is WI. */ + s->base.is_jmp = DISAS_NEXT; + } else { + goto do_unallocated; + } break; default: @@ -1738,7 +1824,8 @@ static void gen_set_nzcv(TCGContext *tcg_ctx, TCGv_i64 tcg_rt) tcg_gen_andi_i32(tcg_ctx, tcg_ctx->cpu_NF, nzcv, (1U << 31)); /* bit 30, Z */ tcg_gen_andi_i32(tcg_ctx, tcg_ctx->cpu_ZF, nzcv, (1 << 30)); - tcg_gen_setcondi_i32(tcg_ctx, TCG_COND_EQ, tcg_ctx->cpu_ZF, tcg_ctx->cpu_ZF, 0); + tcg_gen_setcondi_i32(tcg_ctx, TCG_COND_EQ, tcg_ctx->cpu_ZF, tcg_ctx->cpu_ZF, + 0); /* bit 29, C */ tcg_gen_andi_i32(tcg_ctx, tcg_ctx->cpu_CF, nzcv, (1 << 29)); tcg_gen_shri_i32(tcg_ctx, tcg_ctx->cpu_CF, tcg_ctx->cpu_CF, 29); @@ -1748,7 +1835,6 @@ static void gen_set_nzcv(TCGContext *tcg_ctx, TCGv_i64 tcg_rt) tcg_temp_free_i32(tcg_ctx, nzcv); } - static TCGLabel *gen_hook_sys(DisasContext *s, uint32_t insn, struct hook *hk) { uc_engine *uc = s->uc; @@ -1759,17 +1845,18 @@ static TCGLabel *gen_hook_sys(DisasContext *s, uint32_t insn, struct hook *hk) tcg_skip = tcg_temp_new_i32(tcg_ctx); tcg_insn = tcg_const_i32(tcg_ctx, insn); - tcg_hk = tcg_const_ptr(tcg_ctx, (void*)hk); + tcg_hk = tcg_const_ptr(tcg_ctx, (void *)hk); // Sync pc in advance. gen_a64_set_pc_im(tcg_ctx, s->pc_curr); // Only one hook per instruction for SYS/SYSL/MRS/MSR is allowed. // This is intended and may be extended if it's really necessary. - gen_helper_uc_hooksys64(tcg_ctx, tcg_skip, tcg_ctx->cpu_env, tcg_insn, tcg_hk); + gen_helper_uc_hooksys64(tcg_ctx, tcg_skip, tcg_ctx->cpu_env, tcg_insn, + tcg_hk); tcg_gen_brcondi_i32(tcg_ctx, TCG_COND_NE, tcg_skip, 0, label); - + tcg_temp_free_i32(tcg_ctx, tcg_skip); tcg_temp_free_i32(tcg_ctx, tcg_insn); tcg_temp_free_ptr(tcg_ctx, tcg_hk); @@ -1777,7 +1864,8 @@ static TCGLabel *gen_hook_sys(DisasContext *s, uint32_t insn, struct hook *hk) return label; } -static void may_gen_set_label(DisasContext *s, TCGLabel *label) { +static void may_gen_set_label(DisasContext *s, TCGLabel *label) +{ if (label) { gen_set_label(s->uc->tcg_ctx, label); } @@ -1802,7 +1890,8 @@ static void handle_sys(DisasContext *s, uint32_t insn, bool isread, struct hook *hook; HOOK_FOREACH_VAR_DECLARE; - HOOK_FOREACH(uc, hook, UC_HOOK_INSN) { + HOOK_FOREACH(uc, hook, UC_HOOK_INSN) + { if (hook->to_delete) continue; @@ -1811,32 +1900,32 @@ static void handle_sys(DisasContext *s, uint32_t insn, bool isread, } switch (hook->insn) { - case UC_ARM64_INS_MRS: { - if (isread && (op0 == 2 || op0 == 3)) { - label = gen_hook_sys(s, insn, hook); - } - break; + case UC_ARM64_INS_MRS: { + if (isread && (op0 == 2 || op0 == 3)) { + label = gen_hook_sys(s, insn, hook); } - case UC_ARM64_INS_MSR: { - if (!isread && (op0 == 2 || op0 == 3)) { - label = gen_hook_sys(s, insn, hook); - } - break; + break; + } + case UC_ARM64_INS_MSR: { + if (!isread && (op0 == 2 || op0 == 3)) { + label = gen_hook_sys(s, insn, hook); } - case UC_ARM64_INS_SYSL: { - if (isread && op0 == 1) { - label = gen_hook_sys(s, insn, hook); - } - break; + break; + } + case UC_ARM64_INS_SYSL: { + if (isread && op0 == 1) { + label = gen_hook_sys(s, insn, hook); } - case UC_ARM64_INS_SYS: { - if (!isread && op0 == 1) { - label = gen_hook_sys(s, insn, hook); - } - break; + break; + } + case UC_ARM64_INS_SYS: { + if (!isread && op0 == 1) { + label = gen_hook_sys(s, insn, hook); } - default: - break; + break; + } + default: + break; } if (label) { @@ -1844,15 +1933,16 @@ static void handle_sys(DisasContext *s, uint32_t 
insn, bool isread, } } - ri = get_arm_cp_reginfo(s->cp_regs, - ENCODE_AA64_CP_REG(CP_REG_ARM64_SYSREG_CP, - crn, crm, op0, op1, op2)); + ri = get_arm_cp_reginfo( + s->cp_regs, + ENCODE_AA64_CP_REG(CP_REG_ARM64_SYSREG_CP, crn, crm, op0, op1, op2)); if (!ri) { /* Unknown register; this might be a guest error or a QEMU * unimplemented feature. */ - qemu_log_mask(LOG_UNIMP, "%s access to unsupported AArch64 " + qemu_log_mask(LOG_UNIMP, + "%s access to unsupported AArch64 " "system register op0:%d op1:%d crn:%d crm:%d op2:%d\n", isread ? "read" : "write", op0, op1, crn, crm, op2); unallocated_encoding(s); @@ -1880,7 +1970,8 @@ static void handle_sys(DisasContext *s, uint32_t insn, bool isread, syndrome = syn_aa64_sysregtrap(op0, op1, op2, crn, crm, rt, isread); tcg_syn = tcg_const_i32(tcg_ctx, syndrome); tcg_isread = tcg_const_i32(tcg_ctx, isread); - gen_helper_access_check_cp_reg(tcg_ctx, tcg_ctx->cpu_env, tmpptr, tcg_syn, tcg_isread); + gen_helper_access_check_cp_reg(tcg_ctx, tcg_ctx->cpu_env, tmpptr, + tcg_syn, tcg_isread); tcg_temp_free_ptr(tcg_ctx, tmpptr); tcg_temp_free_i32(tcg_ctx, tcg_syn); tcg_temp_free_i32(tcg_ctx, tcg_isread); @@ -1916,10 +2007,62 @@ static void handle_sys(DisasContext *s, uint32_t insn, bool isread, return; case ARM_CP_DC_ZVA: /* Writes clear the aligned block of memory which rt points into. */ - tcg_rt = clean_data_tbi(s, cpu_reg(s, rt)); + if (s->mte_active[0]) { + TCGv_i32 t_desc; + int desc = 0; + + FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s), desc); + FIELD_DP32(desc, MTEDESC, TBI, s->tbid, desc); + FIELD_DP32(desc, MTEDESC, TCMA, s->tcma, desc); + t_desc = tcg_const_i32(tcg_ctx, desc); + + tcg_rt = new_tmp_a64(s); + gen_helper_mte_check_zva(tcg_ctx, tcg_rt, tcg_ctx->cpu_env, t_desc, + cpu_reg(s, rt)); + tcg_temp_free_i32(tcg_ctx, t_desc); + } else { + tcg_rt = clean_data_tbi(s, cpu_reg(s, rt)); + } gen_helper_dc_zva(tcg_ctx, tcg_ctx->cpu_env, tcg_rt); may_gen_set_label(s, label); return; + case ARM_CP_DC_GVA: { + TCGv_i64 clean_addr, tag; + + /* + * DC_GVA, like DC_ZVA, requires that we supply the original + * pointer for an invalid page. Probe that address first. + */ + tcg_rt = cpu_reg(s, rt); + clean_addr = clean_data_tbi(s, tcg_rt); + gen_probe_access(s, clean_addr, MMU_DATA_STORE, MO_8); + + if (s->ata) { + /* Extract the tag from the register to match STZGM. */ + tag = tcg_temp_new_i64(tcg_ctx); + tcg_gen_shri_i64(tcg_ctx, tag, tcg_rt, 56); + gen_helper_stzgm_tags(tcg_ctx, tcg_ctx->cpu_env, clean_addr, tag); + tcg_temp_free_i64(tcg_ctx, tag); + } + } + return; + case ARM_CP_DC_GZVA: { + TCGv_i64 clean_addr, tag; + + /* For DC_GZVA, we can rely on DC_ZVA for the proper fault. */ + tcg_rt = cpu_reg(s, rt); + clean_addr = clean_data_tbi(s, tcg_rt); + gen_helper_dc_zva(tcg_ctx, tcg_ctx->cpu_env, clean_addr); + + if (s->ata) { + /* Extract the tag from the register to match STZGM. 
*/ + tag = tcg_temp_new_i64(tcg_ctx); + tcg_gen_shri_i64(tcg_ctx, tag, tcg_rt, 56); + gen_helper_stzgm_tags(tcg_ctx, tcg_ctx->cpu_env, clean_addr, tag); + tcg_temp_free_i64(tcg_ctx, tag); + } + } + return; default: break; } @@ -1961,7 +2104,7 @@ static void handle_sys(DisasContext *s, uint32_t insn, bool isread, if ((tb_cflags(s->base.tb) & CF_USE_ICOUNT) && (ri->type & ARM_CP_IO)) { /* I/O operations must end the TB here (whether read or write) */ - s->base.is_jmp = DISAS_UPDATE; + s->base.is_jmp = DISAS_UPDATE_EXIT; } if (!isread && !(ri->type & ARM_CP_SUPPRESS_TB_END)) { /* @@ -1976,7 +2119,7 @@ static void handle_sys(DisasContext *s, uint32_t insn, bool isread, * but allow this to be suppressed by the register definition * (usually only necessary to work around guest bugs). */ - s->base.is_jmp = DISAS_UPDATE; + s->base.is_jmp = DISAS_UPDATE_EXIT; } may_gen_set_label(s, label); @@ -2046,12 +2189,12 @@ static void disas_exc(DisasContext *s, uint32_t insn) * instruction works properly. */ switch (op2_ll) { - case 1: /* SVC */ + case 1: /* SVC */ gen_ss_advance(s); gen_exception_insn(s, s->base.pc_next, EXCP_SWI, syn_aa64_svc(imm16), default_exception_el(s)); break; - case 2: /* HVC */ + case 2: /* HVC */ if (s->current_el == 0) { unallocated_encoding(s); break; @@ -2065,7 +2208,7 @@ static void disas_exc(DisasContext *s, uint32_t insn) gen_exception_insn(s, s->base.pc_next, EXCP_HVC, syn_aa64_hvc(imm16), 2); break; - case 3: /* SMC */ + case 3: /* SMC */ if (s->current_el == 0) { unallocated_encoding(s); break; @@ -2144,7 +2287,7 @@ static void disas_uncond_b_reg(DisasContext *s, uint32_t insn) { TCGContext *tcg_ctx = s->uc->tcg_ctx; unsigned int opc, op2, op3, rn, op4; - unsigned btype_mod = 2; /* 0: BR, 1: BLR, 2: other */ + unsigned btype_mod = 2; /* 0: BR, 1: BLR, 2: other */ TCGv_i64 dst; TCGv_i64 modifier; @@ -2194,9 +2337,11 @@ static void disas_uncond_b_reg(DisasContext *s, uint32_t insn) if (s->pauth_active) { dst = new_tmp_a64(s); if (op3 == 2) { - gen_helper_autia(tcg_ctx, dst, tcg_ctx->cpu_env, cpu_reg(s, rn), modifier); + gen_helper_autia(tcg_ctx, dst, tcg_ctx->cpu_env, + cpu_reg(s, rn), modifier); } else { - gen_helper_autib(tcg_ctx, dst, tcg_ctx->cpu_env, cpu_reg(s, rn), modifier); + gen_helper_autib(tcg_ctx, dst, tcg_ctx->cpu_env, + cpu_reg(s, rn), modifier); } } else { dst = cpu_reg(s, rn); @@ -2226,9 +2371,11 @@ static void disas_uncond_b_reg(DisasContext *s, uint32_t insn) dst = new_tmp_a64(s); modifier = cpu_reg_sp(s, op4); if (op3 == 2) { - gen_helper_autia(tcg_ctx, dst, tcg_ctx->cpu_env, cpu_reg(s, rn), modifier); + gen_helper_autia(tcg_ctx, dst, tcg_ctx->cpu_env, cpu_reg(s, rn), + modifier); } else { - gen_helper_autib(tcg_ctx, dst, tcg_ctx->cpu_env, cpu_reg(s, rn), modifier); + gen_helper_autib(tcg_ctx, dst, tcg_ctx->cpu_env, cpu_reg(s, rn), + modifier); } } else { dst = cpu_reg(s, rn); @@ -2268,9 +2415,11 @@ static void disas_uncond_b_reg(DisasContext *s, uint32_t insn) if (s->pauth_active) { modifier = tcg_ctx->cpu_X[31]; if (op3 == 2) { - gen_helper_autia(tcg_ctx, dst, tcg_ctx->cpu_env, dst, modifier); + gen_helper_autia(tcg_ctx, dst, tcg_ctx->cpu_env, dst, + modifier); } else { - gen_helper_autib(tcg_ctx, dst, tcg_ctx->cpu_env, dst, modifier); + gen_helper_autib(tcg_ctx, dst, tcg_ctx->cpu_env, dst, + modifier); } } break; @@ -2326,14 +2475,18 @@ static void disas_uncond_b_reg(DisasContext *s, uint32_t insn) static void disas_b_exc_sys(DisasContext *s, uint32_t insn) { switch (extract32(insn, 25, 7)) { - case 0x0a: case 0x0b: - case 0x4a: case 0x4b: /* 
Unconditional branch (immediate) */ + case 0x0a: + case 0x0b: + case 0x4a: + case 0x4b: /* Unconditional branch (immediate) */ disas_uncond_b_imm(s, insn); break; - case 0x1a: case 0x5a: /* Compare & branch (immediate) */ + case 0x1a: + case 0x5a: /* Compare & branch (immediate) */ disas_comp_b_imm(s, insn); break; - case 0x1b: case 0x5b: /* Test & branch (immediate) */ + case 0x1b: + case 0x5b: /* Test & branch (immediate) */ disas_test_b_imm(s, insn); break; case 0x2a: /* Conditional branch (immediate) */ @@ -2370,8 +2523,8 @@ static void disas_b_exc_sys(DisasContext *s, uint32_t insn) * races in multi-threaded linux-user and when MTTCG softmmu is * enabled. */ -static void gen_load_exclusive(DisasContext *s, int rt, int rt2, - TCGv_i64 addr, int size, bool is_pair) +static void gen_load_exclusive(DisasContext *s, int rt, int rt2, TCGv_i64 addr, + int size, bool is_pair) { TCGContext *tcg_ctx = s->uc->tcg_ctx; int idx = get_mem_index(s); @@ -2383,13 +2536,18 @@ static void gen_load_exclusive(DisasContext *s, int rt, int rt2, if (size == 2) { /* The pair must be single-copy atomic for the doubleword. */ memop |= MO_64 | MO_ALIGN; - tcg_gen_qemu_ld_i64(tcg_ctx, tcg_ctx->cpu_exclusive_val, addr, idx, memop); + tcg_gen_qemu_ld_i64(tcg_ctx, tcg_ctx->cpu_exclusive_val, addr, idx, + memop); if (s->be_data == MO_LE) { - tcg_gen_extract_i64(tcg_ctx, cpu_reg(s, rt), tcg_ctx->cpu_exclusive_val, 0, 32); - tcg_gen_extract_i64(tcg_ctx, cpu_reg(s, rt2), tcg_ctx->cpu_exclusive_val, 32, 32); + tcg_gen_extract_i64(tcg_ctx, cpu_reg(s, rt), + tcg_ctx->cpu_exclusive_val, 0, 32); + tcg_gen_extract_i64(tcg_ctx, cpu_reg(s, rt2), + tcg_ctx->cpu_exclusive_val, 32, 32); } else { - tcg_gen_extract_i64(tcg_ctx, cpu_reg(s, rt), tcg_ctx->cpu_exclusive_val, 32, 32); - tcg_gen_extract_i64(tcg_ctx, cpu_reg(s, rt2), tcg_ctx->cpu_exclusive_val, 0, 32); + tcg_gen_extract_i64(tcg_ctx, cpu_reg(s, rt), + tcg_ctx->cpu_exclusive_val, 32, 32); + tcg_gen_extract_i64(tcg_ctx, cpu_reg(s, rt2), + tcg_ctx->cpu_exclusive_val, 0, 32); } } else { /* The pair must be single-copy atomic for *each* doubleword, not @@ -2400,15 +2558,19 @@ static void gen_load_exclusive(DisasContext *s, int rt, int rt2, TCGv_i64 addr2 = tcg_temp_new_i64(tcg_ctx); tcg_gen_addi_i64(tcg_ctx, addr2, addr, 8); - tcg_gen_qemu_ld_i64(tcg_ctx, tcg_ctx->cpu_exclusive_high, addr2, idx, memop); + tcg_gen_qemu_ld_i64(tcg_ctx, tcg_ctx->cpu_exclusive_high, addr2, + idx, memop); tcg_temp_free_i64(tcg_ctx, addr2); - tcg_gen_mov_i64(tcg_ctx, cpu_reg(s, rt), tcg_ctx->cpu_exclusive_val); - tcg_gen_mov_i64(tcg_ctx, cpu_reg(s, rt2), tcg_ctx->cpu_exclusive_high); + tcg_gen_mov_i64(tcg_ctx, cpu_reg(s, rt), + tcg_ctx->cpu_exclusive_val); + tcg_gen_mov_i64(tcg_ctx, cpu_reg(s, rt2), + tcg_ctx->cpu_exclusive_high); } } else { memop |= size | MO_ALIGN; - tcg_gen_qemu_ld_i64(tcg_ctx, tcg_ctx->cpu_exclusive_val, addr, idx, memop); + tcg_gen_qemu_ld_i64(tcg_ctx, tcg_ctx->cpu_exclusive_val, addr, idx, + memop); tcg_gen_mov_i64(tcg_ctx, cpu_reg(s, rt), tcg_ctx->cpu_exclusive_val); } tcg_gen_mov_i64(tcg_ctx, tcg_ctx->cpu_exclusive_addr, addr); @@ -2434,48 +2596,54 @@ static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2, TCGLabel *done_label = gen_new_label(tcg_ctx); TCGv_i64 tmp; - tcg_gen_brcond_i64(tcg_ctx, TCG_COND_NE, addr, tcg_ctx->cpu_exclusive_addr, fail_label); + tcg_gen_brcond_i64(tcg_ctx, TCG_COND_NE, addr, tcg_ctx->cpu_exclusive_addr, + fail_label); tmp = tcg_temp_new_i64(tcg_ctx); if (is_pair) { if (size == 2) { if (s->be_data == MO_LE) { - 
tcg_gen_concat32_i64(tcg_ctx, tmp, cpu_reg(s, rt), cpu_reg(s, rt2)); + tcg_gen_concat32_i64(tcg_ctx, tmp, cpu_reg(s, rt), + cpu_reg(s, rt2)); } else { - tcg_gen_concat32_i64(tcg_ctx, tmp, cpu_reg(s, rt2), cpu_reg(s, rt)); - } - tcg_gen_atomic_cmpxchg_i64(tcg_ctx, tmp, tcg_ctx->cpu_exclusive_addr, - tcg_ctx->cpu_exclusive_val, tmp, - get_mem_index(s), - MO_64 | MO_ALIGN | s->be_data); - tcg_gen_setcond_i64(tcg_ctx, TCG_COND_NE, tmp, tmp, tcg_ctx->cpu_exclusive_val); + tcg_gen_concat32_i64(tcg_ctx, tmp, cpu_reg(s, rt2), + cpu_reg(s, rt)); + } + tcg_gen_atomic_cmpxchg_i64( + tcg_ctx, tmp, tcg_ctx->cpu_exclusive_addr, + tcg_ctx->cpu_exclusive_val, tmp, get_mem_index(s), + MO_64 | MO_ALIGN | s->be_data); + tcg_gen_setcond_i64(tcg_ctx, TCG_COND_NE, tmp, tmp, + tcg_ctx->cpu_exclusive_val); } else if (tb_cflags(s->base.tb) & CF_PARALLEL) { if (!HAVE_CMPXCHG128) { gen_helper_exit_atomic(tcg_ctx, tcg_ctx->cpu_env); s->base.is_jmp = DISAS_NORETURN; } else if (s->be_data == MO_LE) { - gen_helper_paired_cmpxchg64_le_parallel(tcg_ctx, tmp, tcg_ctx->cpu_env, - tcg_ctx->cpu_exclusive_addr, - cpu_reg(s, rt), - cpu_reg(s, rt2)); + gen_helper_paired_cmpxchg64_le_parallel( + tcg_ctx, tmp, tcg_ctx->cpu_env, tcg_ctx->cpu_exclusive_addr, + cpu_reg(s, rt), cpu_reg(s, rt2)); } else { - gen_helper_paired_cmpxchg64_be_parallel(tcg_ctx, tmp, tcg_ctx->cpu_env, - tcg_ctx->cpu_exclusive_addr, - cpu_reg(s, rt), - cpu_reg(s, rt2)); + gen_helper_paired_cmpxchg64_be_parallel( + tcg_ctx, tmp, tcg_ctx->cpu_env, tcg_ctx->cpu_exclusive_addr, + cpu_reg(s, rt), cpu_reg(s, rt2)); } } else if (s->be_data == MO_LE) { - gen_helper_paired_cmpxchg64_le(tcg_ctx, tmp, tcg_ctx->cpu_env, tcg_ctx->cpu_exclusive_addr, + gen_helper_paired_cmpxchg64_le(tcg_ctx, tmp, tcg_ctx->cpu_env, + tcg_ctx->cpu_exclusive_addr, cpu_reg(s, rt), cpu_reg(s, rt2)); } else { - gen_helper_paired_cmpxchg64_be(tcg_ctx, tmp, tcg_ctx->cpu_env, tcg_ctx->cpu_exclusive_addr, + gen_helper_paired_cmpxchg64_be(tcg_ctx, tmp, tcg_ctx->cpu_env, + tcg_ctx->cpu_exclusive_addr, cpu_reg(s, rt), cpu_reg(s, rt2)); } } else { - tcg_gen_atomic_cmpxchg_i64(tcg_ctx, tmp, tcg_ctx->cpu_exclusive_addr, tcg_ctx->cpu_exclusive_val, - cpu_reg(s, rt), get_mem_index(s), + tcg_gen_atomic_cmpxchg_i64(tcg_ctx, tmp, tcg_ctx->cpu_exclusive_addr, + tcg_ctx->cpu_exclusive_val, cpu_reg(s, rt), + get_mem_index(s), size | MO_ALIGN | s->be_data); - tcg_gen_setcond_i64(tcg_ctx, TCG_COND_NE, tmp, tmp, tcg_ctx->cpu_exclusive_val); + tcg_gen_setcond_i64(tcg_ctx, TCG_COND_NE, tmp, tmp, + tcg_ctx->cpu_exclusive_val); } tcg_gen_mov_i64(tcg_ctx, cpu_reg(s, rd), tmp); tcg_temp_free_i64(tcg_ctx, tmp); @@ -2487,8 +2655,8 @@ static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2, tcg_gen_movi_i64(tcg_ctx, tcg_ctx->cpu_exclusive_addr, -1); } -static void gen_compare_and_swap(DisasContext *s, int rs, int rt, - int rn, int size) +static void gen_compare_and_swap(DisasContext *s, int rs, int rt, int rn, + int size) { TCGContext *tcg_ctx = s->uc->tcg_ctx; TCGv_i64 tcg_rs = cpu_reg(s, rs); @@ -2499,13 +2667,13 @@ static void gen_compare_and_swap(DisasContext *s, int rs, int rt, if (rn == 31) { gen_check_sp_alignment(s); } - clean_addr = clean_data_tbi(s, cpu_reg_sp(s, rn)); - tcg_gen_atomic_cmpxchg_i64(tcg_ctx, tcg_rs, clean_addr, tcg_rs, tcg_rt, memidx, - size | MO_ALIGN | s->be_data); + clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn), true, rn != 31, size); + tcg_gen_atomic_cmpxchg_i64(tcg_ctx, tcg_rs, clean_addr, tcg_rs, tcg_rt, + memidx, size | MO_ALIGN | s->be_data); } -static void 
gen_compare_and_swap_pair(DisasContext *s, int rs, int rt, - int rn, int size) +static void gen_compare_and_swap_pair(DisasContext *s, int rs, int rt, int rn, + int size) { TCGContext *tcg_ctx = s->uc->tcg_ctx; TCGv_i64 s1 = cpu_reg(s, rs); @@ -2518,7 +2686,9 @@ static void gen_compare_and_swap_pair(DisasContext *s, int rs, int rt, if (rn == 31) { gen_check_sp_alignment(s); } - clean_addr = clean_data_tbi(s, cpu_reg_sp(s, rn)); + + /* This is a single atomic access, despite the "pair". */ + clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn), true, rn != 31, size + 1); if (size == 2) { TCGv_i64 cmp = tcg_temp_new_i64(tcg_ctx); @@ -2579,7 +2749,8 @@ static void gen_compare_and_swap_pair(DisasContext *s, int rs, int rt, /* If compare equal, write back new data, else write back old data. */ tcg_gen_movcond_i64(tcg_ctx, TCG_COND_NE, c1, c2, zero, t1, d1); tcg_gen_movcond_i64(tcg_ctx, TCG_COND_NE, c2, c2, zero, t2, d2); - tcg_gen_qemu_st_i64(tcg_ctx, c1, clean_addr, memidx, MO_64 | s->be_data); + tcg_gen_qemu_st_i64(tcg_ctx, c1, clean_addr, memidx, + MO_64 | s->be_data); tcg_gen_qemu_st_i64(tcg_ctx, c2, a2, memidx, MO_64 | s->be_data); tcg_temp_free_i64(tcg_ctx, a2); tcg_temp_free_i64(tcg_ctx, c1); @@ -2644,7 +2815,7 @@ static void disas_ldst_excl(DisasContext *s, uint32_t insn) if (is_lasr) { tcg_gen_mb(tcg_ctx, TCG_MO_ALL | TCG_BAR_STRL); } - clean_addr = clean_data_tbi(s, cpu_reg_sp(s, rn)); + clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn), true, rn != 31, size); gen_store_exclusive(s, rs, rt, rt2, clean_addr, size, false); return; @@ -2653,7 +2824,8 @@ static void disas_ldst_excl(DisasContext *s, uint32_t insn) if (rn == 31) { gen_check_sp_alignment(s); } - clean_addr = clean_data_tbi(s, cpu_reg_sp(s, rn)); + clean_addr = + gen_mte_check1(s, cpu_reg_sp(s, rn), false, rn != 31, size); s->is_ldex = true; gen_load_exclusive(s, rt, rt2, clean_addr, size, false); if (is_lasr) { @@ -2673,7 +2845,7 @@ static void disas_ldst_excl(DisasContext *s, uint32_t insn) gen_check_sp_alignment(s); } tcg_gen_mb(tcg_ctx, TCG_MO_ALL | TCG_BAR_STRL); - clean_addr = clean_data_tbi(s, cpu_reg_sp(s, rn)); + clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn), true, rn != 31, size); do_gpr_st(s, cpu_reg(s, rt), clean_addr, size, true, rt, disas_ldst_compute_iss_sf(size, false, 0), is_lasr); return; @@ -2689,13 +2861,15 @@ static void disas_ldst_excl(DisasContext *s, uint32_t insn) if (rn == 31) { gen_check_sp_alignment(s); } - clean_addr = clean_data_tbi(s, cpu_reg_sp(s, rn)); + clean_addr = + gen_mte_check1(s, cpu_reg_sp(s, rn), false, rn != 31, size); do_gpr_ld(s, cpu_reg(s, rt), clean_addr, size, false, false, true, rt, disas_ldst_compute_iss_sf(size, false, 0), is_lasr); tcg_gen_mb(tcg_ctx, TCG_MO_ALL | TCG_BAR_LDAQ); return; - case 0x2: case 0x3: /* CASP / STXP */ + case 0x2: + case 0x3: /* CASP / STXP */ if (size & 2) { /* STXP / STLXP */ if (rn == 31) { gen_check_sp_alignment(s); @@ -2703,25 +2877,27 @@ static void disas_ldst_excl(DisasContext *s, uint32_t insn) if (is_lasr) { tcg_gen_mb(tcg_ctx, TCG_MO_ALL | TCG_BAR_STRL); } - clean_addr = clean_data_tbi(s, cpu_reg_sp(s, rn)); + clean_addr = + gen_mte_check1(s, cpu_reg_sp(s, rn), true, rn != 31, size); gen_store_exclusive(s, rs, rt, rt2, clean_addr, size, true); return; } - if (rt2 == 31 - && ((rt | rs) & 1) == 0 - && dc_isar_feature(aa64_atomics, s)) { + if (rt2 == 31 && ((rt | rs) & 1) == 0 && + dc_isar_feature(aa64_atomics, s)) { /* CASP / CASPL */ gen_compare_and_swap_pair(s, rs, rt, rn, size | 2); return; } break; - case 0x6: case 0x7: /* CASPA / LDXP */ + 
case 0x6: + case 0x7: /* CASPA / LDXP */ if (size & 2) { /* LDXP / LDAXP */ if (rn == 31) { gen_check_sp_alignment(s); } - clean_addr = clean_data_tbi(s, cpu_reg_sp(s, rn)); + clean_addr = + gen_mte_check1(s, cpu_reg_sp(s, rn), false, rn != 31, size); s->is_ldex = true; gen_load_exclusive(s, rt, rt2, clean_addr, size, true); if (is_lasr) { @@ -2729,9 +2905,8 @@ static void disas_ldst_excl(DisasContext *s, uint32_t insn) } return; } - if (rt2 == 31 - && ((rt | rs) & 1) == 0 - && dc_isar_feature(aa64_atomics, s)) { + if (rt2 == 31 && ((rt | rs) & 1) == 0 && + dc_isar_feature(aa64_atomics, s)) { /* CASPA / CASPAL */ gen_compare_and_swap_pair(s, rs, rt, rn, size | 2); return; @@ -2802,8 +2977,8 @@ static void disas_ld_lit(DisasContext *s, uint32_t insn) /* Only unsigned 32bit loads target 32bit registers. */ bool iss_sf = opc != 0; - do_gpr_ld(s, tcg_rt, clean_addr, size, is_signed, false, - true, rt, iss_sf, false); + do_gpr_ld(s, tcg_rt, clean_addr, size, is_signed, false, true, rt, + iss_sf, false); } tcg_temp_free_i64(tcg_ctx, clean_addr); } @@ -2825,7 +3000,7 @@ static void disas_ld_lit(DisasContext *s, uint32_t insn) * +-----+-------+---+---+-------+---+-------+-------+------+------+ * * opc: LDP/STP/LDNP/STNP 00 -> 32 bit, 10 -> 64 bit - * LDPSW 01 + * LDPSW/STGP 01 * LDP/STP/LDNP/STNP (SIMD) 00 -> 32 bit, 01 -> 64 bit, 10 -> 128 bit * V: 0 -> GPR, 1 -> Vector * idx: 00 -> signed offset with non-temporal hint, 01 -> post-index, @@ -2851,6 +3026,7 @@ static void disas_ldst_pair(DisasContext *s, uint32_t insn) bool is_signed = false; bool postindex = false; bool wback = false; + bool set_tag = false; TCGv_i64 clean_addr, dirty_addr; @@ -2863,6 +3039,14 @@ static void disas_ldst_pair(DisasContext *s, uint32_t insn) if (is_vector) { size = 2 + opc; + } else if (opc == 1 && !is_load) { + /* STGP */ + if (!dc_isar_feature(aa64_mte_insn_reg, s) || index == 0) { + unallocated_encoding(s); + return; + } + size = 3; + set_tag = true; } else { size = 2 + extract32(opc, 1, 1); is_signed = extract32(opc, 0, 1); @@ -2903,7 +3087,7 @@ static void disas_ldst_pair(DisasContext *s, uint32_t insn) return; } - offset <<= size; + offset <<= (set_tag ? LOG2_TAG_GRANULE : size); if (rn == 31) { gen_check_sp_alignment(s); @@ -2913,7 +3097,25 @@ static void disas_ldst_pair(DisasContext *s, uint32_t insn) if (!postindex) { tcg_gen_addi_i64(tcg_ctx, dirty_addr, dirty_addr, offset); } - clean_addr = clean_data_tbi(s, dirty_addr); + + if (set_tag) { + if (!s->ata) { + /* + * TODO: We could rely on the stores below, at least for + * system mode, if we arrange to add MO_ALIGN_16. + */ + gen_helper_stg_stub(tcg_ctx, tcg_ctx->cpu_env, dirty_addr); + } else if (tb_cflags(s->base.tb) & CF_PARALLEL) { + gen_helper_stg_parallel(tcg_ctx, tcg_ctx->cpu_env, dirty_addr, + dirty_addr); + } else { + gen_helper_stg(tcg_ctx, tcg_ctx->cpu_env, dirty_addr, dirty_addr); + } + } + + clean_addr = + gen_mte_checkN(s, dirty_addr, !is_load, (wback || rn != 31) && !set_tag, + size, 2 << size); if (is_vector) { if (is_load) { @@ -2937,20 +3139,18 @@ static void disas_ldst_pair(DisasContext *s, uint32_t insn) /* Do not modify tcg_rt before recognizing any exception * from the second load. 
*/ - do_gpr_ld(s, tmp, clean_addr, size, is_signed, false, - false, 0, false, false); + do_gpr_ld(s, tmp, clean_addr, size, is_signed, false, false, 0, + false, false); tcg_gen_addi_i64(tcg_ctx, clean_addr, clean_addr, 1ULL << size); - do_gpr_ld(s, tcg_rt2, clean_addr, size, is_signed, false, - false, 0, false, false); + do_gpr_ld(s, tcg_rt2, clean_addr, size, is_signed, false, false, 0, + false, false); tcg_gen_mov_i64(tcg_ctx, tcg_rt, tmp); tcg_temp_free_i64(tcg_ctx, tmp); } else { - do_gpr_st(s, tcg_rt, clean_addr, size, - false, 0, false, false); + do_gpr_st(s, tcg_rt, clean_addr, size, false, 0, false, false); tcg_gen_addi_i64(tcg_ctx, clean_addr, clean_addr, 1ULL << size); - do_gpr_st(s, tcg_rt2, clean_addr, size, - false, 0, false, false); + do_gpr_st(s, tcg_rt2, clean_addr, size, false, 0, false, false); } } @@ -2978,11 +3178,8 @@ static void disas_ldst_pair(DisasContext *s, uint32_t insn) * size: 00 -> 8 bit, 01 -> 16 bit, 10 -> 32 bit, 11 -> 64bit * opc: 00 -> store, 01 -> loadu, 10 -> loads 64, 11 -> loads 32 */ -static void disas_ldst_reg_imm9(DisasContext *s, uint32_t insn, - int opc, - int size, - int rt, - bool is_vector) +static void disas_ldst_reg_imm9(DisasContext *s, uint32_t insn, int opc, + int size, int rt, bool is_vector) { TCGContext *tcg_ctx = s->uc->tcg_ctx; int rn = extract32(insn, 5, 5); @@ -2995,6 +3192,7 @@ static void disas_ldst_reg_imm9(DisasContext *s, uint32_t insn, bool iss_valid = !is_vector; bool post_index; bool writeback; + int memidx; TCGv_i64 clean_addr, dirty_addr; @@ -3052,7 +3250,11 @@ static void disas_ldst_reg_imm9(DisasContext *s, uint32_t insn, if (!post_index) { tcg_gen_addi_i64(tcg_ctx, dirty_addr, dirty_addr, imm9); } - clean_addr = clean_data_tbi(s, dirty_addr); + + memidx = is_unpriv ? get_a64_user_mem_index(s) : get_mem_index(s); + clean_addr = + gen_mte_check1_mmuidx(s, dirty_addr, is_store, writeback || rn != 31, + size, is_unpriv, memidx); if (is_vector) { if (is_store) { @@ -3062,16 +3264,14 @@ static void disas_ldst_reg_imm9(DisasContext *s, uint32_t insn, } } else { TCGv_i64 tcg_rt = cpu_reg(s, rt); - int memidx = is_unpriv ? get_a64_user_mem_index(s) : get_mem_index(s); bool iss_sf = disas_ldst_compute_iss_sf(size, is_signed, opc); if (is_store) { - do_gpr_st_memidx(s, tcg_rt, clean_addr, size, memidx, - iss_valid, rt, iss_sf, false); + do_gpr_st_memidx(s, tcg_rt, clean_addr, size, memidx, iss_valid, rt, + iss_sf, false); } else { - do_gpr_ld_memidx(s, tcg_rt, clean_addr, size, - is_signed, is_extended, memidx, - iss_valid, rt, iss_sf, false); + do_gpr_ld_memidx(s, tcg_rt, clean_addr, size, is_signed, + is_extended, memidx, iss_valid, rt, iss_sf, false); } } @@ -3105,11 +3305,8 @@ static void disas_ldst_reg_imm9(DisasContext *s, uint32_t insn, * Rn: address register or SP for base * Rm: offset register or ZR for offset */ -static void disas_ldst_reg_roffset(DisasContext *s, uint32_t insn, - int opc, - int size, - int rt, - bool is_vector) +static void disas_ldst_reg_roffset(DisasContext *s, uint32_t insn, int opc, + int size, int rt, bool is_vector) { TCGContext *tcg_ctx = s->uc->tcg_ctx; int rn = extract32(insn, 5, 5); @@ -3160,7 +3357,7 @@ static void disas_ldst_reg_roffset(DisasContext *s, uint32_t insn, ext_and_shift_reg(tcg_ctx, tcg_rm, tcg_rm, opt, shift ? 
size : 0); tcg_gen_add_i64(tcg_ctx, dirty_addr, dirty_addr, tcg_rm); - clean_addr = clean_data_tbi(s, dirty_addr); + clean_addr = gen_mte_check1(s, dirty_addr, is_store, true, size); if (is_vector) { if (is_store) { @@ -3172,12 +3369,10 @@ static void disas_ldst_reg_roffset(DisasContext *s, uint32_t insn, TCGv_i64 tcg_rt = cpu_reg(s, rt); bool iss_sf = disas_ldst_compute_iss_sf(size, is_signed, opc); if (is_store) { - do_gpr_st(s, tcg_rt, clean_addr, size, - true, rt, iss_sf, false); + do_gpr_st(s, tcg_rt, clean_addr, size, true, rt, iss_sf, false); } else { - do_gpr_ld(s, tcg_rt, clean_addr, size, - is_signed, is_extended, - true, rt, iss_sf, false); + do_gpr_ld(s, tcg_rt, clean_addr, size, is_signed, is_extended, true, + rt, iss_sf, false); } } } @@ -3199,11 +3394,8 @@ static void disas_ldst_reg_roffset(DisasContext *s, uint32_t insn, * Rn: base address register (inc SP) * Rt: target register */ -static void disas_ldst_reg_unsigned_imm(DisasContext *s, uint32_t insn, - int opc, - int size, - int rt, - bool is_vector) +static void disas_ldst_reg_unsigned_imm(DisasContext *s, uint32_t insn, int opc, + int size, int rt, bool is_vector) { TCGContext *tcg_ctx = s->uc->tcg_ctx; int rn = extract32(insn, 5, 5); @@ -3246,7 +3438,7 @@ static void disas_ldst_reg_unsigned_imm(DisasContext *s, uint32_t insn, dirty_addr = read_cpu_reg_sp(s, rn, 1); offset = imm12 << size; tcg_gen_addi_i64(tcg_ctx, dirty_addr, dirty_addr, offset); - clean_addr = clean_data_tbi(s, dirty_addr); + clean_addr = gen_mte_check1(s, dirty_addr, is_store, rn != 31, size); if (is_vector) { if (is_store) { @@ -3258,11 +3450,10 @@ static void disas_ldst_reg_unsigned_imm(DisasContext *s, uint32_t insn, TCGv_i64 tcg_rt = cpu_reg(s, rt); bool iss_sf = disas_ldst_compute_iss_sf(size, is_signed, opc); if (is_store) { - do_gpr_st(s, tcg_rt, clean_addr, size, - true, rt, iss_sf, false); + do_gpr_st(s, tcg_rt, clean_addr, size, true, rt, iss_sf, false); } else { - do_gpr_ld(s, tcg_rt, clean_addr, size, is_signed, is_extended, - true, rt, iss_sf, false); + do_gpr_ld(s, tcg_rt, clean_addr, size, is_signed, is_extended, true, + rt, iss_sf, false); } } } @@ -3281,8 +3472,8 @@ static void disas_ldst_reg_unsigned_imm(DisasContext *s, uint32_t insn, * A: acquire flag * R: release flag */ -static void disas_ldst_atomic(DisasContext *s, uint32_t insn, - int size, int rt, bool is_vector) +static void disas_ldst_atomic(DisasContext *s, uint32_t insn, int size, int rt, + bool is_vector) { TCGContext *tcg_ctx = s->uc->tcg_ctx; int rs = extract32(insn, 16, 5); @@ -3291,7 +3482,7 @@ static void disas_ldst_atomic(DisasContext *s, uint32_t insn, bool r = extract32(insn, 22, 1); bool a = extract32(insn, 23, 1); TCGv_i64 tcg_rs, clean_addr; - AtomicThreeOpFn *fn; + AtomicThreeOpFn *fn = NULL; if (is_vector || !dc_isar_feature(aa64_atomics, s)) { unallocated_encoding(s); @@ -3326,8 +3517,8 @@ static void disas_ldst_atomic(DisasContext *s, uint32_t insn, fn = tcg_gen_atomic_xchg_i64; break; case 014: /* LDAPR, LDAPRH, LDAPRB */ - if (!dc_isar_feature(aa64_rcpc_8_3, s) || - rs != 31 || a != 1 || r != 0) { + if (!dc_isar_feature(aa64_rcpc_8_3, s) || rs != 31 || a != 1 || + r != 0) { unallocated_encoding(s); return; } @@ -3340,7 +3531,7 @@ static void disas_ldst_atomic(DisasContext *s, uint32_t insn, if (rn == 31) { gen_check_sp_alignment(s); } - clean_addr = clean_data_tbi(s, cpu_reg_sp(s, rn)); + clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn), false, rn != 31, size); if (o3_opc == 014) { /* @@ -3350,8 +3541,8 @@ static void disas_ldst_atomic(DisasContext *s, 
uint32_t insn, * full load-acquire (we only need "load-acquire processor consistent"), * but we choose to implement them as full LDAQ. */ - do_gpr_ld(s, cpu_reg(s, rt), clean_addr, size, false, false, - true, rt, disas_ldst_compute_iss_sf(size, false, 0), true); + do_gpr_ld(s, cpu_reg(s, rt), clean_addr, size, false, false, true, rt, + disas_ldst_compute_iss_sf(size, false, 0), true); tcg_gen_mb(tcg_ctx, TCG_MO_ALL | TCG_BAR_LDAQ); return; } @@ -3384,8 +3575,8 @@ static void disas_ldst_atomic(DisasContext *s, uint32_t insn, * W: pre-indexing flag * S: sign for imm9. */ -static void disas_ldst_pac(DisasContext *s, uint32_t insn, - int size, int rt, bool is_vector) +static void disas_ldst_pac(DisasContext *s, uint32_t insn, int size, int rt, + bool is_vector) { TCGContext *tcg_ctx = s->uc->tcg_ctx; int rn = extract32(insn, 5, 5); @@ -3406,9 +3597,11 @@ static void disas_ldst_pac(DisasContext *s, uint32_t insn, if (s->pauth_active) { if (use_key_a) { - gen_helper_autda(tcg_ctx, dirty_addr, tcg_ctx->cpu_env, dirty_addr, tcg_ctx->cpu_X[31]); + gen_helper_autda(tcg_ctx, dirty_addr, tcg_ctx->cpu_env, dirty_addr, + new_tmp_a64_zero(s)); } else { - gen_helper_autdb(tcg_ctx, dirty_addr, tcg_ctx->cpu_env, dirty_addr, tcg_ctx->cpu_X[31]); + gen_helper_autdb(tcg_ctx, dirty_addr, tcg_ctx->cpu_env, dirty_addr, + new_tmp_a64_zero(s)); } } @@ -3418,7 +3611,8 @@ static void disas_ldst_pac(DisasContext *s, uint32_t insn, tcg_gen_addi_i64(tcg_ctx, dirty_addr, dirty_addr, offset); /* Note that "clean" and "dirty" here refer to TBI not PAC. */ - clean_addr = clean_data_tbi(s, dirty_addr); + clean_addr = + gen_mte_check1(s, dirty_addr, false, is_wback || rn != 31, size); tcg_rt = cpu_reg(s, rt); do_gpr_ld(s, tcg_rt, clean_addr, size, /* is_signed */ false, @@ -3507,8 +3701,8 @@ static void disas_ldst_ldapr_stlr(DisasContext *s, uint32_t insn) * Load-AcquirePC semantics; we implement as the slightly more * restrictive Load-Acquire. */ - do_gpr_ld(s, cpu_reg(s, rt), clean_addr, size, is_signed, extend, - true, rt, iss_sf, true); + do_gpr_ld(s, cpu_reg(s, rt), clean_addr, size, is_signed, extend, true, + rt, iss_sf, true); tcg_gen_mb(tcg_ctx, TCG_MO_ALL | TCG_BAR_LDAQ); } } @@ -3582,10 +3776,10 @@ static void disas_ldst_multiple_struct(DisasContext *s, uint32_t insn) TCGv_i64 clean_addr, tcg_rn, tcg_ebytes; MemOp endian = s->be_data; - int ebytes; /* bytes per element */ + int total; /* bytes per element */ int elements; /* elements per vector */ - int rpt; /* num iterations */ - int selem; /* structure elements */ + int rpt; /* num iterations */ + int selem; /* structure elements */ int r; if (extract32(insn, 31, 1) || extract32(insn, 21, 1)) { @@ -3652,19 +3846,26 @@ static void disas_ldst_multiple_struct(DisasContext *s, uint32_t insn) endian = MO_LE; } - /* Consecutive little-endian elements from a single register + total = rpt * selem * (is_q ? 16 : 8); + tcg_rn = cpu_reg_sp(s, rn); + + /* + * Issue the MTE check vs the logical repeat count, before we + * promote consecutive little-endian elements below. + */ + clean_addr = gen_mte_checkN(s, tcg_rn, is_store, is_postidx || rn != 31, + size, total); + + /* + * Consecutive little-endian elements from a single register * can be promoted to a larger little-endian operation. */ if (selem == 1 && endian == MO_LE) { size = 3; } - ebytes = 1 << size; - elements = (is_q ? 16 : 8) / ebytes; - - tcg_rn = cpu_reg_sp(s, rn); - clean_addr = clean_data_tbi(s, tcg_rn); - tcg_ebytes = tcg_const_i64(tcg_ctx, ebytes); + elements = (is_q ? 
16 : 8) >> size; + tcg_ebytes = tcg_const_i64(tcg_ctx, 1 << size); for (r = 0; r < rpt; r++) { int e; for (e = 0; e < elements; e++) { @@ -3698,7 +3899,7 @@ static void disas_ldst_multiple_struct(DisasContext *s, uint32_t insn) if (is_postidx) { if (rm == 31) { - tcg_gen_addi_i64(tcg_ctx, tcg_rn, tcg_rn, rpt * elements * selem * ebytes); + tcg_gen_addi_i64(tcg_ctx, tcg_rn, tcg_rn, total); } else { tcg_gen_add_i64(tcg_ctx, tcg_rn, tcg_rn, cpu_reg(s, rm)); } @@ -3745,7 +3946,7 @@ static void disas_ldst_single_struct(DisasContext *s, uint32_t insn) int selem = (extract32(opc, 0, 1) << 1 | R) + 1; bool replicate = false; int index = is_q << 3 | S << 2 | size; - int ebytes, xs; + int xs, total; TCGv_i64 clean_addr, tcg_rn, tcg_ebytes; if (extract32(insn, 31, 1)) { @@ -3799,26 +4000,26 @@ static void disas_ldst_single_struct(DisasContext *s, uint32_t insn) return; } - ebytes = 1 << scale; - if (rn == 31) { gen_check_sp_alignment(s); } + total = selem << scale; tcg_rn = cpu_reg_sp(s, rn); - clean_addr = clean_data_tbi(s, tcg_rn); - tcg_ebytes = tcg_const_i64(tcg_ctx, ebytes); + clean_addr = gen_mte_checkN(s, tcg_rn, !is_load, is_postidx || rn != 31, + scale, total); + + tcg_ebytes = tcg_const_i64(tcg_ctx, 1 << scale); for (xs = 0; xs < selem; xs++) { if (replicate) { /* Load and replicate to all elements */ TCGv_i64 tcg_tmp = tcg_temp_new_i64(tcg_ctx); - tcg_gen_qemu_ld_i64(tcg_ctx, tcg_tmp, clean_addr, - get_mem_index(s), s->be_data + scale); + tcg_gen_qemu_ld_i64(tcg_ctx, tcg_tmp, clean_addr, get_mem_index(s), + s->be_data + scale); tcg_gen_gvec_dup_i64(tcg_ctx, scale, vec_full_reg_offset(s, rt), - (is_q + 1) * 8, vec_full_reg_size(s), - tcg_tmp); + (is_q + 1) * 8, vec_full_reg_size(s), tcg_tmp); tcg_temp_free_i64(tcg_ctx, tcg_tmp); } else { /* Load/store one element per register */ @@ -3835,19 +4036,235 @@ static void disas_ldst_single_struct(DisasContext *s, uint32_t insn) if (is_postidx) { if (rm == 31) { - tcg_gen_addi_i64(tcg_ctx, tcg_rn, tcg_rn, selem * ebytes); + tcg_gen_addi_i64(tcg_ctx, tcg_rn, tcg_rn, total); } else { tcg_gen_add_i64(tcg_ctx, tcg_rn, tcg_rn, cpu_reg(s, rm)); } } } +/* + * Load/Store memory tags + * + * 31 30 29 24 22 21 12 10 5 0 + * +-----+-------------+-----+---+------+-----+------+------+ + * | 1 1 | 0 1 1 0 0 1 | op1 | 1 | imm9 | op2 | Rn | Rt | + * +-----+-------------+-----+---+------+-----+------+------+ + */ +static void disas_ldst_tag(DisasContext *s, uint32_t insn) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + + if (HOOK_EXISTS_BOUNDED(s->uc, UC_HOOK_MEM_READ, s->pc_curr) || + HOOK_EXISTS_BOUNDED(s->uc, UC_HOOK_MEM_WRITE, s->pc_curr)) { + // sync PC if there are memory hooks. + // TODO: Better granularity by checking ldst type and corresponding hook + // type + gen_a64_set_pc_im(s->uc->tcg_ctx, s->pc_curr); + } + + int rt = extract32(insn, 0, 5); + int rn = extract32(insn, 5, 5); + uint64_t offset = sextract64(insn, 12, 9) << LOG2_TAG_GRANULE; + int op2 = extract32(insn, 10, 2); + int op1 = extract32(insn, 22, 2); + bool is_load = false, is_pair = false, is_zero = false, is_mult = false; + int index = 0; + TCGv_i64 addr, clean_addr, tcg_rt; + + /* We checked insn bits [29:24,21] in the caller. 
*/ + if (extract32(insn, 30, 2) != 3) { + goto do_unallocated; + } + + /* + * @index is a tri-state variable which has 3 states: + * < 0 : post-index, writeback + * = 0 : signed offset + * > 0 : pre-index, writeback + */ + switch (op1) { + case 0: + if (op2 != 0) { + /* STG */ + index = op2 - 2; + } else { + /* STZGM */ + if (s->current_el == 0 || offset != 0) { + goto do_unallocated; + } + is_mult = is_zero = true; + } + break; + case 1: + if (op2 != 0) { + /* STZG */ + is_zero = true; + index = op2 - 2; + } else { + /* LDG */ + is_load = true; + } + break; + case 2: + if (op2 != 0) { + /* ST2G */ + is_pair = true; + index = op2 - 2; + } else { + /* STGM */ + if (s->current_el == 0 || offset != 0) { + goto do_unallocated; + } + is_mult = true; + } + break; + case 3: + if (op2 != 0) { + /* STZ2G */ + is_pair = is_zero = true; + index = op2 - 2; + } else { + /* LDGM */ + if (s->current_el == 0 || offset != 0) { + goto do_unallocated; + } + is_mult = is_load = true; + } + break; + + default: + do_unallocated: + unallocated_encoding(s); + return; + } + + if (is_mult ? !dc_isar_feature(aa64_mte, s) + : !dc_isar_feature(aa64_mte_insn_reg, s)) { + goto do_unallocated; + } + + if (rn == 31) { + gen_check_sp_alignment(s); + } + + addr = read_cpu_reg_sp(s, rn, true); + if (index >= 0) { + /* pre-index or signed offset */ + tcg_gen_addi_i64(tcg_ctx, addr, addr, offset); + } + + if (is_mult) { + tcg_rt = cpu_reg(s, rt); + + if (is_zero) { + int size = 4 << s->dcz_blocksize; + + if (s->ata) { + gen_helper_stzgm_tags(tcg_ctx, tcg_ctx->cpu_env, addr, tcg_rt); + } + /* + * The non-tags portion of STZGM is mostly like DC_ZVA, + * except the alignment happens before the access. + */ + clean_addr = clean_data_tbi(s, addr); + tcg_gen_andi_i64(tcg_ctx, clean_addr, clean_addr, -size); + gen_helper_dc_zva(tcg_ctx, tcg_ctx->cpu_env, clean_addr); + } else if (s->ata) { + if (is_load) { + gen_helper_ldgm(tcg_ctx, tcg_rt, tcg_ctx->cpu_env, addr); + } else { + gen_helper_stgm(tcg_ctx, tcg_ctx->cpu_env, addr, tcg_rt); + } + } else { + MMUAccessType acc = is_load ? MMU_DATA_LOAD : MMU_DATA_STORE; + int size = 4 << GMID_EL1_BS; + + clean_addr = clean_data_tbi(s, addr); + tcg_gen_andi_i64(tcg_ctx, clean_addr, clean_addr, -size); + gen_probe_access(s, clean_addr, acc, size); + + if (is_load) { + /* The result tags are zeros. */ + tcg_gen_movi_i64(tcg_ctx, tcg_rt, 0); + } + } + return; + } + + if (is_load) { + tcg_gen_andi_i64(tcg_ctx, addr, addr, -TAG_GRANULE); + tcg_rt = cpu_reg(s, rt); + if (s->ata) { + gen_helper_ldg(tcg_ctx, tcg_rt, tcg_ctx->cpu_env, addr, tcg_rt); + } else { + clean_addr = clean_data_tbi(s, addr); + gen_probe_access(s, clean_addr, MMU_DATA_LOAD, MO_8); + gen_address_with_allocation_tag0(tcg_ctx, tcg_rt, addr); + } + } else { + tcg_rt = cpu_reg_sp(s, rt); + if (!s->ata) { + /* + * For STG and ST2G, we need to check alignment and probe memory. + * TODO: For STZG and STZ2G, we could rely on the stores below, + * at least for system mode; user-only won't enforce alignment. 
+ */ + if (is_pair) { + gen_helper_st2g_stub(tcg_ctx, tcg_ctx->cpu_env, addr); + } else { + gen_helper_stg_stub(tcg_ctx, tcg_ctx->cpu_env, addr); + } + } else if (tb_cflags(s->base.tb) & CF_PARALLEL) { + if (is_pair) { + gen_helper_st2g_parallel(tcg_ctx, tcg_ctx->cpu_env, addr, + tcg_rt); + } else { + gen_helper_stg_parallel(tcg_ctx, tcg_ctx->cpu_env, addr, + tcg_rt); + } + } else { + if (is_pair) { + gen_helper_st2g(tcg_ctx, tcg_ctx->cpu_env, addr, tcg_rt); + } else { + gen_helper_stg(tcg_ctx, tcg_ctx->cpu_env, addr, tcg_rt); + } + } + } + + if (is_zero) { + TCGv_i64 clean_addr = clean_data_tbi(s, addr); + TCGv_i64 tcg_zero = tcg_const_i64(tcg_ctx, 0); + int mem_index = get_mem_index(s); + int i, n = (1 + is_pair) << LOG2_TAG_GRANULE; + + tcg_gen_qemu_st_i64(tcg_ctx, tcg_zero, clean_addr, mem_index, + MO_Q | MO_ALIGN_16); + for (i = 8; i < n; i += 8) { + tcg_gen_addi_i64(tcg_ctx, clean_addr, clean_addr, 8); + tcg_gen_qemu_st_i64(tcg_ctx, tcg_zero, clean_addr, mem_index, MO_Q); + } + tcg_temp_free_i64(tcg_ctx, tcg_zero); + } + + if (index != 0) { + /* pre-index or post-index */ + if (index < 0) { + /* post-index */ + tcg_gen_addi_i64(tcg_ctx, addr, addr, offset); + } + tcg_gen_mov_i64(tcg_ctx, cpu_reg_sp(s, rn), addr); + } +} + /* Loads and stores */ static void disas_ldst(DisasContext *s, uint32_t insn) { - if (HOOK_EXISTS_BOUNDED(s->uc, UC_HOOK_MEM_READ, s->pc_curr) || HOOK_EXISTS_BOUNDED(s->uc, UC_HOOK_MEM_WRITE, s->pc_curr)) { + if (HOOK_EXISTS_BOUNDED(s->uc, UC_HOOK_MEM_READ, s->pc_curr) || + HOOK_EXISTS_BOUNDED(s->uc, UC_HOOK_MEM_WRITE, s->pc_curr)) { // sync PC if there are memory hooks. - // TODO: Better granularity by checking ldst type and corresponding hook type + // TODO: Better granularity by checking ldst type and corresponding hook + // type gen_a64_set_pc_im(s->uc->tcg_ctx, s->pc_curr); } @@ -3855,15 +4272,20 @@ static void disas_ldst(DisasContext *s, uint32_t insn) case 0x08: /* Load/store exclusive */ disas_ldst_excl(s, insn); break; - case 0x18: case 0x1c: /* Load register (literal) */ + case 0x18: + case 0x1c: /* Load register (literal) */ disas_ld_lit(s, insn); break; - case 0x28: case 0x29: - case 0x2c: case 0x2d: /* Load/store pair (all forms) */ + case 0x28: + case 0x29: + case 0x2c: + case 0x2d: /* Load/store pair (all forms) */ disas_ldst_pair(s, insn); break; - case 0x38: case 0x39: - case 0x3c: case 0x3d: /* Load/store register (all forms) */ + case 0x38: + case 0x39: + case 0x3c: + case 0x3d: /* Load/store register (all forms) */ disas_ldst_reg(s, insn); break; case 0x0c: /* AdvSIMD load/store multiple structures */ @@ -3872,13 +4294,14 @@ static void disas_ldst(DisasContext *s, uint32_t insn) case 0x0d: /* AdvSIMD load/store single structure */ disas_ldst_single_struct(s, insn); break; - case 0x19: /* LDAPR/STLR (unscaled immediate) */ - if (extract32(insn, 10, 2) != 0 || - extract32(insn, 21, 1) != 0) { + case 0x19: + if (extract32(insn, 21, 1) != 0) { + disas_ldst_tag(s, insn); + } else if (extract32(insn, 10, 2) == 0) { + disas_ldst_ldapr_stlr(s, insn); + } else { unallocated_encoding(s); - break; } - disas_ldst_ldapr_stlr(s, insn); break; default: unallocated_encoding(s); @@ -3919,14 +4342,14 @@ static void disas_pc_rel_adr(DisasContext *s, uint32_t insn) * Add/subtract (immediate) * * 31 30 29 28 24 23 22 21 10 9 5 4 0 - * +--+--+--+-----------+-----+-------------+-----+-----+ - * |sf|op| S| 1 0 0 0 1 |shift| imm12 | Rn | Rd | - * +--+--+--+-----------+-----+-------------+-----+-----+ + * +--+--+--+-------------+--+-------------+-----+-----+ + * |sf|op| 
S| 1 0 0 0 1 0 |sh| imm12 | Rn | Rd | + * +--+--+--+-------------+--+-------------+-----+-----+ * * sf: 0 -> 32bit, 1 -> 64bit * op: 0 -> add , 1 -> sub * S: 1 -> set flags - * shift: 00 -> LSL imm by 0, 01 -> LSL imm by 12 + * sh: 1 -> LSL imm by 12 */ static void disas_add_sub_imm(DisasContext *s, uint32_t insn) { @@ -3934,7 +4357,7 @@ static void disas_add_sub_imm(DisasContext *s, uint32_t insn) int rd = extract32(insn, 0, 5); int rn = extract32(insn, 5, 5); uint64_t imm = extract32(insn, 10, 12); - int shift = extract32(insn, 22, 2); + bool shift = extract32(insn, 22, 1); bool setflags = extract32(insn, 29, 1); bool sub_op = extract32(insn, 30, 1); bool is_64bit = extract32(insn, 31, 1); @@ -3943,13 +4366,8 @@ static void disas_add_sub_imm(DisasContext *s, uint32_t insn) TCGv_i64 tcg_rd = setflags ? cpu_reg(s, rd) : cpu_reg_sp(s, rd); TCGv_i64 tcg_result; - switch (shift) { - case 0x0: - break; - case 0x1: + if (shift) { imm <<= 12; - break; - default: unallocated_encoding(s); return; } @@ -3980,6 +4398,57 @@ static void disas_add_sub_imm(DisasContext *s, uint32_t insn) tcg_temp_free_i64(tcg_ctx, tcg_result); } +/* + * Add/subtract (immediate, with tags) + * + * 31 30 29 28 23 22 21 16 14 10 9 5 4 0 + * +--+--+--+-------------+--+---------+--+-------+-----+-----+ + * |sf|op| S| 1 0 0 0 1 1 |o2| uimm6 |o3| uimm4 | Rn | Rd | + * +--+--+--+-------------+--+---------+--+-------+-----+-----+ + * + * op: 0 -> add, 1 -> sub + */ +static void disas_add_sub_imm_with_tags(DisasContext *s, uint32_t insn) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + + int rd = extract32(insn, 0, 5); + int rn = extract32(insn, 5, 5); + int uimm4 = extract32(insn, 10, 4); + int uimm6 = extract32(insn, 16, 6); + bool sub_op = extract32(insn, 30, 1); + TCGv_i64 tcg_rn, tcg_rd; + int imm; + + /* Test all of sf=1, S=0, o2=0, o3=0. */ + if ((insn & 0xa040c000u) != 0x80000000u || + !dc_isar_feature(aa64_mte_insn_reg, s)) { + unallocated_encoding(s); + return; + } + + imm = uimm6 << LOG2_TAG_GRANULE; + if (sub_op) { + imm = -imm; + } + + tcg_rn = cpu_reg_sp(s, rn); + tcg_rd = cpu_reg_sp(s, rd); + + if (s->ata) { + TCGv_i32 offset = tcg_const_i32(tcg_ctx, imm); + TCGv_i32 tag_offset = tcg_const_i32(tcg_ctx, uimm4); + + gen_helper_addsubg(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, tcg_rn, offset, + tag_offset); + tcg_temp_free_i32(tcg_ctx, tag_offset); + tcg_temp_free_i32(tcg_ctx, offset); + } else { + tcg_gen_addi_i64(tcg_ctx, tcg_rd, tcg_rn, imm); + gen_address_with_allocation_tag0(tcg_ctx, tcg_rd, tcg_rd); + } +} + /* The input should be a value in the bottom e bits (with higher * bits zero); returns that value replicated into every element * of size e in a 64 bit integer. @@ -4267,7 +4736,7 @@ static void disas_bitfield(DisasContext *s, uint32_t insn) return; } - done: +done: if (!sf) { /* zero extend final result */ tcg_gen_ext32u_i64(tcg_ctx, tcg_rd, tcg_rd); } @@ -4340,12 +4809,16 @@ static void disas_extract(DisasContext *s, uint32_t insn) static void disas_data_proc_imm(DisasContext *s, uint32_t insn) { switch (extract32(insn, 23, 6)) { - case 0x20: case 0x21: /* PC-rel. addressing */ + case 0x20: + case 0x21: /* PC-rel. 
addressing */ disas_pc_rel_adr(s, insn); break; - case 0x22: case 0x23: /* Add/subtract (immediate) */ + case 0x22: /* Add/subtract (immediate) */ disas_add_sub_imm(s, insn); break; + case 0x23: /* Add/subtract (immediate, with tags) */ + disas_add_sub_imm_with_tags(s, insn); + break; case 0x24: /* Logical (immediate) */ disas_logic_imm(s, insn); break; @@ -4414,8 +4887,9 @@ static void shift_reg(TCGContext *tcg_ctx, TCGv_i64 dst, TCGv_i64 src, int sf, * The shift amount must be in range (this should always be true as the * relevant instructions will UNDEF on bad shift immediates). */ -static void shift_reg_imm(TCGContext *tcg_ctx, TCGv_i64 dst, TCGv_i64 src, int sf, - enum a64_shift_type shift_type, unsigned int shift_i) +static void shift_reg_imm(TCGContext *tcg_ctx, TCGv_i64 dst, TCGv_i64 src, + int sf, enum a64_shift_type shift_type, + unsigned int shift_i) { assert(shift_i < (sf ? 64 : 32)); @@ -4675,9 +5149,8 @@ static void disas_data_proc_3src(DisasContext *s, uint32_t insn) int rn = extract32(insn, 5, 5); int ra = extract32(insn, 10, 5); int rm = extract32(insn, 16, 5); - int op_id = (extract32(insn, 29, 3) << 4) | - (extract32(insn, 21, 3) << 1) | - extract32(insn, 15, 1); + int op_id = (extract32(insn, 29, 3) << 4) | (extract32(insn, 21, 3) << 1) | + extract32(insn, 15, 1); bool sf = extract32(insn, 31, 1); bool is_sub = extract32(op_id, 0, 1); bool is_high = extract32(op_id, 2, 1); @@ -4693,8 +5166,8 @@ static void disas_data_proc_3src(DisasContext *s, uint32_t insn) case 0x44: /* SMULH */ is_signed = true; break; - case 0x0: /* MADD (32bit) */ - case 0x1: /* MSUB (32bit) */ + case 0x0: /* MADD (32bit) */ + case 0x1: /* MSUB (32bit) */ case 0x40: /* MADD (64bit) */ case 0x41: /* MSUB (64bit) */ case 0x4a: /* UMADDL */ @@ -4866,7 +5339,7 @@ static void disas_evaluate_into_flags(DisasContext *s, uint32_t insn) unallocated_encoding(s); return; } - shift = sz ? 16 : 24; /* SETF16 or SETF8 */ + shift = sz ? 16 : 24; /* SETF16 or SETF8 */ tmp = tcg_temp_new_i32(tcg_ctx); tcg_gen_extrl_i64_i32(tcg_ctx, tmp, cpu_reg(s, rn)); @@ -5016,7 +5489,8 @@ static void disas_cond_select(DisasContext *s, uint32_t insn) if (rn == 31 && rm == 31 && (else_inc ^ else_inv)) { /* CSET & CSETM. 
*/ - tcg_gen_setcond_i64(tcg_ctx, tcg_invert_cond(c.cond), tcg_rd, c.value, zero); + tcg_gen_setcond_i64(tcg_ctx, tcg_invert_cond(c.cond), tcg_rd, c.value, + zero); if (else_inv) { tcg_gen_neg_i64(tcg_ctx, tcg_rd, tcg_rd); } @@ -5030,7 +5504,8 @@ static void disas_cond_select(DisasContext *s, uint32_t insn) } else if (else_inc) { tcg_gen_addi_i64(tcg_ctx, t_false, t_false, 1); } - tcg_gen_movcond_i64(tcg_ctx, c.cond, tcg_rd, c.value, zero, t_true, t_false); + tcg_gen_movcond_i64(tcg_ctx, c.cond, tcg_rd, c.value, zero, t_true, + t_false); } tcg_temp_free_i64(tcg_ctx, zero); @@ -5041,8 +5516,8 @@ static void disas_cond_select(DisasContext *s, uint32_t insn) } } -static void handle_clz(DisasContext *s, unsigned int sf, - unsigned int rn, unsigned int rd) +static void handle_clz(DisasContext *s, unsigned int sf, unsigned int rn, + unsigned int rd) { TCGContext *tcg_ctx = s->uc->tcg_ctx; TCGv_i64 tcg_rd, tcg_rn; @@ -5060,8 +5535,8 @@ static void handle_clz(DisasContext *s, unsigned int sf, } } -static void handle_cls(DisasContext *s, unsigned int sf, - unsigned int rn, unsigned int rd) +static void handle_cls(DisasContext *s, unsigned int sf, unsigned int rn, + unsigned int rd) { TCGContext *tcg_ctx = s->uc->tcg_ctx; TCGv_i64 tcg_rd, tcg_rn; @@ -5079,8 +5554,8 @@ static void handle_cls(DisasContext *s, unsigned int sf, } } -static void handle_rbit(DisasContext *s, unsigned int sf, - unsigned int rn, unsigned int rd) +static void handle_rbit(DisasContext *s, unsigned int sf, unsigned int rn, + unsigned int rd) { TCGContext *tcg_ctx = s->uc->tcg_ctx; TCGv_i64 tcg_rd, tcg_rn; @@ -5099,8 +5574,8 @@ static void handle_rbit(DisasContext *s, unsigned int sf, } /* REV with sf==1, opcode==3 ("REV64") */ -static void handle_rev64(DisasContext *s, unsigned int sf, - unsigned int rn, unsigned int rd) +static void handle_rev64(DisasContext *s, unsigned int sf, unsigned int rn, + unsigned int rd) { TCGContext *tcg_ctx = s->uc->tcg_ctx; if (!sf) { @@ -5113,8 +5588,8 @@ static void handle_rev64(DisasContext *s, unsigned int sf, /* REV with sf==0, opcode==2 * REV32 (sf==1, opcode==2) */ -static void handle_rev32(DisasContext *s, unsigned int sf, - unsigned int rn, unsigned int rd) +static void handle_rev32(DisasContext *s, unsigned int sf, unsigned int rn, + unsigned int rd) { TCGContext *tcg_ctx = s->uc->tcg_ctx; TCGv_i64 tcg_rd = cpu_reg(s, rd); @@ -5138,14 +5613,15 @@ static void handle_rev32(DisasContext *s, unsigned int sf, } /* REV16 (opcode==1) */ -static void handle_rev16(DisasContext *s, unsigned int sf, - unsigned int rn, unsigned int rd) +static void handle_rev16(DisasContext *s, unsigned int sf, unsigned int rn, + unsigned int rd) { TCGContext *tcg_ctx = s->uc->tcg_ctx; TCGv_i64 tcg_rd = cpu_reg(s, rd); TCGv_i64 tcg_tmp = tcg_temp_new_i64(tcg_ctx); TCGv_i64 tcg_rn = read_cpu_reg(s, rn, sf); - TCGv_i64 mask = tcg_const_i64(tcg_ctx, sf ? 0x00ff00ff00ff00ffull : 0x00ff00ff); + TCGv_i64 mask = + tcg_const_i64(tcg_ctx, sf ? 
0x00ff00ff00ff00ffull : 0x00ff00ff); tcg_gen_shri_i64(tcg_ctx, tcg_tmp, tcg_rn, 8); tcg_gen_and_i64(tcg_ctx, tcg_rd, tcg_rn, mask); @@ -5209,7 +5685,8 @@ static void disas_data_proc_1src(DisasContext *s, uint32_t insn) case MAP(1, 0x01, 0x00): /* PACIA */ if (s->pauth_active) { tcg_rd = cpu_reg(s, rd); - gen_helper_pacia(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, tcg_rd, cpu_reg_sp(s, rn)); + gen_helper_pacia(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, tcg_rd, + cpu_reg_sp(s, rn)); } else if (!dc_isar_feature(aa64_pauth, s)) { goto do_unallocated; } @@ -5217,7 +5694,8 @@ static void disas_data_proc_1src(DisasContext *s, uint32_t insn) case MAP(1, 0x01, 0x01): /* PACIB */ if (s->pauth_active) { tcg_rd = cpu_reg(s, rd); - gen_helper_pacib(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, tcg_rd, cpu_reg_sp(s, rn)); + gen_helper_pacib(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, tcg_rd, + cpu_reg_sp(s, rn)); } else if (!dc_isar_feature(aa64_pauth, s)) { goto do_unallocated; } @@ -5225,7 +5703,8 @@ static void disas_data_proc_1src(DisasContext *s, uint32_t insn) case MAP(1, 0x01, 0x02): /* PACDA */ if (s->pauth_active) { tcg_rd = cpu_reg(s, rd); - gen_helper_pacda(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, tcg_rd, cpu_reg_sp(s, rn)); + gen_helper_pacda(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, tcg_rd, + cpu_reg_sp(s, rn)); } else if (!dc_isar_feature(aa64_pauth, s)) { goto do_unallocated; } @@ -5233,7 +5712,8 @@ static void disas_data_proc_1src(DisasContext *s, uint32_t insn) case MAP(1, 0x01, 0x03): /* PACDB */ if (s->pauth_active) { tcg_rd = cpu_reg(s, rd); - gen_helper_pacdb(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, tcg_rd, cpu_reg_sp(s, rn)); + gen_helper_pacdb(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, tcg_rd, + cpu_reg_sp(s, rn)); } else if (!dc_isar_feature(aa64_pauth, s)) { goto do_unallocated; } @@ -5241,7 +5721,8 @@ static void disas_data_proc_1src(DisasContext *s, uint32_t insn) case MAP(1, 0x01, 0x04): /* AUTIA */ if (s->pauth_active) { tcg_rd = cpu_reg(s, rd); - gen_helper_autia(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, tcg_rd, cpu_reg_sp(s, rn)); + gen_helper_autia(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, tcg_rd, + cpu_reg_sp(s, rn)); } else if (!dc_isar_feature(aa64_pauth, s)) { goto do_unallocated; } @@ -5249,7 +5730,8 @@ static void disas_data_proc_1src(DisasContext *s, uint32_t insn) case MAP(1, 0x01, 0x05): /* AUTIB */ if (s->pauth_active) { tcg_rd = cpu_reg(s, rd); - gen_helper_autib(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, tcg_rd, cpu_reg_sp(s, rn)); + gen_helper_autib(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, tcg_rd, + cpu_reg_sp(s, rn)); } else if (!dc_isar_feature(aa64_pauth, s)) { goto do_unallocated; } @@ -5257,7 +5739,8 @@ static void disas_data_proc_1src(DisasContext *s, uint32_t insn) case MAP(1, 0x01, 0x06): /* AUTDA */ if (s->pauth_active) { tcg_rd = cpu_reg(s, rd); - gen_helper_autda(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, tcg_rd, cpu_reg_sp(s, rn)); + gen_helper_autda(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, tcg_rd, + cpu_reg_sp(s, rn)); } else if (!dc_isar_feature(aa64_pauth, s)) { goto do_unallocated; } @@ -5265,7 +5748,8 @@ static void disas_data_proc_1src(DisasContext *s, uint32_t insn) case MAP(1, 0x01, 0x07): /* AUTDB */ if (s->pauth_active) { tcg_rd = cpu_reg(s, rd); - gen_helper_autdb(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, tcg_rd, cpu_reg_sp(s, rn)); + gen_helper_autdb(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, tcg_rd, + cpu_reg_sp(s, rn)); } else if (!dc_isar_feature(aa64_pauth, s)) { goto do_unallocated; } @@ -5275,7 +5759,8 @@ static void disas_data_proc_1src(DisasContext *s, uint32_t insn) goto do_unallocated; } else if (s->pauth_active) { tcg_rd = cpu_reg(s, rd); - 
gen_helper_pacia(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, tcg_rd, new_tmp_a64_zero(s)); + gen_helper_pacia(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, tcg_rd, + new_tmp_a64_zero(s)); } break; case MAP(1, 0x01, 0x09): /* PACIZB */ @@ -5283,7 +5768,8 @@ static void disas_data_proc_1src(DisasContext *s, uint32_t insn) goto do_unallocated; } else if (s->pauth_active) { tcg_rd = cpu_reg(s, rd); - gen_helper_pacib(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, tcg_rd, new_tmp_a64_zero(s)); + gen_helper_pacib(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, tcg_rd, + new_tmp_a64_zero(s)); } break; case MAP(1, 0x01, 0x0a): /* PACDZA */ @@ -5291,7 +5777,8 @@ static void disas_data_proc_1src(DisasContext *s, uint32_t insn) goto do_unallocated; } else if (s->pauth_active) { tcg_rd = cpu_reg(s, rd); - gen_helper_pacda(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, tcg_rd, new_tmp_a64_zero(s)); + gen_helper_pacda(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, tcg_rd, + new_tmp_a64_zero(s)); } break; case MAP(1, 0x01, 0x0b): /* PACDZB */ @@ -5299,7 +5786,8 @@ static void disas_data_proc_1src(DisasContext *s, uint32_t insn) goto do_unallocated; } else if (s->pauth_active) { tcg_rd = cpu_reg(s, rd); - gen_helper_pacdb(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, tcg_rd, new_tmp_a64_zero(s)); + gen_helper_pacdb(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, tcg_rd, + new_tmp_a64_zero(s)); } break; case MAP(1, 0x01, 0x0c): /* AUTIZA */ @@ -5307,7 +5795,8 @@ static void disas_data_proc_1src(DisasContext *s, uint32_t insn) goto do_unallocated; } else if (s->pauth_active) { tcg_rd = cpu_reg(s, rd); - gen_helper_autia(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, tcg_rd, new_tmp_a64_zero(s)); + gen_helper_autia(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, tcg_rd, + new_tmp_a64_zero(s)); } break; case MAP(1, 0x01, 0x0d): /* AUTIZB */ @@ -5315,7 +5804,8 @@ static void disas_data_proc_1src(DisasContext *s, uint32_t insn) goto do_unallocated; } else if (s->pauth_active) { tcg_rd = cpu_reg(s, rd); - gen_helper_autib(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, tcg_rd, new_tmp_a64_zero(s)); + gen_helper_autib(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, tcg_rd, + new_tmp_a64_zero(s)); } break; case MAP(1, 0x01, 0x0e): /* AUTDZA */ @@ -5323,7 +5813,8 @@ static void disas_data_proc_1src(DisasContext *s, uint32_t insn) goto do_unallocated; } else if (s->pauth_active) { tcg_rd = cpu_reg(s, rd); - gen_helper_autda(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, tcg_rd, new_tmp_a64_zero(s)); + gen_helper_autda(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, tcg_rd, + new_tmp_a64_zero(s)); } break; case MAP(1, 0x01, 0x0f): /* AUTDZB */ @@ -5331,7 +5822,8 @@ static void disas_data_proc_1src(DisasContext *s, uint32_t insn) goto do_unallocated; } else if (s->pauth_active) { tcg_rd = cpu_reg(s, rd); - gen_helper_autdb(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, tcg_rd, new_tmp_a64_zero(s)); + gen_helper_autdb(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, tcg_rd, + new_tmp_a64_zero(s)); } break; case MAP(1, 0x01, 0x10): /* XPACI */ @@ -5388,9 +5880,9 @@ static void handle_div(DisasContext *s, bool is_signed, unsigned int sf, } /* LSLV, LSRV, ASRV, RORV */ -static void handle_shift_reg(DisasContext *s, - enum a64_shift_type shift_type, unsigned int sf, - unsigned int rm, unsigned int rn, unsigned int rd) +static void handle_shift_reg(DisasContext *s, enum a64_shift_type shift_type, + unsigned int sf, unsigned int rm, unsigned int rn, + unsigned int rd) { TCGContext *tcg_ctx = s->uc->tcg_ctx; TCGv_i64 tcg_shift = tcg_temp_new_i64(tcg_ctx); @@ -5403,17 +5895,16 @@ static void handle_shift_reg(DisasContext *s, } /* CRC32[BHWX], CRC32C[BHWX] */ -static void handle_crc32(DisasContext *s, - unsigned int 
sf, unsigned int sz, bool crc32c, - unsigned int rm, unsigned int rn, unsigned int rd) +static void handle_crc32(DisasContext *s, unsigned int sf, unsigned int sz, + bool crc32c, unsigned int rm, unsigned int rn, + unsigned int rd) { TCGContext *tcg_ctx = s->uc->tcg_ctx; TCGv_i64 tcg_acc, tcg_val; TCGv_i32 tcg_bytes; - if (!dc_isar_feature(aa64_crc32, s) - || (sf == 1 && sz != 3) - || (sf == 0 && sz == 3)) { + if (!dc_isar_feature(aa64_crc32, s) || (sf == 1 && sz != 3) || + (sf == 0 && sz == 3)) { unallocated_encoding(s); return; } @@ -5443,9 +5934,11 @@ static void handle_crc32(DisasContext *s, tcg_bytes = tcg_const_i32(tcg_ctx, 1 << sz); if (crc32c) { - gen_helper_crc32c_64(tcg_ctx, cpu_reg(s, rd), tcg_acc, tcg_val, tcg_bytes); + gen_helper_crc32c_64(tcg_ctx, cpu_reg(s, rd), tcg_acc, tcg_val, + tcg_bytes); } else { - gen_helper_crc32_64(tcg_ctx, cpu_reg(s, rd), tcg_acc, tcg_val, tcg_bytes); + gen_helper_crc32_64(tcg_ctx, cpu_reg(s, rd), tcg_acc, tcg_val, + tcg_bytes); } tcg_temp_free_i32(tcg_ctx, tcg_bytes); @@ -5460,25 +5953,72 @@ static void handle_crc32(DisasContext *s, static void disas_data_proc_2src(DisasContext *s, uint32_t insn) { TCGContext *tcg_ctx = s->uc->tcg_ctx; - unsigned int sf, rm, opcode, rn, rd; + unsigned int sf, rm, opcode, rn, rd, setflag; sf = extract32(insn, 31, 1); + setflag = extract32(insn, 29, 1); rm = extract32(insn, 16, 5); opcode = extract32(insn, 10, 6); rn = extract32(insn, 5, 5); rd = extract32(insn, 0, 5); - if (extract32(insn, 29, 1)) { + if (setflag && opcode != 0) { unallocated_encoding(s); return; } switch (opcode) { + case 0: /* SUBP(S) */ + if (sf == 0 || !dc_isar_feature(aa64_mte_insn_reg, s)) { + goto do_unallocated; + } else { + TCGv_i64 tcg_n, tcg_m, tcg_d; + + tcg_n = read_cpu_reg_sp(s, rn, true); + tcg_m = read_cpu_reg_sp(s, rm, true); + tcg_gen_sextract_i64(tcg_ctx, tcg_n, tcg_n, 0, 56); + tcg_gen_sextract_i64(tcg_ctx, tcg_m, tcg_m, 0, 56); + tcg_d = cpu_reg(s, rd); + + if (setflag) { + gen_sub_CC(tcg_ctx, true, tcg_d, tcg_n, tcg_m); + } else { + tcg_gen_sub_i64(tcg_ctx, tcg_d, tcg_n, tcg_m); + } + } + break; case 2: /* UDIV */ handle_div(s, false, sf, rm, rn, rd); break; case 3: /* SDIV */ handle_div(s, true, sf, rm, rn, rd); break; + case 4: /* IRG */ + if (sf == 0 || !dc_isar_feature(aa64_mte_insn_reg, s)) { + goto do_unallocated; + } + if (s->ata) { + gen_helper_irg(tcg_ctx, cpu_reg_sp(s, rd), tcg_ctx->cpu_env, + cpu_reg_sp(s, rn), cpu_reg(s, rm)); + } else { + gen_address_with_allocation_tag0(tcg_ctx, cpu_reg_sp(s, rd), + cpu_reg_sp(s, rn)); + } + break; + case 5: /* GMI */ + if (sf == 0 || !dc_isar_feature(aa64_mte_insn_reg, s)) { + goto do_unallocated; + } else { + TCGv_i64 t1 = tcg_const_i64(tcg_ctx, 1); + TCGv_i64 t2 = tcg_temp_new_i64(tcg_ctx); + + tcg_gen_extract_i64(tcg_ctx, t2, cpu_reg_sp(s, rn), 56, 4); + tcg_gen_shl_i64(tcg_ctx, t1, t1, t2); + tcg_gen_or_i64(tcg_ctx, cpu_reg(s, rd), cpu_reg(s, rm), t1); + + tcg_temp_free_i64(tcg_ctx, t1); + tcg_temp_free_i64(tcg_ctx, t2); + } + break; case 8: /* LSLV */ handle_shift_reg(s, A64_SHIFT_TYPE_LSL, sf, rm, rn, rd); break; @@ -5573,7 +6113,7 @@ static void disas_data_proc_reg(DisasContext *s, uint32_t insn) } break; - case 0x2: /* Conditional compare */ + case 0x2: /* Conditional compare */ disas_cc(s, insn); /* both imm and reg forms */ break; @@ -5581,10 +6121,10 @@ static void disas_data_proc_reg(DisasContext *s, uint32_t insn) disas_cond_select(s, insn); break; - case 0x6: /* Data-processing */ - if (op0) { /* (1 source) */ + case 0x6: /* Data-processing */ + if (op0) { /* (1 
source) */ disas_data_proc_1src(s, insn); - } else { /* (2 source) */ + } else { /* (2 source) */ disas_data_proc_2src(s, insn); } break; @@ -5606,9 +6146,9 @@ static void disas_data_proc_reg(DisasContext *s, uint32_t insn) } } -static void handle_fp_compare(DisasContext *s, int size, - unsigned int rn, unsigned int rm, - bool cmp_with_zero, bool signal_all_nans) +static void handle_fp_compare(DisasContext *s, int size, unsigned int rn, + unsigned int rm, bool cmp_with_zero, + bool signal_all_nans) { TCGContext *tcg_ctx = s->uc->tcg_ctx; TCGv_i64 tcg_flags = tcg_temp_new_i64(tcg_ctx); @@ -5644,16 +6184,20 @@ static void handle_fp_compare(DisasContext *s, int size, switch (size) { case MO_32: if (signal_all_nans) { - gen_helper_vfp_cmpes_a64(tcg_ctx, tcg_flags, tcg_vn, tcg_vm, fpst); + gen_helper_vfp_cmpes_a64(tcg_ctx, tcg_flags, tcg_vn, tcg_vm, + fpst); } else { - gen_helper_vfp_cmps_a64(tcg_ctx, tcg_flags, tcg_vn, tcg_vm, fpst); + gen_helper_vfp_cmps_a64(tcg_ctx, tcg_flags, tcg_vn, tcg_vm, + fpst); } break; case MO_16: if (signal_all_nans) { - gen_helper_vfp_cmpeh_a64(tcg_ctx, tcg_flags, tcg_vn, tcg_vm, fpst); + gen_helper_vfp_cmpeh_a64(tcg_ctx, tcg_flags, tcg_vn, tcg_vm, + fpst); } else { - gen_helper_vfp_cmph_a64(tcg_ctx, tcg_flags, tcg_vn, tcg_vm, fpst); + gen_helper_vfp_cmph_a64(tcg_ctx, tcg_flags, tcg_vn, tcg_vm, + fpst); } break; default: @@ -5844,7 +6388,8 @@ static void disas_fp_csel(DisasContext *s, uint32_t insn) a64_test_cc(tcg_ctx, &c, cond); t_zero = tcg_const_i64(tcg_ctx, 0); - tcg_gen_movcond_i64(tcg_ctx, c.cond, t_true, c.value, t_zero, t_true, t_false); + tcg_gen_movcond_i64(tcg_ctx, c.cond, t_true, c.value, t_zero, t_true, + t_false); tcg_temp_free_i64(tcg_ctx, t_zero); tcg_temp_free_i64(tcg_ctx, t_false); a64_free_cc(tcg_ctx, &c); @@ -5883,7 +6428,8 @@ static void handle_fp_1src_half(DisasContext *s, int opcode, int rd, int rn) case 0xb: /* FRINTZ */ case 0xc: /* FRINTA */ { - TCGv_i32 tcg_rmode = tcg_const_i32(tcg_ctx, arm_rmode_to_sf(opcode & 7)); + TCGv_i32 tcg_rmode = + tcg_const_i32(tcg_ctx, arm_rmode_to_sf(opcode & 7)); fpst = get_fpstatus_ptr(tcg_ctx, true); gen_helper_set_rmode(tcg_ctx, tcg_rmode, tcg_rmode, fpst); @@ -5983,7 +6529,7 @@ static void handle_fp_1src_single(DisasContext *s, int opcode, int rd, int rn) } tcg_temp_free_ptr(tcg_ctx, fpst); - done: +done: write_fp_sreg(s, rd, tcg_res); tcg_temp_free_i32(tcg_ctx, tcg_op); tcg_temp_free_i32(tcg_ctx, tcg_res); @@ -6061,19 +6607,18 @@ static void handle_fp_1src_double(DisasContext *s, int opcode, int rd, int rn) } tcg_temp_free_ptr(tcg_ctx, fpst); - done: +done: write_fp_dreg(s, rd, tcg_res); tcg_temp_free_i64(tcg_ctx, tcg_op); tcg_temp_free_i64(tcg_ctx, tcg_res); } -static void handle_fp_fcvt(DisasContext *s, int opcode, - int rd, int rn, int dtype, int ntype) +static void handle_fp_fcvt(DisasContext *s, int opcode, int rd, int rn, + int dtype, int ntype) { TCGContext *tcg_ctx = s->uc->tcg_ctx; switch (ntype) { - case 0x0: - { + case 0x0: { TCGv_i32 tcg_rn = read_fp_sreg(s, rn); if (dtype == 1) { /* Single to double */ @@ -6097,8 +6642,7 @@ static void handle_fp_fcvt(DisasContext *s, int opcode, tcg_temp_free_i32(tcg_ctx, tcg_rn); break; } - case 0x1: - { + case 0x1: { TCGv_i64 tcg_rn = read_fp_dreg(s, rn); TCGv_i32 tcg_rd = tcg_temp_new_i32(tcg_ctx); if (dtype == 0) { @@ -6117,9 +6661,8 @@ static void handle_fp_fcvt(DisasContext *s, int opcode, tcg_temp_free_i32(tcg_ctx, tcg_rd); tcg_temp_free_i64(tcg_ctx, tcg_rn); break; - } - case 0x3: - { + } + case 0x3: { TCGv_i32 tcg_rn = read_fp_sreg(s, rn); 
TCGv_ptr tcg_fpst = get_fpstatus_ptr(tcg_ctx, false); TCGv_i32 tcg_ahp = get_ahp_flag(tcg_ctx); @@ -6127,13 +6670,15 @@ static void handle_fp_fcvt(DisasContext *s, int opcode, if (dtype == 0) { /* Half to single */ TCGv_i32 tcg_rd = tcg_temp_new_i32(tcg_ctx); - gen_helper_vfp_fcvt_f16_to_f32(tcg_ctx, tcg_rd, tcg_rn, tcg_fpst, tcg_ahp); + gen_helper_vfp_fcvt_f16_to_f32(tcg_ctx, tcg_rd, tcg_rn, tcg_fpst, + tcg_ahp); write_fp_sreg(s, rd, tcg_rd); tcg_temp_free_i32(tcg_ctx, tcg_rd); } else { /* Half to double */ TCGv_i64 tcg_rd = tcg_temp_new_i64(tcg_ctx); - gen_helper_vfp_fcvt_f16_to_f64(tcg_ctx, tcg_rd, tcg_rn, tcg_fpst, tcg_ahp); + gen_helper_vfp_fcvt_f16_to_f64(tcg_ctx, tcg_rd, tcg_rn, tcg_fpst, + tcg_ahp); write_fp_dreg(s, rd, tcg_rd); tcg_temp_free_i64(tcg_ctx, tcg_rd); } @@ -6167,8 +6712,9 @@ static void disas_fp_1src(DisasContext *s, uint32_t insn) } switch (opcode) { - case 0x4: case 0x5: case 0x7: - { + case 0x4: + case 0x5: + case 0x7: { /* FCVT between half, single and double precision */ int dtype = extract32(opcode, 0, 2); if (type == 2 || dtype == type) { @@ -6242,8 +6788,8 @@ static void disas_fp_1src(DisasContext *s, uint32_t insn) } /* Floating-point data-processing (2 source) - single precision */ -static void handle_fp_2src_single(DisasContext *s, int opcode, - int rd, int rn, int rm) +static void handle_fp_2src_single(DisasContext *s, int opcode, int rd, int rn, + int rm) { TCGContext *tcg_ctx = s->uc->tcg_ctx; TCGv_i32 tcg_op1; @@ -6296,8 +6842,8 @@ static void handle_fp_2src_single(DisasContext *s, int opcode, } /* Floating-point data-processing (2 source) - double precision */ -static void handle_fp_2src_double(DisasContext *s, int opcode, - int rd, int rn, int rm) +static void handle_fp_2src_double(DisasContext *s, int opcode, int rd, int rn, + int rm) { TCGContext *tcg_ctx = s->uc->tcg_ctx; TCGv_i64 tcg_op1; @@ -6350,8 +6896,8 @@ static void handle_fp_2src_double(DisasContext *s, int opcode, } /* Floating-point data-processing (2 source) - half precision */ -static void handle_fp_2src_half(DisasContext *s, int opcode, - int rd, int rn, int rm) +static void handle_fp_2src_half(DisasContext *s, int opcode, int rd, int rn, + int rm) { TCGContext *tcg_ctx = s->uc->tcg_ctx; TCGv_i32 tcg_op1; @@ -6454,8 +7000,8 @@ static void disas_fp_2src(DisasContext *s, uint32_t insn) } /* Floating-point data-processing (3 source) - single precision */ -static void handle_fp_3src_single(DisasContext *s, bool o0, bool o1, - int rd, int rn, int rm, int ra) +static void handle_fp_3src_single(DisasContext *s, bool o0, bool o1, int rd, + int rn, int rm, int ra) { TCGContext *tcg_ctx = s->uc->tcg_ctx; TCGv_i32 tcg_op1, tcg_op2, tcg_op3; @@ -6493,8 +7039,8 @@ static void handle_fp_3src_single(DisasContext *s, bool o0, bool o1, } /* Floating-point data-processing (3 source) - double precision */ -static void handle_fp_3src_double(DisasContext *s, bool o0, bool o1, - int rd, int rn, int rm, int ra) +static void handle_fp_3src_double(DisasContext *s, bool o0, bool o1, int rd, + int rn, int rm, int ra) { TCGContext *tcg_ctx = s->uc->tcg_ctx; TCGv_i64 tcg_op1, tcg_op2, tcg_op3; @@ -6532,8 +7078,8 @@ static void handle_fp_3src_double(DisasContext *s, bool o0, bool o1, } /* Floating-point data-processing (3 source) - half precision */ -static void handle_fp_3src_half(DisasContext *s, bool o0, bool o1, - int rd, int rn, int rm, int ra) +static void handle_fp_3src_half(DisasContext *s, bool o0, bool o1, int rd, + int rn, int rm, int ra) { TCGContext *tcg_ctx = s->uc->tcg_ctx; TCGv_i32 tcg_op1, tcg_op2, 
tcg_op3; @@ -6559,7 +7105,8 @@ static void handle_fp_3src_half(DisasContext *s, bool o0, bool o1, tcg_gen_xori_i32(tcg_ctx, tcg_op1, tcg_op1, 0x8000); } - gen_helper_advsimd_muladdh(tcg_ctx, tcg_res, tcg_op1, tcg_op2, tcg_op3, fpst); + gen_helper_advsimd_muladdh(tcg_ctx, tcg_res, tcg_op1, tcg_op2, tcg_op3, + fpst); write_fp_sreg(s, rd, tcg_res); @@ -6708,11 +7255,11 @@ static void handle_fpfpcvt(DisasContext *s, int rd, int rn, int opcode, case 1: /* float64 */ tcg_double = tcg_temp_new_i64(tcg_ctx); if (is_signed) { - gen_helper_vfp_sqtod(tcg_ctx, tcg_double, tcg_int, - tcg_shift, tcg_fpstatus); + gen_helper_vfp_sqtod(tcg_ctx, tcg_double, tcg_int, tcg_shift, + tcg_fpstatus); } else { - gen_helper_vfp_uqtod(tcg_ctx, tcg_double, tcg_int, - tcg_shift, tcg_fpstatus); + gen_helper_vfp_uqtod(tcg_ctx, tcg_double, tcg_int, tcg_shift, + tcg_fpstatus); } write_fp_dreg(s, rd, tcg_double); tcg_temp_free_i64(tcg_ctx, tcg_double); @@ -6721,11 +7268,11 @@ static void handle_fpfpcvt(DisasContext *s, int rd, int rn, int opcode, case 0: /* float32 */ tcg_single = tcg_temp_new_i32(tcg_ctx); if (is_signed) { - gen_helper_vfp_sqtos(tcg_ctx, tcg_single, tcg_int, - tcg_shift, tcg_fpstatus); + gen_helper_vfp_sqtos(tcg_ctx, tcg_single, tcg_int, tcg_shift, + tcg_fpstatus); } else { - gen_helper_vfp_uqtos(tcg_ctx, tcg_single, tcg_int, - tcg_shift, tcg_fpstatus); + gen_helper_vfp_uqtos(tcg_ctx, tcg_single, tcg_int, tcg_shift, + tcg_fpstatus); } write_fp_sreg(s, rd, tcg_single); tcg_temp_free_i32(tcg_ctx, tcg_single); @@ -6734,11 +7281,11 @@ static void handle_fpfpcvt(DisasContext *s, int rd, int rn, int opcode, case 3: /* float16 */ tcg_single = tcg_temp_new_i32(tcg_ctx); if (is_signed) { - gen_helper_vfp_sqtoh(tcg_ctx, tcg_single, tcg_int, - tcg_shift, tcg_fpstatus); + gen_helper_vfp_sqtoh(tcg_ctx, tcg_single, tcg_int, tcg_shift, + tcg_fpstatus); } else { - gen_helper_vfp_uqtoh(tcg_ctx, tcg_single, tcg_int, - tcg_shift, tcg_fpstatus); + gen_helper_vfp_uqtoh(tcg_ctx, tcg_single, tcg_int, tcg_shift, + tcg_fpstatus); } write_fp_sreg(s, rd, tcg_single); tcg_temp_free_i32(tcg_ctx, tcg_single); @@ -6933,7 +7480,8 @@ static void handle_fmov(DisasContext *s, int rd, int rn, int type, bool itof) break; case 2: /* 64 bit to top half. 
*/ - tcg_gen_st_i64(tcg_ctx, tcg_rn, tcg_ctx->cpu_env, fp_reg_hi_offset(s, rd)); + tcg_gen_st_i64(tcg_ctx, tcg_rn, tcg_ctx->cpu_env, + fp_reg_hi_offset(s, rd)); clear_vec_high(s, true, rd); break; case 3: @@ -6952,19 +7500,23 @@ static void handle_fmov(DisasContext *s, int rd, int rn, int type, bool itof) switch (type) { case 0: /* 32 bit */ - tcg_gen_ld32u_i64(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, fp_reg_offset(s, rn, MO_32)); + tcg_gen_ld32u_i64(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, + fp_reg_offset(s, rn, MO_32)); break; case 1: /* 64 bit */ - tcg_gen_ld_i64(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, fp_reg_offset(s, rn, MO_64)); + tcg_gen_ld_i64(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, + fp_reg_offset(s, rn, MO_64)); break; case 2: /* 64 bits from top half */ - tcg_gen_ld_i64(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, fp_reg_hi_offset(s, rn)); + tcg_gen_ld_i64(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, + fp_reg_hi_offset(s, rn)); break; case 3: /* 16 bit */ - tcg_gen_ld16u_i64(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, fp_reg_offset(s, rn, MO_16)); + tcg_gen_ld16u_i64(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, + fp_reg_offset(s, rn, MO_16)); break; default: g_assert_not_reached(); @@ -7053,8 +7605,8 @@ static void disas_fp_int_conv(DisasContext *s, uint32_t insn) goto do_unallocated; } /* fallthru */ - case 6: // 0b00000110: /* FMOV 32-bit */ - case 7: // 0b00000111: + case 6: // 0b00000110: /* FMOV 32-bit */ + case 7: // 0b00000111: case 0xa6: // 0b10100110: /* FMOV 64-bit */ case 0xa7: // 0b10100111: case 0xce: // 0b11001110: /* FMOV top half of 128-bit */ @@ -7198,14 +7750,13 @@ static void disas_simd_ext(DisasContext *s, uint32_t insn) read_vec_element(s, tcg_resh, rm, 0, MO_64); do_ext64(s, tcg_resh, tcg_resl, pos); } - tcg_gen_movi_i64(tcg_ctx, tcg_resh, 0); } else { TCGv_i64 tcg_hh; typedef struct { int reg; int elt; } EltPosns; - EltPosns eltposns[] = { {rn, 0}, {rn, 1}, {rm, 0}, {rm, 1} }; + EltPosns eltposns[] = {{rn, 0}, {rn, 1}, {rm, 0}, {rm, 1}}; EltPosns *elt = eltposns; if (pos >= 64) { @@ -7228,9 +7779,11 @@ static void disas_simd_ext(DisasContext *s, uint32_t insn) write_vec_element(s, tcg_resl, rd, 0, MO_64); tcg_temp_free_i64(tcg_ctx, tcg_resl); - write_vec_element(s, tcg_resh, rd, 1, MO_64); + if (is_q) { + write_vec_element(s, tcg_resh, rd, 1, MO_64); + } tcg_temp_free_i64(tcg_ctx, tcg_resh); - clear_vec_high(s, true, rd); + clear_vec_high(s, is_q, rd); } /* TBL/TBX @@ -7268,17 +7821,21 @@ static void disas_simd_tb(DisasContext *s, uint32_t insn) * the input. 
*/ tcg_resl = tcg_temp_new_i64(tcg_ctx); - tcg_resh = tcg_temp_new_i64(tcg_ctx); + tcg_resh = NULL; if (is_tblx) { read_vec_element(s, tcg_resl, rd, 0, MO_64); } else { tcg_gen_movi_i64(tcg_ctx, tcg_resl, 0); } - if (is_tblx && is_q) { - read_vec_element(s, tcg_resh, rd, 1, MO_64); - } else { - tcg_gen_movi_i64(tcg_ctx, tcg_resh, 0); + + if (is_q) { + tcg_resh = tcg_temp_new_i64(tcg_ctx); + if (is_tblx) { + read_vec_element(s, tcg_resh, rd, 1, MO_64); + } else { + tcg_gen_movi_i64(tcg_ctx, tcg_resh, 0); + } } tcg_idx = tcg_temp_new_i64(tcg_ctx); @@ -7289,8 +7846,8 @@ static void disas_simd_tb(DisasContext *s, uint32_t insn) tcg_regno, tcg_numregs); if (is_q) { read_vec_element(s, tcg_idx, rm, 1, MO_64); - gen_helper_simd_tbl(tcg_ctx, tcg_resh, tcg_ctx->cpu_env, tcg_resh, tcg_idx, - tcg_regno, tcg_numregs); + gen_helper_simd_tbl(tcg_ctx, tcg_resh, tcg_ctx->cpu_env, tcg_resh, + tcg_idx, tcg_regno, tcg_numregs); } tcg_temp_free_i64(tcg_ctx, tcg_idx); tcg_temp_free_i32(tcg_ctx, tcg_regno); @@ -7298,9 +7855,12 @@ static void disas_simd_tb(DisasContext *s, uint32_t insn) write_vec_element(s, tcg_resl, rd, 0, MO_64); tcg_temp_free_i64(tcg_ctx, tcg_resl); - write_vec_element(s, tcg_resh, rd, 1, MO_64); - tcg_temp_free_i64(tcg_ctx, tcg_resh); - clear_vec_high(s, true, rd); + + if (is_q) { + write_vec_element(s, tcg_resh, rd, 1, MO_64); + tcg_temp_free_i64(tcg_ctx, tcg_resh); + } + clear_vec_high(s, is_q, rd); } /* ZIP/UZP/TRN @@ -7338,7 +7898,7 @@ static void disas_simd_zip_trn(DisasContext *s, uint32_t insn) } tcg_resl = tcg_const_i64(tcg_ctx, 0); - tcg_resh = tcg_const_i64(tcg_ctx, 0); + tcg_resh = is_q ? tcg_const_i64(tcg_ctx, 0) : NULL; tcg_res = tcg_temp_new_i64(tcg_ctx); for (i = 0; i < elements; i++) { @@ -7349,8 +7909,8 @@ static void disas_simd_zip_trn(DisasContext *s, uint32_t insn) if (i < midpoint) { read_vec_element(s, tcg_res, rn, 2 * i + part, size); } else { - read_vec_element(s, tcg_res, rm, - 2 * (i - midpoint) + part, size); + read_vec_element(s, tcg_res, rm, 2 * (i - midpoint) + part, + size); } break; } @@ -7389,9 +7949,12 @@ static void disas_simd_zip_trn(DisasContext *s, uint32_t insn) write_vec_element(s, tcg_resl, rd, 0, MO_64); tcg_temp_free_i64(tcg_ctx, tcg_resl); - write_vec_element(s, tcg_resh, rd, 1, MO_64); - tcg_temp_free_i64(tcg_ctx, tcg_resh); - clear_vec_high(s, true, rd); + + if (is_q) { + write_vec_element(s, tcg_resh, rd, 1, MO_64); + tcg_temp_free_i64(tcg_ctx, tcg_resh); + } + clear_vec_high(s, is_q, rd); } /* @@ -7496,8 +8059,8 @@ static void disas_simd_across_lanes(DisasContext *s, uint32_t insn) return; } /* fall through */ - case 0x3: /* SADDLV, UADDLV */ - case 0xa: /* SMAXV, UMAXV */ + case 0x3: /* SADDLV, UADDLV */ + case 0xa: /* SMAXV, UMAXV */ case 0x1a: /* SMINV, UMINV */ if (size == 3 || (size == 2 && !is_q)) { unallocated_encoding(s); @@ -7577,7 +8140,6 @@ static void disas_simd_across_lanes(DisasContext *s, uint32_t insn) default: g_assert_not_reached(); } - } } else { /* Floating point vector reduction ops which work across 32 @@ -7650,8 +8212,8 @@ static void handle_simd_dupe(DisasContext *s, int is_q, int rd, int rn, index = imm5 >> (size + 1); tcg_gen_gvec_dup_mem(tcg_ctx, size, vec_full_reg_offset(s, rd), - vec_reg_offset(s, rn, index, size), - is_q ? 16 : 8, vec_full_reg_size(s)); + vec_reg_offset(s, rn, index, size), is_q ? 
16 : 8, + vec_full_reg_size(s)); } /* DUP (element, scalar) @@ -7660,8 +8222,7 @@ static void handle_simd_dupe(DisasContext *s, int is_q, int rd, int rn, * | 0 1 0 1 1 1 1 0 0 0 0 | imm5 | 0 0 0 0 0 1 | Rn | Rd | * +-----------------------+--------+-------------+------+------+ */ -static void handle_simd_dupes(DisasContext *s, int rd, int rn, - int imm5) +static void handle_simd_dupes(DisasContext *s, int rd, int rn, int imm5) { TCGContext *tcg_ctx = s->uc->tcg_ctx; int size = ctz32(imm5); @@ -7730,8 +8291,8 @@ static void handle_simd_dupg(DisasContext *s, int is_q, int rd, int rn, * size: encoded in imm5 (see ARM ARM LowestSetBit()) * index: encoded in imm5<4:size+1> */ -static void handle_simd_inse(DisasContext *s, int rd, int rn, - int imm4, int imm5) +static void handle_simd_inse(DisasContext *s, int rd, int rn, int imm4, + int imm5) { TCGContext *tcg_ctx = s->uc->tcg_ctx; int size = ctz32(imm5); @@ -7747,7 +8308,7 @@ static void handle_simd_inse(DisasContext *s, int rd, int rn, return; } - dst_index = extract32(imm5, 1+size, 5); + dst_index = extract32(imm5, 1 + size, 5); src_index = extract32(imm4, size, 4); tmp = tcg_temp_new_i64(tcg_ctx); @@ -7761,7 +8322,6 @@ static void handle_simd_inse(DisasContext *s, int rd, int rn, clear_vec_high(s, true, rd); } - /* INS (General) * * 31 21 20 16 15 10 9 5 4 0 @@ -7820,9 +8380,7 @@ static void handle_simd_umov_smov(DisasContext *s, int is_q, int is_signed, return; } } else { - if (size > 3 - || (size < 3 && is_q) - || (size == 3 && !is_q)) { + if (size > 3 || (size < 3 && is_q) || (size == 3 && !is_q)) { unallocated_encoding(s); return; } @@ -7832,7 +8390,7 @@ static void handle_simd_umov_smov(DisasContext *s, int is_q, int is_signed, return; } - element = extract32(imm5, 1+size, 4); + element = extract32(imm5, 1 + size, 4); tcg_rd = cpu_reg(s, rd); read_vec_element(s, tcg_rd, rn, element, size | (is_signed ? MO_SIGN : 0)); @@ -8014,8 +8572,8 @@ static void disas_simd_mod_imm(DisasContext *s, uint32_t insn) if (!((cmode & 0x9) == 0x1 || (cmode & 0xd) == 0x9)) { /* MOVI or MVNI, with MVNI negation handled above. */ - tcg_gen_gvec_dup64i(tcg_ctx, vec_full_reg_offset(s, rd), is_q ? 16 : 8, - vec_full_reg_size(s), imm); + tcg_gen_gvec_dup_imm(tcg_ctx, MO_64, vec_full_reg_offset(s, rd), + is_q ? 16 : 8, vec_full_reg_size(s), imm); } else { /* ORR or BIC, with BIC negation to AND handled above. 
*/ if (is_neg) { @@ -8083,9 +8641,9 @@ static void disas_simd_scalar_pairwise(DisasContext *s, uint32_t insn) fpst = NULL; break; - case 0xc: /* FMAXNMP */ - case 0xd: /* FADDP */ - case 0xf: /* FMAXP */ + case 0xc: /* FMAXNMP */ + case 0xd: /* FADDP */ + case 0xf: /* FMAXP */ case 0x2c: /* FMINNMP */ case 0x2f: /* FMINP */ /* FP op, size[0] is 32 or 64 bit*/ @@ -8158,19 +8716,24 @@ static void disas_simd_scalar_pairwise(DisasContext *s, uint32_t insn) if (size == MO_16) { switch (opcode) { case 0xc: /* FMAXNMP */ - gen_helper_advsimd_maxnumh(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst); + gen_helper_advsimd_maxnumh(tcg_ctx, tcg_res, tcg_op1, tcg_op2, + fpst); break; case 0xd: /* FADDP */ - gen_helper_advsimd_addh(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst); + gen_helper_advsimd_addh(tcg_ctx, tcg_res, tcg_op1, tcg_op2, + fpst); break; case 0xf: /* FMAXP */ - gen_helper_advsimd_maxh(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst); + gen_helper_advsimd_maxh(tcg_ctx, tcg_res, tcg_op1, tcg_op2, + fpst); break; case 0x2c: /* FMINNMP */ - gen_helper_advsimd_minnumh(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst); + gen_helper_advsimd_minnumh(tcg_ctx, tcg_res, tcg_op1, tcg_op2, + fpst); break; case 0x2f: /* FMINP */ - gen_helper_advsimd_minh(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst); + gen_helper_advsimd_minh(tcg_ctx, tcg_res, tcg_op1, tcg_op2, + fpst); break; default: g_assert_not_reached(); @@ -8178,7 +8741,8 @@ static void disas_simd_scalar_pairwise(DisasContext *s, uint32_t insn) } else { switch (opcode) { case 0xc: /* FMAXNMP */ - gen_helper_vfp_maxnums(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst); + gen_helper_vfp_maxnums(tcg_ctx, tcg_res, tcg_op1, tcg_op2, + fpst); break; case 0xd: /* FADDP */ gen_helper_vfp_adds(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst); @@ -8187,7 +8751,8 @@ static void disas_simd_scalar_pairwise(DisasContext *s, uint32_t insn) gen_helper_vfp_maxs(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst); break; case 0x2c: /* FMINNMP */ - gen_helper_vfp_minnums(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst); + gen_helper_vfp_minnums(tcg_ctx, tcg_res, tcg_op1, tcg_op2, + fpst); break; case 0x2f: /* FMINP */ gen_helper_vfp_mins(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst); @@ -8215,9 +8780,10 @@ static void disas_simd_scalar_pairwise(DisasContext *s, uint32_t insn) * This code is handles the common shifting code and is used by both * the vector and scalar code. 
*/ -static void handle_shri_with_rndacc(TCGContext *tcg_ctx, TCGv_i64 tcg_res, TCGv_i64 tcg_src, - TCGv_i64 tcg_rnd, bool accumulate, - bool is_u, int size, int shift) +static void handle_shri_with_rndacc(TCGContext *tcg_ctx, TCGv_i64 tcg_res, + TCGv_i64 tcg_src, TCGv_i64 tcg_rnd, + bool accumulate, bool is_u, int size, + int shift) { bool extended_result = false; bool round = tcg_rnd != NULL; @@ -8243,13 +8809,11 @@ static void handle_shri_with_rndacc(TCGContext *tcg_ctx, TCGv_i64 tcg_res, TCGv_ if (!is_u) { /* take care of sign extending tcg_res */ tcg_gen_sari_i64(tcg_ctx, tcg_src_hi, tcg_src, 63); - tcg_gen_add2_i64(tcg_ctx, tcg_src, tcg_src_hi, - tcg_src, tcg_src_hi, - tcg_rnd, tcg_zero); + tcg_gen_add2_i64(tcg_ctx, tcg_src, tcg_src_hi, tcg_src, + tcg_src_hi, tcg_rnd, tcg_zero); } else { - tcg_gen_add2_i64(tcg_ctx, tcg_src, tcg_src_hi, - tcg_src, tcg_zero, - tcg_rnd, tcg_zero); + tcg_gen_add2_i64(tcg_ctx, tcg_src, tcg_src_hi, tcg_src, + tcg_zero, tcg_rnd, tcg_zero); } tcg_temp_free_i64(tcg_ctx, tcg_zero); } else { @@ -8298,9 +8862,8 @@ static void handle_shri_with_rndacc(TCGContext *tcg_ctx, TCGv_i64 tcg_res, TCGv_ } /* SSHR[RA]/USHR[RA] - Scalar shift right (optional rounding/accumulate) */ -static void handle_scalar_simd_shri(DisasContext *s, - bool is_u, int immh, int immb, - int opcode, int rn, int rd) +static void handle_scalar_simd_shri(DisasContext *s, bool is_u, int immh, + int immb, int opcode, int rn, int rd) { TCGContext *tcg_ctx = s->uc->tcg_ctx; const int size = 3; @@ -8345,7 +8908,8 @@ static void handle_scalar_simd_shri(DisasContext *s, } tcg_rn = read_fp_dreg(s, rn); - tcg_rd = (accumulate || insert) ? read_fp_dreg(s, rd) : tcg_temp_new_i64(tcg_ctx); + tcg_rd = (accumulate || insert) ? read_fp_dreg(s, rd) + : tcg_temp_new_i64(tcg_ctx); if (insert) { /* shift count same as element size is valid but does nothing; @@ -8354,11 +8918,12 @@ static void handle_scalar_simd_shri(DisasContext *s, int esize = 8 << size; if (shift != esize) { tcg_gen_shri_i64(tcg_ctx, tcg_rn, tcg_rn, shift); - tcg_gen_deposit_i64(tcg_ctx, tcg_rd, tcg_rd, tcg_rn, 0, esize - shift); + tcg_gen_deposit_i64(tcg_ctx, tcg_rd, tcg_rd, tcg_rn, 0, + esize - shift); } } else { - handle_shri_with_rndacc(tcg_ctx, tcg_rd, tcg_rn, tcg_round, - accumulate, is_u, size, shift); + handle_shri_with_rndacc(tcg_ctx, tcg_rd, tcg_rn, tcg_round, accumulate, + is_u, size, shift); } write_fp_dreg(s, rd, tcg_rd); @@ -8371,9 +8936,8 @@ static void handle_scalar_simd_shri(DisasContext *s, } /* SHL/SLI - Scalar shift left */ -static void handle_scalar_simd_shli(DisasContext *s, bool insert, - int immh, int immb, int opcode, - int rn, int rd) +static void handle_scalar_simd_shli(DisasContext *s, bool insert, int immh, + int immb, int opcode, int rn, int rd) { TCGContext *tcg_ctx = s->uc->tcg_ctx; int size = 32 - clz32(immh) - 1; @@ -8409,9 +8973,8 @@ static void handle_scalar_simd_shli(DisasContext *s, bool insert, /* SQSHRN/SQSHRUN - Saturating (signed/unsigned) shift right with * (signed/unsigned) narrowing */ static void handle_vec_simd_sqshrn(DisasContext *s, bool is_scalar, bool is_q, - bool is_u_shift, bool is_u_narrow, - int immh, int immb, int opcode, - int rn, int rd) + bool is_u_shift, bool is_u_narrow, int immh, + int immb, int opcode, int rn, int rd) { TCGContext *tcg_ctx = s->uc->tcg_ctx; int immhb = immh << 3 | immb; @@ -8425,21 +8988,15 @@ static void handle_vec_simd_sqshrn(DisasContext *s, bool is_scalar, bool is_q, TCGv_i32 tcg_rd_narrowed; TCGv_i64 tcg_final; - static NeonGenNarrowEnvFn * const 
signed_narrow_fns[4][2] = { - { gen_helper_neon_narrow_sat_s8, - gen_helper_neon_unarrow_sat8 }, - { gen_helper_neon_narrow_sat_s16, - gen_helper_neon_unarrow_sat16 }, - { gen_helper_neon_narrow_sat_s32, - gen_helper_neon_unarrow_sat32 }, - { NULL, NULL }, - }; - static NeonGenNarrowEnvFn * const unsigned_narrow_fns[4] = { - gen_helper_neon_narrow_sat_u8, - gen_helper_neon_narrow_sat_u16, - gen_helper_neon_narrow_sat_u32, - NULL + static NeonGenNarrowEnvFn *const signed_narrow_fns[4][2] = { + {gen_helper_neon_narrow_sat_s8, gen_helper_neon_unarrow_sat8}, + {gen_helper_neon_narrow_sat_s16, gen_helper_neon_unarrow_sat16}, + {gen_helper_neon_narrow_sat_s32, gen_helper_neon_unarrow_sat32}, + {NULL, NULL}, }; + static NeonGenNarrowEnvFn *const unsigned_narrow_fns[4] = { + gen_helper_neon_narrow_sat_u8, gen_helper_neon_narrow_sat_u16, + gen_helper_neon_narrow_sat_u32, NULL}; NeonGenNarrowEnvFn *narrowfn; int i; @@ -8475,11 +9032,12 @@ static void handle_vec_simd_sqshrn(DisasContext *s, bool is_scalar, bool is_q, for (i = 0; i < elements; i++) { read_vec_element(s, tcg_rn, rn, i, ldop); - handle_shri_with_rndacc(tcg_ctx, tcg_rd, tcg_rn, tcg_round, - false, is_u_shift, size+1, shift); + handle_shri_with_rndacc(tcg_ctx, tcg_rd, tcg_rn, tcg_round, false, + is_u_shift, size + 1, shift); narrowfn(tcg_ctx, tcg_rd_narrowed, tcg_ctx->cpu_env, tcg_rd); tcg_gen_extu_i32_i64(tcg_ctx, tcg_rd, tcg_rd_narrowed); - tcg_gen_deposit_i64(tcg_ctx, tcg_final, tcg_final, tcg_rd, esize * i, esize); + tcg_gen_deposit_i64(tcg_ctx, tcg_final, tcg_final, tcg_rd, esize * i, + esize); } if (!is_q) { @@ -8501,8 +9059,8 @@ static void handle_vec_simd_sqshrn(DisasContext *s, bool is_scalar, bool is_q, /* SQSHLU, UQSHL, SQSHL: saturating left shifts */ static void handle_simd_qshl(DisasContext *s, bool scalar, bool is_q, - bool src_unsigned, bool dst_unsigned, - int immh, int immb, int rn, int rd) + bool src_unsigned, bool dst_unsigned, int immh, + int immb, int rn, int rd) { TCGContext *tcg_ctx = s->uc->tcg_ctx; int immhb = immh << 3 | immb; @@ -8544,9 +9102,9 @@ static void handle_simd_qshl(DisasContext *s, bool scalar, bool is_q, if (size == 3) { TCGv_i64 tcg_shift = tcg_const_i64(tcg_ctx, shift); - static NeonGenTwo64OpEnvFn * const fns[2][2] = { - { gen_helper_neon_qshl_s64, gen_helper_neon_qshlu_s64 }, - { NULL, gen_helper_neon_qshl_u64 }, + static NeonGenTwo64OpEnvFn *const fns[2][2] = { + {gen_helper_neon_qshl_s64, gen_helper_neon_qshlu_s64}, + {NULL, gen_helper_neon_qshl_u64}, }; NeonGenTwo64OpEnvFn *genfn = fns[src_unsigned][dst_unsigned]; int maxpass = is_q ? 2 : 1; @@ -8564,21 +9122,14 @@ static void handle_simd_qshl(DisasContext *s, bool scalar, bool is_q, clear_vec_high(s, is_q, rd); } else { TCGv_i32 tcg_shift = tcg_const_i32(tcg_ctx, shift); - static NeonGenTwoOpEnvFn * const fns[2][2][3] = { - { - { gen_helper_neon_qshl_s8, - gen_helper_neon_qshl_s16, - gen_helper_neon_qshl_s32 }, - { gen_helper_neon_qshlu_s8, - gen_helper_neon_qshlu_s16, - gen_helper_neon_qshlu_s32 } - }, { - { NULL, NULL, NULL }, - { gen_helper_neon_qshl_u8, - gen_helper_neon_qshl_u16, - gen_helper_neon_qshl_u32 } - } - }; + static NeonGenTwoOpEnvFn *const fns[2][2][3] = { + {{gen_helper_neon_qshl_s8, gen_helper_neon_qshl_s16, + gen_helper_neon_qshl_s32}, + {gen_helper_neon_qshlu_s8, gen_helper_neon_qshlu_s16, + gen_helper_neon_qshlu_s32}}, + {{NULL, NULL, NULL}, + {gen_helper_neon_qshl_u8, gen_helper_neon_qshl_u16, + gen_helper_neon_qshl_u32}}}; NeonGenTwoOpEnvFn *genfn = fns[src_unsigned][dst_unsigned][size]; MemOp memop = scalar ? 
size : MO_32; int maxpass = scalar ? 1 : is_q ? 4 : 2; @@ -8618,8 +9169,8 @@ static void handle_simd_qshl(DisasContext *s, bool scalar, bool is_q, /* Common vector code for handling integer to FP conversion */ static void handle_simd_intfp_conv(DisasContext *s, int rd, int rn, - int elements, int is_signed, - int fracbits, int size) + int elements, int is_signed, int fracbits, + int size) { TCGContext *tcg_ctx = s->uc->tcg_ctx; TCGv_ptr tcg_fpst = get_fpstatus_ptr(tcg_ctx, size == MO_16); @@ -8640,11 +9191,11 @@ static void handle_simd_intfp_conv(DisasContext *s, int rd, int rn, read_vec_element(s, tcg_int64, rn, pass, mop); if (is_signed) { - gen_helper_vfp_sqtod(tcg_ctx, tcg_double, tcg_int64, - tcg_shift, tcg_fpst); + gen_helper_vfp_sqtod(tcg_ctx, tcg_double, tcg_int64, tcg_shift, + tcg_fpst); } else { - gen_helper_vfp_uqtod(tcg_ctx, tcg_double, tcg_int64, - tcg_shift, tcg_fpst); + gen_helper_vfp_uqtod(tcg_ctx, tcg_double, tcg_int64, tcg_shift, + tcg_fpst); } if (elements == 1) { write_fp_dreg(s, rd, tcg_double); @@ -8675,9 +9226,11 @@ static void handle_simd_intfp_conv(DisasContext *s, int rd, int rn, } } else { if (is_signed) { - gen_helper_vfp_sitos(tcg_ctx, tcg_float, tcg_int32, tcg_fpst); + gen_helper_vfp_sitos(tcg_ctx, tcg_float, tcg_int32, + tcg_fpst); } else { - gen_helper_vfp_uitos(tcg_ctx, tcg_float, tcg_int32, tcg_fpst); + gen_helper_vfp_uitos(tcg_ctx, tcg_float, tcg_int32, + tcg_fpst); } } break; @@ -8692,9 +9245,11 @@ static void handle_simd_intfp_conv(DisasContext *s, int rd, int rn, } } else { if (is_signed) { - gen_helper_vfp_sitoh(tcg_ctx, tcg_float, tcg_int32, tcg_fpst); + gen_helper_vfp_sitoh(tcg_ctx, tcg_float, tcg_int32, + tcg_fpst); } else { - gen_helper_vfp_uitoh(tcg_ctx, tcg_float, tcg_int32, tcg_fpst); + gen_helper_vfp_uitoh(tcg_ctx, tcg_float, tcg_int32, + tcg_fpst); } } break; @@ -8723,9 +9278,8 @@ static void handle_simd_intfp_conv(DisasContext *s, int rd, int rn, /* UCVTF/SCVTF - Integer to FP conversion */ static void handle_simd_shift_intfp_conv(DisasContext *s, bool is_scalar, - bool is_q, bool is_u, - int immh, int immb, int opcode, - int rn, int rd) + bool is_q, bool is_u, int immh, + int immb, int opcode, int rn, int rd) { int size, elements, fracbits; int immhb = immh << 3 | immb; @@ -8767,8 +9321,8 @@ static void handle_simd_shift_intfp_conv(DisasContext *s, bool is_scalar, /* FCVTZS, FVCVTZU - FP to fixedpoint conversion */ static void handle_simd_shift_fpint_conv(DisasContext *s, bool is_scalar, - bool is_q, bool is_u, - int immh, int immb, int rn, int rd) + bool is_q, bool is_u, int immh, + int immb, int rn, int rd) { TCGContext *tcg_ctx = s->uc->tcg_ctx; int immhb = immh << 3 | immb; @@ -8817,9 +9371,11 @@ static void handle_simd_shift_fpint_conv(DisasContext *s, bool is_scalar, read_vec_element(s, tcg_op, rn, pass, MO_64); if (is_u) { - gen_helper_vfp_touqd(tcg_ctx, tcg_op, tcg_op, tcg_shift, tcg_fpstatus); + gen_helper_vfp_touqd(tcg_ctx, tcg_op, tcg_op, tcg_shift, + tcg_fpstatus); } else { - gen_helper_vfp_tosqd(tcg_ctx, tcg_op, tcg_op, tcg_shift, tcg_fpstatus); + gen_helper_vfp_tosqd(tcg_ctx, tcg_op, tcg_op, tcg_shift, + tcg_fpstatus); } write_vec_element(s, tcg_op, rd, pass, MO_64); tcg_temp_free_i64(tcg_ctx, tcg_op); @@ -8910,8 +9466,8 @@ static void disas_simd_scalar_shift_imm(DisasContext *s, uint32_t insn) handle_scalar_simd_shli(s, is_u, immh, immb, opcode, rn, rd); break; case 0x1c: /* SCVTF, UCVTF */ - handle_simd_shift_intfp_conv(s, true, false, is_u, immh, immb, - opcode, rn, rd); + handle_simd_shift_intfp_conv(s, true, false, is_u, 
immh, immb, opcode, + rn, rd); break; case 0x10: /* SQSHRUN, SQSHRUN2 */ case 0x11: /* SQRSHRUN, SQRSHRUN2 */ @@ -8919,13 +9475,13 @@ static void disas_simd_scalar_shift_imm(DisasContext *s, uint32_t insn) unallocated_encoding(s); return; } - handle_vec_simd_sqshrn(s, true, false, false, true, - immh, immb, opcode, rn, rd); + handle_vec_simd_sqshrn(s, true, false, false, true, immh, immb, opcode, + rn, rd); break; case 0x12: /* SQSHRN, SQSHRN2, UQSHRN */ case 0x13: /* SQRSHRN, SQRSHRN2, UQRSHRN, UQRSHRN2 */ - handle_vec_simd_sqshrn(s, true, false, is_u, is_u, - immh, immb, opcode, rn, rd); + handle_vec_simd_sqshrn(s, true, false, is_u, is_u, immh, immb, opcode, + rn, rd); break; case 0xc: /* SQSHLU */ if (!is_u) { @@ -8994,7 +9550,8 @@ static void disas_simd_scalar_three_reg_diff(DisasContext *s, uint32_t insn) read_vec_element(s, tcg_op2, rm, 0, MO_32 | MO_SIGN); tcg_gen_mul_i64(tcg_ctx, tcg_res, tcg_op1, tcg_op2); - gen_helper_neon_addl_saturate_s64(tcg_ctx, tcg_res, tcg_ctx->cpu_env, tcg_res, tcg_res); + gen_helper_neon_addl_saturate_s64(tcg_ctx, tcg_res, tcg_ctx->cpu_env, + tcg_res, tcg_res); switch (opcode) { case 0xd: /* SQDMULL, SQDMULL2 */ @@ -9004,8 +9561,8 @@ static void disas_simd_scalar_three_reg_diff(DisasContext *s, uint32_t insn) /* fall through */ case 0x9: /* SQDMLAL, SQDMLAL2 */ read_vec_element(s, tcg_op1, rd, 0, MO_64); - gen_helper_neon_addl_saturate_s64(tcg_ctx, tcg_res, tcg_ctx->cpu_env, - tcg_res, tcg_op1); + gen_helper_neon_addl_saturate_s64( + tcg_ctx, tcg_res, tcg_ctx->cpu_env, tcg_res, tcg_op1); break; default: g_assert_not_reached(); @@ -9022,7 +9579,8 @@ static void disas_simd_scalar_three_reg_diff(DisasContext *s, uint32_t insn) TCGv_i64 tcg_res = tcg_temp_new_i64(tcg_ctx); gen_helper_neon_mull_s16(tcg_ctx, tcg_res, tcg_op1, tcg_op2); - gen_helper_neon_addl_saturate_s32(tcg_ctx, tcg_res, tcg_ctx->cpu_env, tcg_res, tcg_res); + gen_helper_neon_addl_saturate_s32(tcg_ctx, tcg_res, tcg_ctx->cpu_env, + tcg_res, tcg_res); switch (opcode) { case 0xd: /* SQDMULL, SQDMULL2 */ @@ -9034,8 +9592,8 @@ static void disas_simd_scalar_three_reg_diff(DisasContext *s, uint32_t insn) { TCGv_i64 tcg_op3 = tcg_temp_new_i64(tcg_ctx); read_vec_element(s, tcg_op3, rd, 0, MO_32); - gen_helper_neon_addl_saturate_s32(tcg_ctx, tcg_res, tcg_ctx->cpu_env, - tcg_res, tcg_op3); + gen_helper_neon_addl_saturate_s32( + tcg_ctx, tcg_res, tcg_ctx->cpu_env, tcg_res, tcg_op3); tcg_temp_free_i64(tcg_ctx, tcg_op3); break; } @@ -9066,16 +9624,20 @@ static void handle_3same_64(DisasContext *s, int opcode, bool u, switch (opcode) { case 0x1: /* SQADD */ if (u) { - gen_helper_neon_qadd_u64(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, tcg_rn, tcg_rm); + gen_helper_neon_qadd_u64(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, tcg_rn, + tcg_rm); } else { - gen_helper_neon_qadd_s64(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, tcg_rn, tcg_rm); + gen_helper_neon_qadd_s64(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, tcg_rn, + tcg_rm); } break; case 0x5: /* SQSUB */ if (u) { - gen_helper_neon_qsub_u64(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, tcg_rn, tcg_rm); + gen_helper_neon_qsub_u64(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, tcg_rn, + tcg_rm); } else { - gen_helper_neon_qsub_s64(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, tcg_rn, tcg_rm); + gen_helper_neon_qsub_s64(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, tcg_rn, + tcg_rm); } break; case 0x6: /* CMGT, CMHI */ @@ -9106,9 +9668,11 @@ static void handle_3same_64(DisasContext *s, int opcode, bool u, break; case 0x9: /* SQSHL, UQSHL */ if (u) { - gen_helper_neon_qshl_u64(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, tcg_rn, tcg_rm); + 
gen_helper_neon_qshl_u64(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, tcg_rn, + tcg_rm); } else { - gen_helper_neon_qshl_s64(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, tcg_rn, tcg_rm); + gen_helper_neon_qshl_s64(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, tcg_rn, + tcg_rm); } break; case 0xa: /* SRSHL, URSHL */ @@ -9120,9 +9684,11 @@ static void handle_3same_64(DisasContext *s, int opcode, bool u, break; case 0xb: /* SQRSHL, UQRSHL */ if (u) { - gen_helper_neon_qrshl_u64(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, tcg_rn, tcg_rm); + gen_helper_neon_qrshl_u64(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, tcg_rn, + tcg_rm); } else { - gen_helper_neon_qrshl_s64(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, tcg_rn, tcg_rm); + gen_helper_neon_qrshl_s64(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, tcg_rn, + tcg_rm); } break; case 0x10: /* ADD, SUB */ @@ -9169,7 +9735,8 @@ static void handle_3same_float(DisasContext *s, int size, int elements, tcg_res, fpst); break; case 0x18: /* FMAXNM */ - gen_helper_vfp_maxnumd(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst); + gen_helper_vfp_maxnumd(tcg_ctx, tcg_res, tcg_op1, tcg_op2, + fpst); break; case 0x1a: /* FADD */ gen_helper_vfp_addd(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst); @@ -9178,7 +9745,8 @@ static void handle_3same_float(DisasContext *s, int size, int elements, gen_helper_vfp_mulxd(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst); break; case 0x1c: /* FCMEQ */ - gen_helper_neon_ceq_f64(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst); + gen_helper_neon_ceq_f64(tcg_ctx, tcg_res, tcg_op1, tcg_op2, + fpst); break; case 0x1e: /* FMAX */ gen_helper_vfp_maxd(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst); @@ -9187,7 +9755,8 @@ static void handle_3same_float(DisasContext *s, int size, int elements, gen_helper_recpsf_f64(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst); break; case 0x38: /* FMINNM */ - gen_helper_vfp_minnumd(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst); + gen_helper_vfp_minnumd(tcg_ctx, tcg_res, tcg_op1, tcg_op2, + fpst); break; case 0x3a: /* FSUB */ gen_helper_vfp_subd(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst); @@ -9196,16 +9765,19 @@ static void handle_3same_float(DisasContext *s, int size, int elements, gen_helper_vfp_mind(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst); break; case 0x3f: /* FRSQRTS */ - gen_helper_rsqrtsf_f64(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst); + gen_helper_rsqrtsf_f64(tcg_ctx, tcg_res, tcg_op1, tcg_op2, + fpst); break; case 0x5b: /* FMUL */ gen_helper_vfp_muld(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst); break; case 0x5c: /* FCMGE */ - gen_helper_neon_cge_f64(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst); + gen_helper_neon_cge_f64(tcg_ctx, tcg_res, tcg_op1, tcg_op2, + fpst); break; case 0x5d: /* FACGE */ - gen_helper_neon_acge_f64(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst); + gen_helper_neon_acge_f64(tcg_ctx, tcg_res, tcg_op1, tcg_op2, + fpst); break; case 0x5f: /* FDIV */ gen_helper_vfp_divd(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst); @@ -9215,10 +9787,12 @@ static void handle_3same_float(DisasContext *s, int size, int elements, gen_helper_vfp_absd(tcg_ctx, tcg_res, tcg_res); break; case 0x7c: /* FCMGT */ - gen_helper_neon_cgt_f64(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst); + gen_helper_neon_cgt_f64(tcg_ctx, tcg_res, tcg_op1, tcg_op2, + fpst); break; case 0x7d: /* FACGT */ - gen_helper_neon_acgt_f64(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst); + gen_helper_neon_acgt_f64(tcg_ctx, tcg_res, tcg_op1, tcg_op2, + fpst); break; default: g_assert_not_reached(); @@ -9255,7 +9829,8 @@ static void handle_3same_float(DisasContext *s, int size, int elements, gen_helper_vfp_mulxs(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst); break; case 
0x1c: /* FCMEQ */ - gen_helper_neon_ceq_f32(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst); + gen_helper_neon_ceq_f32(tcg_ctx, tcg_res, tcg_op1, tcg_op2, + fpst); break; case 0x1e: /* FMAX */ gen_helper_vfp_maxs(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst); @@ -9264,10 +9839,12 @@ static void handle_3same_float(DisasContext *s, int size, int elements, gen_helper_recpsf_f32(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst); break; case 0x18: /* FMAXNM */ - gen_helper_vfp_maxnums(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst); + gen_helper_vfp_maxnums(tcg_ctx, tcg_res, tcg_op1, tcg_op2, + fpst); break; case 0x38: /* FMINNM */ - gen_helper_vfp_minnums(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst); + gen_helper_vfp_minnums(tcg_ctx, tcg_res, tcg_op1, tcg_op2, + fpst); break; case 0x3a: /* FSUB */ gen_helper_vfp_subs(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst); @@ -9276,16 +9853,19 @@ static void handle_3same_float(DisasContext *s, int size, int elements, gen_helper_vfp_mins(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst); break; case 0x3f: /* FRSQRTS */ - gen_helper_rsqrtsf_f32(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst); + gen_helper_rsqrtsf_f32(tcg_ctx, tcg_res, tcg_op1, tcg_op2, + fpst); break; case 0x5b: /* FMUL */ gen_helper_vfp_muls(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst); break; case 0x5c: /* FCMGE */ - gen_helper_neon_cge_f32(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst); + gen_helper_neon_cge_f32(tcg_ctx, tcg_res, tcg_op1, tcg_op2, + fpst); break; case 0x5d: /* FACGE */ - gen_helper_neon_acge_f32(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst); + gen_helper_neon_acge_f32(tcg_ctx, tcg_res, tcg_op1, tcg_op2, + fpst); break; case 0x5f: /* FDIV */ gen_helper_vfp_divs(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst); @@ -9295,10 +9875,12 @@ static void handle_3same_float(DisasContext *s, int size, int elements, gen_helper_vfp_abss(tcg_ctx, tcg_res, tcg_res); break; case 0x7c: /* FCMGT */ - gen_helper_neon_cgt_f32(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst); + gen_helper_neon_cgt_f32(tcg_ctx, tcg_res, tcg_op1, tcg_op2, + fpst); break; case 0x7d: /* FACGT */ - gen_helper_neon_acgt_f32(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst); + gen_helper_neon_acgt_f32(tcg_ctx, tcg_res, tcg_op1, tcg_op2, + fpst); break; default: g_assert_not_reached(); @@ -9376,10 +9958,10 @@ static void disas_simd_scalar_three_reg_same(DisasContext *s, uint32_t insn) case 0x9: /* SQSHL, UQSHL */ case 0xb: /* SQRSHL, UQRSHL */ break; - case 0x8: /* SSHL, USHL */ - case 0xa: /* SRSHL, URSHL */ - case 0x6: /* CMGT, CMHI */ - case 0x7: /* CMGE, CMHS */ + case 0x8: /* SSHL, USHL */ + case 0xa: /* SRSHL, URSHL */ + case 0x6: /* CMGT, CMHI */ + case 0x7: /* CMGE, CMHS */ case 0x11: /* CMTST, CMEQ */ case 0x10: /* ADD, SUB (vector) */ if (size != 3) { @@ -9429,49 +10011,49 @@ static void disas_simd_scalar_three_reg_same(DisasContext *s, uint32_t insn) switch (opcode) { case 0x1: /* SQADD, UQADD */ { - static NeonGenTwoOpEnvFn * const fns[3][2] = { - { gen_helper_neon_qadd_s8, gen_helper_neon_qadd_u8 }, - { gen_helper_neon_qadd_s16, gen_helper_neon_qadd_u16 }, - { gen_helper_neon_qadd_s32, gen_helper_neon_qadd_u32 }, + static NeonGenTwoOpEnvFn *const fns[3][2] = { + {gen_helper_neon_qadd_s8, gen_helper_neon_qadd_u8}, + {gen_helper_neon_qadd_s16, gen_helper_neon_qadd_u16}, + {gen_helper_neon_qadd_s32, gen_helper_neon_qadd_u32}, }; genenvfn = fns[size][u]; break; } case 0x5: /* SQSUB, UQSUB */ { - static NeonGenTwoOpEnvFn * const fns[3][2] = { - { gen_helper_neon_qsub_s8, gen_helper_neon_qsub_u8 }, - { gen_helper_neon_qsub_s16, gen_helper_neon_qsub_u16 }, - { gen_helper_neon_qsub_s32, 
gen_helper_neon_qsub_u32 }, + static NeonGenTwoOpEnvFn *const fns[3][2] = { + {gen_helper_neon_qsub_s8, gen_helper_neon_qsub_u8}, + {gen_helper_neon_qsub_s16, gen_helper_neon_qsub_u16}, + {gen_helper_neon_qsub_s32, gen_helper_neon_qsub_u32}, }; genenvfn = fns[size][u]; break; } case 0x9: /* SQSHL, UQSHL */ { - static NeonGenTwoOpEnvFn * const fns[3][2] = { - { gen_helper_neon_qshl_s8, gen_helper_neon_qshl_u8 }, - { gen_helper_neon_qshl_s16, gen_helper_neon_qshl_u16 }, - { gen_helper_neon_qshl_s32, gen_helper_neon_qshl_u32 }, + static NeonGenTwoOpEnvFn *const fns[3][2] = { + {gen_helper_neon_qshl_s8, gen_helper_neon_qshl_u8}, + {gen_helper_neon_qshl_s16, gen_helper_neon_qshl_u16}, + {gen_helper_neon_qshl_s32, gen_helper_neon_qshl_u32}, }; genenvfn = fns[size][u]; break; } case 0xb: /* SQRSHL, UQRSHL */ { - static NeonGenTwoOpEnvFn * const fns[3][2] = { - { gen_helper_neon_qrshl_s8, gen_helper_neon_qrshl_u8 }, - { gen_helper_neon_qrshl_s16, gen_helper_neon_qrshl_u16 }, - { gen_helper_neon_qrshl_s32, gen_helper_neon_qrshl_u32 }, + static NeonGenTwoOpEnvFn *const fns[3][2] = { + {gen_helper_neon_qrshl_s8, gen_helper_neon_qrshl_u8}, + {gen_helper_neon_qrshl_s16, gen_helper_neon_qrshl_u16}, + {gen_helper_neon_qrshl_s32, gen_helper_neon_qrshl_u32}, }; genenvfn = fns[size][u]; break; } case 0x16: /* SQDMULH, SQRDMULH */ { - static NeonGenTwoOpEnvFn * const fns[2][2] = { - { gen_helper_neon_qdmulh_s16, gen_helper_neon_qrdmulh_s16 }, - { gen_helper_neon_qdmulh_s32, gen_helper_neon_qrdmulh_s32 }, + static NeonGenTwoOpEnvFn *const fns[2][2] = { + {gen_helper_neon_qdmulh_s16, gen_helper_neon_qrdmulh_s16}, + {gen_helper_neon_qdmulh_s32, gen_helper_neon_qrdmulh_s32}, }; assert(size == 1 || size == 2); genenvfn = fns[size - 1][u]; @@ -9511,7 +10093,7 @@ static void disas_simd_scalar_three_reg_same_fp16(DisasContext *s, int rm = extract32(insn, 16, 5); bool u = extract32(insn, 29, 1); bool a = extract32(insn, 23, 1); - int fpopcode = opcode | (a << 3) | (u << 4); + int fpopcode = opcode | (a << 3) | (u << 4); TCGv_ptr fpst; TCGv_i32 tcg_op1; TCGv_i32 tcg_op2; @@ -9582,7 +10164,6 @@ static void disas_simd_scalar_three_reg_same_fp16(DisasContext *s, write_fp_sreg(s, rd, tcg_res); - tcg_temp_free_i32(tcg_ctx, tcg_res); tcg_temp_free_i32(tcg_ctx, tcg_op1); tcg_temp_free_i32(tcg_ctx, tcg_op2); @@ -9647,16 +10228,20 @@ static void disas_simd_scalar_three_reg_same_extra(DisasContext *s, switch (opcode) { case 0x0: /* SQRDMLAH */ if (size == 1) { - gen_helper_neon_qrdmlah_s16(tcg_ctx, ele3, tcg_ctx->cpu_env, ele1, ele2, ele3); + gen_helper_neon_qrdmlah_s16(tcg_ctx, ele3, tcg_ctx->cpu_env, ele1, + ele2, ele3); } else { - gen_helper_neon_qrdmlah_s32(tcg_ctx, ele3, tcg_ctx->cpu_env, ele1, ele2, ele3); + gen_helper_neon_qrdmlah_s32(tcg_ctx, ele3, tcg_ctx->cpu_env, ele1, + ele2, ele3); } break; case 0x1: /* SQRDMLSH */ if (size == 1) { - gen_helper_neon_qrdmlsh_s16(tcg_ctx, ele3, tcg_ctx->cpu_env, ele1, ele2, ele3); + gen_helper_neon_qrdmlsh_s16(tcg_ctx, ele3, tcg_ctx->cpu_env, ele1, + ele2, ele3); } else { - gen_helper_neon_qrdmlsh_s32(tcg_ctx, ele3, tcg_ctx->cpu_env, ele1, ele2, ele3); + gen_helper_neon_qrdmlsh_s32(tcg_ctx, ele3, tcg_ctx->cpu_env, ele1, + ele2, ele3); } break; default: @@ -9785,9 +10370,9 @@ static void handle_2misc_64(DisasContext *s, int opcode, bool u, } } -static void handle_2misc_fcmp_zero(DisasContext *s, int opcode, - bool is_scalar, bool is_u, bool is_q, - int size, int rn, int rd) +static void handle_2misc_fcmp_zero(DisasContext *s, int opcode, bool is_scalar, + bool is_u, bool is_q, int 
size, int rn, + int rd) { TCGContext *tcg_ctx = s->uc->tcg_ctx; bool is_double = (size == MO_64); @@ -9803,7 +10388,7 @@ static void handle_2misc_fcmp_zero(DisasContext *s, int opcode, TCGv_i64 tcg_op = tcg_temp_new_i64(tcg_ctx); TCGv_i64 tcg_zero = tcg_const_i64(tcg_ctx, 0); TCGv_i64 tcg_res = tcg_temp_new_i64(tcg_ctx); - NeonGenTwoDoubleOPFn *genfn = NULL; + NeonGenTwoDoubleOpFn *genfn = NULL; bool swap = false; int pass; @@ -9845,7 +10430,7 @@ static void handle_2misc_fcmp_zero(DisasContext *s, int opcode, TCGv_i32 tcg_op = tcg_temp_new_i32(tcg_ctx); TCGv_i32 tcg_zero = tcg_const_i32(tcg_ctx, 0); TCGv_i32 tcg_res = tcg_temp_new_i32(tcg_ctx); - NeonGenTwoSingleOPFn *genfn = NULL; + NeonGenTwoSingleOpFn *genfn = NULL; bool swap = false; int pass, maxpasses; @@ -9922,9 +10507,9 @@ static void handle_2misc_fcmp_zero(DisasContext *s, int opcode, tcg_temp_free_ptr(tcg_ctx, fpst); } -static void handle_2misc_reciprocal(DisasContext *s, int opcode, - bool is_scalar, bool is_u, bool is_q, - int size, int rn, int rd) +static void handle_2misc_reciprocal(DisasContext *s, int opcode, bool is_scalar, + bool is_u, bool is_q, int size, int rn, + int rd) { TCGContext *tcg_ctx = s->uc->tcg_ctx; bool is_double = (size == 3); @@ -9971,7 +10556,7 @@ static void handle_2misc_reciprocal(DisasContext *s, int opcode, switch (opcode) { case 0x3c: /* URECPE */ - gen_helper_recpe_u32(tcg_ctx, tcg_res, tcg_op, fpst); + gen_helper_recpe_u32(tcg_ctx, tcg_res, tcg_op); break; case 0x3d: /* FRECPE */ gen_helper_recpe_f32(tcg_ctx, tcg_res, tcg_op, fpst); @@ -10001,9 +10586,8 @@ static void handle_2misc_reciprocal(DisasContext *s, int opcode, tcg_temp_free_ptr(tcg_ctx, fpst); } -static void handle_2misc_narrow(DisasContext *s, bool scalar, - int opcode, bool u, bool is_q, - int size, int rn, int rd) +static void handle_2misc_narrow(DisasContext *s, bool scalar, int opcode, + bool u, bool is_q, int size, int rn, int rd) { TCGContext *tcg_ctx = s->uc->tcg_ctx; /* Handle 2-reg-misc ops which are narrowing (so each 2*size element @@ -10033,12 +10617,12 @@ static void handle_2misc_narrow(DisasContext *s, bool scalar, switch (opcode) { case 0x12: /* XTN, SQXTUN */ { - static NeonGenNarrowFn * const xtnfns[3] = { + static NeonGenNarrowFn *const xtnfns[3] = { gen_helper_neon_narrow_u8, gen_helper_neon_narrow_u16, tcg_gen_extrl_i64_i32, }; - static NeonGenNarrowEnvFn * const sqxtunfns[3] = { + static NeonGenNarrowEnvFn *const sqxtunfns[3] = { gen_helper_neon_unarrow_sat8, gen_helper_neon_unarrow_sat16, gen_helper_neon_unarrow_sat32, @@ -10052,13 +10636,12 @@ static void handle_2misc_narrow(DisasContext *s, bool scalar, } case 0x14: /* SQXTN, UQXTN */ { - static NeonGenNarrowEnvFn * const fns[3][2] = { - { gen_helper_neon_narrow_sat_s8, - gen_helper_neon_narrow_sat_u8 }, - { gen_helper_neon_narrow_sat_s16, - gen_helper_neon_narrow_sat_u16 }, - { gen_helper_neon_narrow_sat_s32, - gen_helper_neon_narrow_sat_u32 }, + static NeonGenNarrowEnvFn *const fns[3][2] = { + {gen_helper_neon_narrow_sat_s8, gen_helper_neon_narrow_sat_u8}, + {gen_helper_neon_narrow_sat_s16, + gen_helper_neon_narrow_sat_u16}, + {gen_helper_neon_narrow_sat_s32, + gen_helper_neon_narrow_sat_u32}, }; genenvfn = fns[size][u]; break; @@ -10066,7 +10649,8 @@ static void handle_2misc_narrow(DisasContext *s, bool scalar, case 0x16: /* FCVTN, FCVTN2 */ /* 32 bit to 16 bit or 64 bit to 32 bit float conversion */ if (size == 2) { - gen_helper_vfp_fcvtsd(tcg_ctx, tcg_res[pass], tcg_op, tcg_ctx->cpu_env); + gen_helper_vfp_fcvtsd(tcg_ctx, tcg_res[pass], tcg_op, + 
tcg_ctx->cpu_env); } else { TCGv_i32 tcg_lo = tcg_temp_new_i32(tcg_ctx); TCGv_i32 tcg_hi = tcg_temp_new_i32(tcg_ctx); @@ -10074,21 +10658,25 @@ static void handle_2misc_narrow(DisasContext *s, bool scalar, TCGv_i32 ahp = get_ahp_flag(tcg_ctx); tcg_gen_extr_i64_i32(tcg_ctx, tcg_lo, tcg_hi, tcg_op); - gen_helper_vfp_fcvt_f32_to_f16(tcg_ctx, tcg_lo, tcg_lo, fpst, ahp); - gen_helper_vfp_fcvt_f32_to_f16(tcg_ctx, tcg_hi, tcg_hi, fpst, ahp); - tcg_gen_deposit_i32(tcg_ctx, tcg_res[pass], tcg_lo, tcg_hi, 16, 16); + gen_helper_vfp_fcvt_f32_to_f16(tcg_ctx, tcg_lo, tcg_lo, fpst, + ahp); + gen_helper_vfp_fcvt_f32_to_f16(tcg_ctx, tcg_hi, tcg_hi, fpst, + ahp); + tcg_gen_deposit_i32(tcg_ctx, tcg_res[pass], tcg_lo, tcg_hi, 16, + 16); tcg_temp_free_i32(tcg_ctx, tcg_lo); tcg_temp_free_i32(tcg_ctx, tcg_hi); tcg_temp_free_ptr(tcg_ctx, fpst); tcg_temp_free_i32(tcg_ctx, ahp); } break; - case 0x56: /* FCVTXN, FCVTXN2 */ + case 0x56: /* FCVTXN, FCVTXN2 */ /* 64 bit to 32 bit float conversion * with von Neumann rounding (round to odd) */ assert(size == 2); - gen_helper_fcvtx_f64_to_f32(tcg_ctx, tcg_res[pass], tcg_op, tcg_ctx->cpu_env); + gen_helper_fcvtx_f64_to_f32(tcg_ctx, tcg_res[pass], tcg_op, + tcg_ctx->cpu_env); break; default: g_assert_not_reached(); @@ -10127,9 +10715,11 @@ static void handle_2misc_satacc(DisasContext *s, bool is_scalar, bool is_u, read_vec_element(s, tcg_rd, rd, pass, MO_64); if (is_u) { /* USQADD */ - gen_helper_neon_uqadd_s64(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, tcg_rn, tcg_rd); + gen_helper_neon_uqadd_s64(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, + tcg_rn, tcg_rd); } else { /* SUQADD */ - gen_helper_neon_sqadd_u64(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, tcg_rn, tcg_rd); + gen_helper_neon_sqadd_u64(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, + tcg_rn, tcg_rd); } write_vec_element(s, tcg_rd, rd, pass, MO_64); } @@ -10159,13 +10749,16 @@ static void handle_2misc_satacc(DisasContext *s, bool is_scalar, bool is_u, if (is_u) { /* USQADD */ switch (size) { case 0: - gen_helper_neon_uqadd_s8(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, tcg_rn, tcg_rd); + gen_helper_neon_uqadd_s8(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, + tcg_rn, tcg_rd); break; case 1: - gen_helper_neon_uqadd_s16(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, tcg_rn, tcg_rd); + gen_helper_neon_uqadd_s16(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, + tcg_rn, tcg_rd); break; case 2: - gen_helper_neon_uqadd_s32(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, tcg_rn, tcg_rd); + gen_helper_neon_uqadd_s32(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, + tcg_rn, tcg_rd); break; default: g_assert_not_reached(); @@ -10173,13 +10766,16 @@ static void handle_2misc_satacc(DisasContext *s, bool is_scalar, bool is_u, } else { /* SUQADD */ switch (size) { case 0: - gen_helper_neon_sqadd_u8(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, tcg_rn, tcg_rd); + gen_helper_neon_sqadd_u8(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, + tcg_rn, tcg_rd); break; case 1: - gen_helper_neon_sqadd_u16(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, tcg_rn, tcg_rd); + gen_helper_neon_sqadd_u16(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, + tcg_rn, tcg_rd); break; case 2: - gen_helper_neon_sqadd_u32(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, tcg_rn, tcg_rd); + gen_helper_neon_sqadd_u32(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, + tcg_rn, tcg_rd); break; default: g_assert_not_reached(); @@ -10369,10 +10965,10 @@ static void disas_simd_scalar_two_reg_misc(DisasContext *s, uint32_t insn) case 0x7: /* SQABS, SQNEG */ { NeonGenOneOpEnvFn *genfn; - static NeonGenOneOpEnvFn * const fns[3][2] = { - { gen_helper_neon_qabs_s8, gen_helper_neon_qneg_s8 }, - { gen_helper_neon_qabs_s16, gen_helper_neon_qneg_s16 }, - { 
gen_helper_neon_qabs_s32, gen_helper_neon_qneg_s32 }, + static NeonGenOneOpEnvFn *const fns[3][2] = { + {gen_helper_neon_qabs_s8, gen_helper_neon_qneg_s8}, + {gen_helper_neon_qabs_s16, gen_helper_neon_qneg_s16}, + {gen_helper_neon_qabs_s32, gen_helper_neon_qneg_s32}, }; genfn = fns[size][u]; genfn(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, tcg_rn); @@ -10385,7 +10981,8 @@ static void disas_simd_scalar_two_reg_misc(DisasContext *s, uint32_t insn) case 0x3b: /* FCVTZS */ { TCGv_i32 tcg_shift = tcg_const_i32(tcg_ctx, 0); - gen_helper_vfp_tosls(tcg_ctx, tcg_rd, tcg_rn, tcg_shift, tcg_fpstatus); + gen_helper_vfp_tosls(tcg_ctx, tcg_rd, tcg_rn, tcg_shift, + tcg_fpstatus); tcg_temp_free_i32(tcg_ctx, tcg_shift); break; } @@ -10396,7 +10993,8 @@ static void disas_simd_scalar_two_reg_misc(DisasContext *s, uint32_t insn) case 0x7b: /* FCVTZU */ { TCGv_i32 tcg_shift = tcg_const_i32(tcg_ctx, 0); - gen_helper_vfp_touls(tcg_ctx, tcg_rd, tcg_rn, tcg_shift, tcg_fpstatus); + gen_helper_vfp_touls(tcg_ctx, tcg_rd, tcg_rn, tcg_shift, + tcg_fpstatus); tcg_temp_free_i32(tcg_ctx, tcg_shift); break; } @@ -10424,16 +11022,7 @@ static void handle_vec_simd_shri(DisasContext *s, bool is_q, bool is_u, int size = 32 - clz32(immh) - 1; int immhb = immh << 3 | immb; int shift = 2 * (8 << size) - immhb; - bool accumulate = false; - int dsize = is_q ? 128 : 64; - int esize = 8 << size; - int elements = dsize/esize; - MemOp memop = size | (is_u ? 0 : MO_SIGN); - TCGv_i64 tcg_rn = new_tmp_a64(s); - TCGv_i64 tcg_rd = new_tmp_a64(s); - TCGv_i64 tcg_round; - uint64_t round_const; - int i; + GVecGen2iFn *gvec_fn; if (extract32(immh, 3, 1) && !is_q) { unallocated_encoding(s); @@ -10447,73 +11036,44 @@ static void handle_vec_simd_shri(DisasContext *s, bool is_q, bool is_u, switch (opcode) { case 0x02: /* SSRA / USRA (accumulate) */ - if (is_u) { - /* Shift count same as element size produces zero to add. */ - if (shift == 8 << size) { - goto done; - } - gen_gvec_op2i(s, is_q, rd, rn, shift, &usra_op[size]); - } else { - /* Shift count same as element size produces all sign to add. */ - if (shift == 8 << size) { - shift -= 1; - } - gen_gvec_op2i(s, is_q, rd, rn, shift, &ssra_op[size]); - } - return; + gvec_fn = is_u ? gen_gvec_usra : gen_gvec_ssra; + break; + case 0x08: /* SRI */ - /* Shift count same as element size is valid but does nothing. */ - if (shift == 8 << size) { - goto done; - } - gen_gvec_op2i(s, is_q, rd, rn, shift, &sri_op[size]); - return; + gvec_fn = gen_gvec_sri; + break; case 0x00: /* SSHR / USHR */ if (is_u) { if (shift == 8 << size) { /* Shift count the same size as element size produces zero. */ - tcg_gen_gvec_dup8i(tcg_ctx, vec_full_reg_offset(s, rd), - is_q ? 16 : 8, vec_full_reg_size(s), 0); - } else { - gen_gvec_fn2i(s, is_q, rd, rn, shift, tcg_gen_gvec_shri, size); + tcg_gen_gvec_dup_imm(tcg_ctx, size, vec_full_reg_offset(s, rd), + is_q ? 16 : 8, vec_full_reg_size(s), 0); + return; } + gvec_fn = tcg_gen_gvec_shri; } else { /* Shift count the same size as element size produces all sign. */ if (shift == 8 << size) { shift -= 1; } - gen_gvec_fn2i(s, is_q, rd, rn, shift, tcg_gen_gvec_sari, size); + gvec_fn = tcg_gen_gvec_sari; } - return; + break; case 0x04: /* SRSHR / URSHR (rounding) */ + gvec_fn = is_u ? gen_gvec_urshr : gen_gvec_srshr; break; + case 0x06: /* SRSRA / URSRA (accum + rounding) */ - accumulate = true; + gvec_fn = is_u ? 
gen_gvec_ursra : gen_gvec_srsra; + break; default: g_assert_not_reached(); } - round_const = 1ULL << (shift - 1); - tcg_round = tcg_const_i64(tcg_ctx, round_const); - - for (i = 0; i < elements; i++) { - read_vec_element(s, tcg_rn, rn, i, memop); - if (accumulate) { - read_vec_element(s, tcg_rd, rd, i, memop); - } - - handle_shri_with_rndacc(tcg_ctx, tcg_rd, tcg_rn, tcg_round, - accumulate, is_u, size, shift); - - write_vec_element(s, tcg_rd, rd, i, size); - } - tcg_temp_free_i64(tcg_ctx, tcg_round); - - done: - clear_vec_high(s, is_q, rd); + gen_gvec_fn2i(s, is_q, rd, rn, shift, gvec_fn, size); } /* SHL/SLI - Vector shift left */ @@ -10537,7 +11097,7 @@ static void handle_vec_simd_shli(DisasContext *s, bool is_q, bool insert, } if (insert) { - gen_gvec_op2i(s, is_q, rd, rn, shift, &sli_op[size]); + gen_gvec_fn2i(s, is_q, rd, rn, shift, gen_gvec_sli, size); } else { gen_gvec_fn2i(s, is_q, rd, rn, shift, tcg_gen_gvec_shli, size); } @@ -10545,7 +11105,8 @@ static void handle_vec_simd_shli(DisasContext *s, bool is_q, bool insert, /* USHLL/SHLL - Vector shift left with widening */ static void handle_vec_simd_wshli(DisasContext *s, bool is_q, bool is_u, - int immh, int immb, int opcode, int rn, int rd) + int immh, int immb, int opcode, int rn, + int rd) { TCGContext *tcg_ctx = s->uc->tcg_ctx; int size = 32 - clz32(immh) - 1; @@ -10553,7 +11114,7 @@ static void handle_vec_simd_wshli(DisasContext *s, bool is_q, bool is_u, int shift = immhb - (8 << size); int dsize = 64; int esize = 8 << size; - int elements = dsize/esize; + int elements = dsize / esize; TCGv_i64 tcg_rn = new_tmp_a64(s); TCGv_i64 tcg_rd = new_tmp_a64(s); int i; @@ -10582,15 +11143,15 @@ static void handle_vec_simd_wshli(DisasContext *s, bool is_q, bool is_u, } /* SHRN/RSHRN - Shift right with narrowing (and potential rounding) */ -static void handle_vec_simd_shrn(DisasContext *s, bool is_q, - int immh, int immb, int opcode, int rn, int rd) +static void handle_vec_simd_shrn(DisasContext *s, bool is_q, int immh, int immb, + int opcode, int rn, int rd) { TCGContext *tcg_ctx = s->uc->tcg_ctx; int immhb = immh << 3 | immb; int size = 32 - clz32(immh) - 1; int dsize = 64; int esize = 8 << size; - int elements = dsize/esize; + int elements = dsize / esize; int shift = (2 * esize) - immhb; bool round = extract32(opcode, 0, 1); TCGv_i64 tcg_rn, tcg_rd, tcg_final; @@ -10619,11 +11180,12 @@ static void handle_vec_simd_shrn(DisasContext *s, bool is_q, } for (i = 0; i < elements; i++) { - read_vec_element(s, tcg_rn, rn, i, size+1); - handle_shri_with_rndacc(tcg_ctx, tcg_rd, tcg_rn, tcg_round, - false, true, size+1, shift); + read_vec_element(s, tcg_rn, rn, i, size + 1); + handle_shri_with_rndacc(tcg_ctx, tcg_rd, tcg_rn, tcg_round, false, true, + size + 1, shift); - tcg_gen_deposit_i64(tcg_ctx, tcg_final, tcg_final, tcg_rd, esize * i, esize); + tcg_gen_deposit_i64(tcg_ctx, tcg_final, tcg_final, tcg_rd, esize * i, + esize); } if (!is_q) { @@ -10641,7 +11203,6 @@ static void handle_vec_simd_shrn(DisasContext *s, bool is_q, clear_vec_high(s, is_q, rd); } - /* AdvSIMD shift by immediate * 31 30 29 28 23 22 19 18 16 15 11 10 9 5 4 0 * +---+---+---+-------------+------+------+--------+---+------+------+ @@ -10688,15 +11249,15 @@ static void disas_simd_shift_imm(DisasContext *s, uint32_t insn) break; case 0x12: /* SQSHRN / UQSHRN */ case 0x13: /* SQRSHRN / UQRSHRN */ - handle_vec_simd_sqshrn(s, false, is_q, is_u, is_u, immh, immb, - opcode, rn, rd); + handle_vec_simd_sqshrn(s, false, is_q, is_u, is_u, immh, immb, opcode, + rn, rd); break; case 0x14: 
/* SSHLL / USHLL */ handle_vec_simd_wshli(s, is_q, is_u, immh, immb, opcode, rn, rd); break; case 0x1c: /* SCVTF / UCVTF */ - handle_simd_shift_intfp_conv(s, false, is_q, is_u, immh, immb, - opcode, rn, rd); + handle_simd_shift_intfp_conv(s, false, is_q, is_u, immh, immb, opcode, + rn, rd); break; case 0xc: /* SQSHLU */ if (!is_u) { @@ -10720,13 +11281,13 @@ static void disas_simd_shift_imm(DisasContext *s, uint32_t insn) /* Generate code to do a "long" addition or subtraction, ie one done in * TCGv_i64 on vector lanes twice the width specified by size. */ -static void gen_neon_addl(TCGContext *tcg_ctx, int size, bool is_sub, TCGv_i64 tcg_res, - TCGv_i64 tcg_op1, TCGv_i64 tcg_op2) +static void gen_neon_addl(TCGContext *tcg_ctx, int size, bool is_sub, + TCGv_i64 tcg_res, TCGv_i64 tcg_op1, TCGv_i64 tcg_op2) { - static NeonGenTwo64OpFn * const fns[3][2] = { - { gen_helper_neon_addl_u16, gen_helper_neon_subl_u16 }, - { gen_helper_neon_addl_u32, gen_helper_neon_subl_u32 }, - { tcg_gen_add_i64, tcg_gen_sub_i64 }, + static NeonGenTwo64OpFn *const fns[3][2] = { + {gen_helper_neon_addl_u16, gen_helper_neon_subl_u16}, + {gen_helper_neon_addl_u32, gen_helper_neon_subl_u32}, + {tcg_gen_add_i64, tcg_gen_sub_i64}, }; NeonGenTwo64OpFn *genfn; assert(size < 3); @@ -10806,23 +11367,24 @@ static void handle_3rd_widening(DisasContext *s, int is_q, int is_u, int size, tcg_gen_sub_i64(tcg_ctx, tcg_tmp1, tcg_op1, tcg_op2); tcg_gen_sub_i64(tcg_ctx, tcg_tmp2, tcg_op2, tcg_op1); tcg_gen_movcond_i64(tcg_ctx, is_u ? TCG_COND_GEU : TCG_COND_GE, - tcg_passres, - tcg_op1, tcg_op2, tcg_tmp1, tcg_tmp2); + tcg_passres, tcg_op1, tcg_op2, tcg_tmp1, + tcg_tmp2); tcg_temp_free_i64(tcg_ctx, tcg_tmp1); tcg_temp_free_i64(tcg_ctx, tcg_tmp2); break; } - case 8: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */ + case 8: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */ case 10: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */ case 12: /* UMULL, UMULL2, SMULL, SMULL2 */ tcg_gen_mul_i64(tcg_ctx, tcg_passres, tcg_op1, tcg_op2); break; - case 9: /* SQDMLAL, SQDMLAL2 */ + case 9: /* SQDMLAL, SQDMLAL2 */ case 11: /* SQDMLSL, SQDMLSL2 */ case 13: /* SQDMULL, SQDMULL2 */ tcg_gen_mul_i64(tcg_ctx, tcg_passres, tcg_op1, tcg_op2); - gen_helper_neon_addl_saturate_s64(tcg_ctx, tcg_passres, tcg_ctx->cpu_env, - tcg_passres, tcg_passres); + gen_helper_neon_addl_saturate_s64(tcg_ctx, tcg_passres, + tcg_ctx->cpu_env, tcg_passres, + tcg_passres); break; default: g_assert_not_reached(); @@ -10833,12 +11395,15 @@ static void handle_3rd_widening(DisasContext *s, int is_q, int is_u, int size, if (accop < 0) { tcg_gen_neg_i64(tcg_ctx, tcg_passres, tcg_passres); } - gen_helper_neon_addl_saturate_s64(tcg_ctx, tcg_res[pass], tcg_ctx->cpu_env, + gen_helper_neon_addl_saturate_s64(tcg_ctx, tcg_res[pass], + tcg_ctx->cpu_env, tcg_res[pass], tcg_passres); } else if (accop > 0) { - tcg_gen_add_i64(tcg_ctx, tcg_res[pass], tcg_res[pass], tcg_passres); + tcg_gen_add_i64(tcg_ctx, tcg_res[pass], tcg_res[pass], + tcg_passres); } else if (accop < 0) { - tcg_gen_sub_i64(tcg_ctx, tcg_res[pass], tcg_res[pass], tcg_passres); + tcg_gen_sub_i64(tcg_ctx, tcg_res[pass], tcg_res[pass], + tcg_passres); } if (accop != 0) { @@ -10870,9 +11435,9 @@ static void handle_3rd_widening(DisasContext *s, int is_q, int is_u, int size, case 2: /* SSUBL, SSUBL2, USUBL, USUBL2 */ { TCGv_i64 tcg_op2_64 = tcg_temp_new_i64(tcg_ctx); - static NeonGenWidenFn * const widenfns[2][2] = { - { gen_helper_neon_widen_s8, gen_helper_neon_widen_u8 }, - { gen_helper_neon_widen_s16, gen_helper_neon_widen_u16 }, + static NeonGenWidenFn *const 
widenfns[2][2] = { + {gen_helper_neon_widen_s8, gen_helper_neon_widen_u8}, + {gen_helper_neon_widen_s16, gen_helper_neon_widen_u16}, }; NeonGenWidenFn *widenfn = widenfns[size][is_u]; @@ -10887,42 +11452,52 @@ static void handle_3rd_widening(DisasContext *s, int is_q, int is_u, int size, case 7: /* SABDL, SABDL2, UABDL, UABDL2 */ if (size == 0) { if (is_u) { - gen_helper_neon_abdl_u16(tcg_ctx, tcg_passres, tcg_op1, tcg_op2); + gen_helper_neon_abdl_u16(tcg_ctx, tcg_passres, tcg_op1, + tcg_op2); } else { - gen_helper_neon_abdl_s16(tcg_ctx, tcg_passres, tcg_op1, tcg_op2); + gen_helper_neon_abdl_s16(tcg_ctx, tcg_passres, tcg_op1, + tcg_op2); } } else { if (is_u) { - gen_helper_neon_abdl_u32(tcg_ctx, tcg_passres, tcg_op1, tcg_op2); + gen_helper_neon_abdl_u32(tcg_ctx, tcg_passres, tcg_op1, + tcg_op2); } else { - gen_helper_neon_abdl_s32(tcg_ctx, tcg_passres, tcg_op1, tcg_op2); + gen_helper_neon_abdl_s32(tcg_ctx, tcg_passres, tcg_op1, + tcg_op2); } } break; - case 8: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */ + case 8: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */ case 10: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */ case 12: /* UMULL, UMULL2, SMULL, SMULL2 */ if (size == 0) { if (is_u) { - gen_helper_neon_mull_u8(tcg_ctx, tcg_passres, tcg_op1, tcg_op2); + gen_helper_neon_mull_u8(tcg_ctx, tcg_passres, tcg_op1, + tcg_op2); } else { - gen_helper_neon_mull_s8(tcg_ctx, tcg_passres, tcg_op1, tcg_op2); + gen_helper_neon_mull_s8(tcg_ctx, tcg_passres, tcg_op1, + tcg_op2); } } else { if (is_u) { - gen_helper_neon_mull_u16(tcg_ctx, tcg_passres, tcg_op1, tcg_op2); + gen_helper_neon_mull_u16(tcg_ctx, tcg_passres, tcg_op1, + tcg_op2); } else { - gen_helper_neon_mull_s16(tcg_ctx, tcg_passres, tcg_op1, tcg_op2); + gen_helper_neon_mull_s16(tcg_ctx, tcg_passres, tcg_op1, + tcg_op2); } } break; - case 9: /* SQDMLAL, SQDMLAL2 */ + case 9: /* SQDMLAL, SQDMLAL2 */ case 11: /* SQDMLSL, SQDMLSL2 */ case 13: /* SQDMULL, SQDMULL2 */ assert(size == 1); - gen_helper_neon_mull_s16(tcg_ctx, tcg_passres, tcg_op1, tcg_op2); - gen_helper_neon_addl_saturate_s32(tcg_ctx, tcg_passres, tcg_ctx->cpu_env, - tcg_passres, tcg_passres); + gen_helper_neon_mull_s16(tcg_ctx, tcg_passres, tcg_op1, + tcg_op2); + gen_helper_neon_addl_saturate_s32(tcg_ctx, tcg_passres, + tcg_ctx->cpu_env, tcg_passres, + tcg_passres); break; default: g_assert_not_reached(); @@ -10934,11 +11509,12 @@ static void handle_3rd_widening(DisasContext *s, int is_q, int is_u, int size, if (opcode == 9 || opcode == 11) { /* saturating accumulate ops */ if (accop < 0) { - gen_helper_neon_negl_u32(tcg_ctx, tcg_passres, tcg_passres); + gen_helper_neon_negl_u32(tcg_ctx, tcg_passres, + tcg_passres); } - gen_helper_neon_addl_saturate_s32(tcg_ctx, tcg_res[pass], tcg_ctx->cpu_env, - tcg_res[pass], - tcg_passres); + gen_helper_neon_addl_saturate_s32( + tcg_ctx, tcg_res[pass], tcg_ctx->cpu_env, tcg_res[pass], + tcg_passres); } else { gen_neon_addl(tcg_ctx, size, (accop < 0), tcg_res[pass], tcg_res[pass], tcg_passres); @@ -10966,10 +11542,10 @@ static void handle_3rd_wide(DisasContext *s, int is_q, int is_u, int size, TCGv_i64 tcg_op1 = tcg_temp_new_i64(tcg_ctx); TCGv_i32 tcg_op2 = tcg_temp_new_i32(tcg_ctx); TCGv_i64 tcg_op2_wide = tcg_temp_new_i64(tcg_ctx); - static NeonGenWidenFn * const widenfns[3][2] = { - { gen_helper_neon_widen_s8, gen_helper_neon_widen_u8 }, - { gen_helper_neon_widen_s16, gen_helper_neon_widen_u16 }, - { tcg_gen_ext_i32_i64, tcg_gen_extu_i32_i64 }, + static NeonGenWidenFn *const widenfns[3][2] = { + {gen_helper_neon_widen_s8, gen_helper_neon_widen_u8}, + {gen_helper_neon_widen_s16, 
gen_helper_neon_widen_u16}, + {tcg_gen_ext_i32_i64, tcg_gen_extu_i32_i64}, }; NeonGenWidenFn *widenfn = widenfns[size][is_u]; @@ -10978,8 +11554,8 @@ static void handle_3rd_wide(DisasContext *s, int is_q, int is_u, int size, widenfn(tcg_ctx, tcg_op2_wide, tcg_op2); tcg_temp_free_i32(tcg_ctx, tcg_op2); tcg_res[pass] = tcg_temp_new_i64(tcg_ctx); - gen_neon_addl(tcg_ctx, size, (opcode == 3), - tcg_res[pass], tcg_op1, tcg_op2_wide); + gen_neon_addl(tcg_ctx, size, (opcode == 3), tcg_res[pass], tcg_op1, + tcg_op2_wide); tcg_temp_free_i64(tcg_ctx, tcg_op1); tcg_temp_free_i64(tcg_ctx, tcg_op2_wide); } @@ -10990,7 +11566,8 @@ static void handle_3rd_wide(DisasContext *s, int is_q, int is_u, int size, } } -static void do_narrow_round_high_u32(TCGContext *tcg_ctx, TCGv_i32 res, TCGv_i64 in) +static void do_narrow_round_high_u32(TCGContext *tcg_ctx, TCGv_i32 res, + TCGv_i64 in) { tcg_gen_addi_i64(tcg_ctx, in, in, 1U << 31); tcg_gen_extrh_i64_i32(tcg_ctx, res, in); @@ -11008,19 +11585,20 @@ static void handle_3rd_narrowing(DisasContext *s, int is_q, int is_u, int size, TCGv_i64 tcg_op1 = tcg_temp_new_i64(tcg_ctx); TCGv_i64 tcg_op2 = tcg_temp_new_i64(tcg_ctx); TCGv_i64 tcg_wideres = tcg_temp_new_i64(tcg_ctx); - static NeonGenNarrowFn * const narrowfns[3][2] = { - { gen_helper_neon_narrow_high_u8, - gen_helper_neon_narrow_round_high_u8 }, - { gen_helper_neon_narrow_high_u16, - gen_helper_neon_narrow_round_high_u16 }, - { tcg_gen_extrh_i64_i32, do_narrow_round_high_u32 }, + static NeonGenNarrowFn *const narrowfns[3][2] = { + {gen_helper_neon_narrow_high_u8, + gen_helper_neon_narrow_round_high_u8}, + {gen_helper_neon_narrow_high_u16, + gen_helper_neon_narrow_round_high_u16}, + {tcg_gen_extrh_i64_i32, do_narrow_round_high_u32}, }; NeonGenNarrowFn *gennarrow = narrowfns[size][is_u]; read_vec_element(s, tcg_op1, rn, pass, MO_64); read_vec_element(s, tcg_op2, rm, pass, MO_64); - gen_neon_addl(tcg_ctx, size, (opcode == 6), tcg_wideres, tcg_op1, tcg_op2); + gen_neon_addl(tcg_ctx, size, (opcode == 6), tcg_wideres, tcg_op1, + tcg_op2); tcg_temp_free_i64(tcg_ctx, tcg_op1); tcg_temp_free_i64(tcg_ctx, tcg_op2); @@ -11121,7 +11699,7 @@ static void disas_simd_three_reg_diff(DisasContext *s, uint32_t insn) break; } return; - case 9: /* SQDMLAL, SQDMLAL2 */ + case 9: /* SQDMLAL, SQDMLAL2 */ case 11: /* SQDMLSL, SQDMLSL2 */ case 13: /* SQDMULL, SQDMULL2 */ if (is_u || size == 0) { @@ -11129,11 +11707,11 @@ static void disas_simd_three_reg_diff(DisasContext *s, uint32_t insn) return; } /* fall through */ - case 0: /* SADDL, SADDL2, UADDL, UADDL2 */ - case 2: /* SSUBL, SSUBL2, USUBL, USUBL2 */ - case 5: /* SABAL, SABAL2, UABAL, UABAL2 */ - case 7: /* SABDL, SABDL2, UABDL, UABDL2 */ - case 8: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */ + case 0: /* SADDL, SADDL2, UADDL, UADDL2 */ + case 2: /* SSUBL, SSUBL2, USUBL, USUBL2 */ + case 5: /* SABAL, SABAL2, UABAL, UABAL2 */ + case 7: /* SABDL, SABDL2, UABDL, UABDL2 */ + case 8: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */ case 10: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */ case 12: /* SMULL, SMULL2, UMULL, UMULL2 */ /* 64 x 64 -> 128 */ @@ -11243,19 +11821,24 @@ static void handle_simd_3same_pair(DisasContext *s, int is_q, int u, int opcode, tcg_gen_add_i64(tcg_ctx, tcg_res[pass], tcg_op1, tcg_op2); break; case 0x58: /* FMAXNMP */ - gen_helper_vfp_maxnumd(tcg_ctx, tcg_res[pass], tcg_op1, tcg_op2, fpst); + gen_helper_vfp_maxnumd(tcg_ctx, tcg_res[pass], tcg_op1, tcg_op2, + fpst); break; case 0x5a: /* FADDP */ - gen_helper_vfp_addd(tcg_ctx, tcg_res[pass], tcg_op1, tcg_op2, fpst); + gen_helper_vfp_addd(tcg_ctx, 
tcg_res[pass], tcg_op1, tcg_op2, + fpst); break; case 0x5e: /* FMAXP */ - gen_helper_vfp_maxd(tcg_ctx, tcg_res[pass], tcg_op1, tcg_op2, fpst); + gen_helper_vfp_maxd(tcg_ctx, tcg_res[pass], tcg_op1, tcg_op2, + fpst); break; case 0x78: /* FMINNMP */ - gen_helper_vfp_minnumd(tcg_ctx, tcg_res[pass], tcg_op1, tcg_op2, fpst); + gen_helper_vfp_minnumd(tcg_ctx, tcg_res[pass], tcg_op1, tcg_op2, + fpst); break; case 0x7e: /* FMINP */ - gen_helper_vfp_mind(tcg_ctx, tcg_res[pass], tcg_op1, tcg_op2, fpst); + gen_helper_vfp_mind(tcg_ctx, tcg_res[pass], tcg_op1, tcg_op2, + fpst); break; default: g_assert_not_reached(); @@ -11287,7 +11870,7 @@ static void handle_simd_3same_pair(DisasContext *s, int is_q, int u, int opcode, switch (opcode) { case 0x17: /* ADDP */ { - static NeonGenTwoOpFn * const fns[3] = { + static NeonGenTwoOpFn *const fns[3] = { gen_helper_neon_padd_u8, gen_helper_neon_padd_u16, tcg_gen_add_i32, @@ -11297,39 +11880,44 @@ static void handle_simd_3same_pair(DisasContext *s, int is_q, int u, int opcode, } case 0x14: /* SMAXP, UMAXP */ { - static NeonGenTwoOpFn * const fns[3][2] = { - { gen_helper_neon_pmax_s8, gen_helper_neon_pmax_u8 }, - { gen_helper_neon_pmax_s16, gen_helper_neon_pmax_u16 }, - { tcg_gen_smax_i32, tcg_gen_umax_i32 }, + static NeonGenTwoOpFn *const fns[3][2] = { + {gen_helper_neon_pmax_s8, gen_helper_neon_pmax_u8}, + {gen_helper_neon_pmax_s16, gen_helper_neon_pmax_u16}, + {tcg_gen_smax_i32, tcg_gen_umax_i32}, }; genfn = fns[size][u]; break; } case 0x15: /* SMINP, UMINP */ { - static NeonGenTwoOpFn * const fns[3][2] = { - { gen_helper_neon_pmin_s8, gen_helper_neon_pmin_u8 }, - { gen_helper_neon_pmin_s16, gen_helper_neon_pmin_u16 }, - { tcg_gen_smin_i32, tcg_gen_umin_i32 }, + static NeonGenTwoOpFn *const fns[3][2] = { + {gen_helper_neon_pmin_s8, gen_helper_neon_pmin_u8}, + {gen_helper_neon_pmin_s16, gen_helper_neon_pmin_u16}, + {tcg_gen_smin_i32, tcg_gen_umin_i32}, }; genfn = fns[size][u]; break; } /* The FP operations are all on single floats (32 bit) */ case 0x58: /* FMAXNMP */ - gen_helper_vfp_maxnums(tcg_ctx, tcg_res[pass], tcg_op1, tcg_op2, fpst); + gen_helper_vfp_maxnums(tcg_ctx, tcg_res[pass], tcg_op1, tcg_op2, + fpst); break; case 0x5a: /* FADDP */ - gen_helper_vfp_adds(tcg_ctx, tcg_res[pass], tcg_op1, tcg_op2, fpst); + gen_helper_vfp_adds(tcg_ctx, tcg_res[pass], tcg_op1, tcg_op2, + fpst); break; case 0x5e: /* FMAXP */ - gen_helper_vfp_maxs(tcg_ctx, tcg_res[pass], tcg_op1, tcg_op2, fpst); + gen_helper_vfp_maxs(tcg_ctx, tcg_res[pass], tcg_op1, tcg_op2, + fpst); break; case 0x78: /* FMINNMP */ - gen_helper_vfp_minnums(tcg_ctx, tcg_res[pass], tcg_op1, tcg_op2, fpst); + gen_helper_vfp_minnums(tcg_ctx, tcg_res[pass], tcg_op1, tcg_op2, + fpst); break; case 0x7e: /* FMINP */ - gen_helper_vfp_mins(tcg_ctx, tcg_res[pass], tcg_op1, tcg_op2, fpst); + gen_helper_vfp_mins(tcg_ctx, tcg_res[pass], tcg_op1, tcg_op2, + fpst); break; default: g_assert_not_reached(); @@ -11364,9 +11952,8 @@ static void disas_simd_3same_float(DisasContext *s, uint32_t insn) * together indicate the operation. size[0] indicates single * or double. 
*/ - int fpopcode = extract32(insn, 11, 5) - | (extract32(insn, 23, 1) << 5) - | (extract32(insn, 29, 1) << 6); + int fpopcode = extract32(insn, 11, 5) | (extract32(insn, 23, 1) << 5) | + (extract32(insn, 29, 1) << 6); int is_q = extract32(insn, 30, 1); int size = extract32(insn, 22, 1); int rm = extract32(insn, 16, 5); @@ -11392,8 +11979,8 @@ static void disas_simd_3same_float(DisasContext *s, uint32_t insn) unallocated_encoding(s); return; } - handle_simd_3same_pair(s, is_q, 0, fpopcode, size ? MO_64 : MO_32, - rn, rm, rd); + handle_simd_3same_pair(s, is_q, 0, fpopcode, size ? MO_64 : MO_32, rn, + rm, rd); return; case 0x1b: /* FMULX */ case 0x1f: /* FRECPS */ @@ -11432,11 +12019,10 @@ static void disas_simd_3same_float(DisasContext *s, uint32_t insn) int is_s = extract32(insn, 23, 1); int is_2 = extract32(insn, 29, 1); int data = (is_2 << 1) | is_s; - tcg_gen_gvec_3_ptr(tcg_ctx, vec_full_reg_offset(s, rd), - vec_full_reg_offset(s, rn), - vec_full_reg_offset(s, rm), tcg_ctx->cpu_env, - is_q ? 16 : 8, vec_full_reg_size(s), - data, gen_helper_gvec_fmlal_a64); + tcg_gen_gvec_3_ptr( + tcg_ctx, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn), + vec_full_reg_offset(s, rm), tcg_ctx->cpu_env, is_q ? 16 : 8, + vec_full_reg_size(s), data, gen_helper_gvec_fmlal_a64); } return; @@ -11467,13 +12053,13 @@ static void disas_simd_3same_int(DisasContext *s, uint32_t insn) return; } /* fall through */ - case 0x0: /* SHADD, UHADD */ - case 0x2: /* SRHADD, URHADD */ - case 0x4: /* SHSUB, UHSUB */ - case 0xc: /* SMAX, UMAX */ - case 0xd: /* SMIN, UMIN */ - case 0xe: /* SABD, UABD */ - case 0xf: /* SABA, UABA */ + case 0x0: /* SHADD, UHADD */ + case 0x2: /* SRHADD, URHADD */ + case 0x4: /* SHSUB, UHSUB */ + case 0xc: /* SMAX, UMAX */ + case 0xd: /* SMIN, UMIN */ + case 0xe: /* SABD, UABD */ + case 0xf: /* SABA, UABA */ case 0x12: /* MLA, MLS */ if (size == 3) { unallocated_encoding(s); @@ -11500,24 +12086,25 @@ static void disas_simd_3same_int(DisasContext *s, uint32_t insn) switch (opcode) { case 0x01: /* SQADD, UQADD */ - tcg_gen_gvec_4(tcg_ctx, vec_full_reg_offset(s, rd), - offsetof(CPUARMState, vfp.qc), - vec_full_reg_offset(s, rn), - vec_full_reg_offset(s, rm), - is_q ? 16 : 8, vec_full_reg_size(s), - (u ? uqadd_op : sqadd_op) + size); + if (u) { + gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_uqadd_qc, size); + } else { + gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_sqadd_qc, size); + } return; case 0x05: /* SQSUB, UQSUB */ - tcg_gen_gvec_4(tcg_ctx, vec_full_reg_offset(s, rd), - offsetof(CPUARMState, vfp.qc), - vec_full_reg_offset(s, rn), - vec_full_reg_offset(s, rm), - is_q ? 16 : 8, vec_full_reg_size(s), - (u ? uqsub_op : sqsub_op) + size); + if (u) { + gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_uqsub_qc, size); + } else { + gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_sqsub_qc, size); + } return; case 0x08: /* SSHL, USHL */ - gen_gvec_op3(s, is_q, rd, rn, rm, - u ? 
&ushl_op[size] : &sshl_op[size]); + if (u) { + gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_ushl, size); + } else { + gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_sshl, size); + } return; case 0x0c: /* SMAX, UMAX */ if (u) { @@ -11533,6 +12120,20 @@ static void disas_simd_3same_int(DisasContext *s, uint32_t insn) gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_smin, size); } return; + case 0xe: /* SABD, UABD */ + if (u) { + gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_uabd, size); + } else { + gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_sabd, size); + } + return; + case 0xf: /* SABA, UABA */ + if (u) { + gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_uaba, size); + } else { + gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_saba, size); + } + return; case 0x10: /* ADD, SUB */ if (u) { gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_sub, size); @@ -11540,23 +12141,23 @@ static void disas_simd_3same_int(DisasContext *s, uint32_t insn) gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_add, size); } return; - case 0x13: /* MUL, PMUL */ + case 0x13: /* MUL, PMUL */ if (!u) { /* MUL */ gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_mul, size); - } else { /* PMUL */ + } else { /* PMUL */ gen_gvec_op3_ool(s, is_q, rd, rn, rm, 0, gen_helper_gvec_pmul_b); } return; case 0x12: /* MLA, MLS */ if (u) { - gen_gvec_op3(s, is_q, rd, rn, rm, &mls_op[size]); + gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_mls, size); } else { - gen_gvec_op3(s, is_q, rd, rn, rm, &mla_op[size]); + gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_mla, size); } return; case 0x11: if (!u) { /* CMTST */ - gen_gvec_op3(s, is_q, rd, rn, rm, &cmtst_op[size]); + gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_cmtst, size); return; } /* else CMEQ */ @@ -11569,8 +12170,7 @@ static void disas_simd_3same_int(DisasContext *s, uint32_t insn) cond = u ? TCG_COND_GEU : TCG_COND_GE; do_gvec_cmp: tcg_gen_gvec_cmp(tcg_ctx, cond, size, vec_full_reg_offset(s, rd), - vec_full_reg_offset(s, rn), - vec_full_reg_offset(s, rm), + vec_full_reg_offset(s, rn), vec_full_reg_offset(s, rm), is_q ? 
16 : 8, vec_full_reg_size(s)); return; } @@ -11607,80 +12207,69 @@ static void disas_simd_3same_int(DisasContext *s, uint32_t insn) switch (opcode) { case 0x0: /* SHADD, UHADD */ { - static NeonGenTwoOpFn * const fns[3][2] = { - { gen_helper_neon_hadd_s8, gen_helper_neon_hadd_u8 }, - { gen_helper_neon_hadd_s16, gen_helper_neon_hadd_u16 }, - { gen_helper_neon_hadd_s32, gen_helper_neon_hadd_u32 }, + static NeonGenTwoOpFn *const fns[3][2] = { + {gen_helper_neon_hadd_s8, gen_helper_neon_hadd_u8}, + {gen_helper_neon_hadd_s16, gen_helper_neon_hadd_u16}, + {gen_helper_neon_hadd_s32, gen_helper_neon_hadd_u32}, }; genfn = fns[size][u]; break; } case 0x2: /* SRHADD, URHADD */ { - static NeonGenTwoOpFn * const fns[3][2] = { - { gen_helper_neon_rhadd_s8, gen_helper_neon_rhadd_u8 }, - { gen_helper_neon_rhadd_s16, gen_helper_neon_rhadd_u16 }, - { gen_helper_neon_rhadd_s32, gen_helper_neon_rhadd_u32 }, + static NeonGenTwoOpFn *const fns[3][2] = { + {gen_helper_neon_rhadd_s8, gen_helper_neon_rhadd_u8}, + {gen_helper_neon_rhadd_s16, gen_helper_neon_rhadd_u16}, + {gen_helper_neon_rhadd_s32, gen_helper_neon_rhadd_u32}, }; genfn = fns[size][u]; break; } case 0x4: /* SHSUB, UHSUB */ { - static NeonGenTwoOpFn * const fns[3][2] = { - { gen_helper_neon_hsub_s8, gen_helper_neon_hsub_u8 }, - { gen_helper_neon_hsub_s16, gen_helper_neon_hsub_u16 }, - { gen_helper_neon_hsub_s32, gen_helper_neon_hsub_u32 }, + static NeonGenTwoOpFn *const fns[3][2] = { + {gen_helper_neon_hsub_s8, gen_helper_neon_hsub_u8}, + {gen_helper_neon_hsub_s16, gen_helper_neon_hsub_u16}, + {gen_helper_neon_hsub_s32, gen_helper_neon_hsub_u32}, }; genfn = fns[size][u]; break; } case 0x9: /* SQSHL, UQSHL */ { - static NeonGenTwoOpEnvFn * const fns[3][2] = { - { gen_helper_neon_qshl_s8, gen_helper_neon_qshl_u8 }, - { gen_helper_neon_qshl_s16, gen_helper_neon_qshl_u16 }, - { gen_helper_neon_qshl_s32, gen_helper_neon_qshl_u32 }, + static NeonGenTwoOpEnvFn *const fns[3][2] = { + {gen_helper_neon_qshl_s8, gen_helper_neon_qshl_u8}, + {gen_helper_neon_qshl_s16, gen_helper_neon_qshl_u16}, + {gen_helper_neon_qshl_s32, gen_helper_neon_qshl_u32}, }; genenvfn = fns[size][u]; break; } case 0xa: /* SRSHL, URSHL */ { - static NeonGenTwoOpFn * const fns[3][2] = { - { gen_helper_neon_rshl_s8, gen_helper_neon_rshl_u8 }, - { gen_helper_neon_rshl_s16, gen_helper_neon_rshl_u16 }, - { gen_helper_neon_rshl_s32, gen_helper_neon_rshl_u32 }, + static NeonGenTwoOpFn *const fns[3][2] = { + {gen_helper_neon_rshl_s8, gen_helper_neon_rshl_u8}, + {gen_helper_neon_rshl_s16, gen_helper_neon_rshl_u16}, + {gen_helper_neon_rshl_s32, gen_helper_neon_rshl_u32}, }; genfn = fns[size][u]; break; } case 0xb: /* SQRSHL, UQRSHL */ { - static NeonGenTwoOpEnvFn * const fns[3][2] = { - { gen_helper_neon_qrshl_s8, gen_helper_neon_qrshl_u8 }, - { gen_helper_neon_qrshl_s16, gen_helper_neon_qrshl_u16 }, - { gen_helper_neon_qrshl_s32, gen_helper_neon_qrshl_u32 }, + static NeonGenTwoOpEnvFn *const fns[3][2] = { + {gen_helper_neon_qrshl_s8, gen_helper_neon_qrshl_u8}, + {gen_helper_neon_qrshl_s16, gen_helper_neon_qrshl_u16}, + {gen_helper_neon_qrshl_s32, gen_helper_neon_qrshl_u32}, }; genenvfn = fns[size][u]; break; } - case 0xe: /* SABD, UABD */ - case 0xf: /* SABA, UABA */ - { - static NeonGenTwoOpFn * const fns[3][2] = { - { gen_helper_neon_abd_s8, gen_helper_neon_abd_u8 }, - { gen_helper_neon_abd_s16, gen_helper_neon_abd_u16 }, - { gen_helper_neon_abd_s32, gen_helper_neon_abd_u32 }, - }; - genfn = fns[size][u]; - break; - } case 0x16: /* SQDMULH, SQRDMULH */ { - static NeonGenTwoOpEnvFn * const 
fns[2][2] = { - { gen_helper_neon_qdmulh_s16, gen_helper_neon_qrdmulh_s16 }, - { gen_helper_neon_qdmulh_s32, gen_helper_neon_qrdmulh_s32 }, + static NeonGenTwoOpEnvFn *const fns[2][2] = { + {gen_helper_neon_qdmulh_s16, gen_helper_neon_qrdmulh_s16}, + {gen_helper_neon_qdmulh_s32, gen_helper_neon_qrdmulh_s32}, }; assert(size == 1 || size == 2); genenvfn = fns[size - 1][u]; @@ -11696,18 +12285,6 @@ static void disas_simd_3same_int(DisasContext *s, uint32_t insn) genfn(tcg_ctx, tcg_res, tcg_op1, tcg_op2); } - if (opcode == 0xf) { - /* SABA, UABA: accumulating ops */ - static NeonGenTwoOpFn * const fns[3] = { - gen_helper_neon_add_u8, - gen_helper_neon_add_u16, - tcg_gen_add_i32, - }; - - read_vec_element_i32(s, tcg_op1, rd, pass, MO_32); - fns[size](tcg_ctx, tcg_res, tcg_op1, tcg_res); - } - write_vec_element_i32(s, tcg_res, rd, pass, MO_32); tcg_temp_free_i32(tcg_ctx, tcg_res); @@ -11834,7 +12411,7 @@ static void disas_simd_three_reg_same_fp16(DisasContext *s, uint32_t insn) rn = extract32(insn, 5, 5); rd = extract32(insn, 0, 5); - fpopcode = opcode | (a << 3) | (u << 4); + fpopcode = opcode | (a << 3) | (u << 4); datasize = is_q ? 128 : 64; elements = datasize / 16; @@ -11866,21 +12443,24 @@ static void disas_simd_three_reg_same_fp16(DisasContext *s, uint32_t insn) switch (fpopcode) { case 0x10: /* FMAXNMP */ - gen_helper_advsimd_maxnumh(tcg_ctx, tcg_res[pass], tcg_op1, tcg_op2, - fpst); + gen_helper_advsimd_maxnumh(tcg_ctx, tcg_res[pass], tcg_op1, + tcg_op2, fpst); break; case 0x12: /* FADDP */ - gen_helper_advsimd_addh(tcg_ctx, tcg_res[pass], tcg_op1, tcg_op2, fpst); + gen_helper_advsimd_addh(tcg_ctx, tcg_res[pass], tcg_op1, + tcg_op2, fpst); break; case 0x16: /* FMAXP */ - gen_helper_advsimd_maxh(tcg_ctx, tcg_res[pass], tcg_op1, tcg_op2, fpst); + gen_helper_advsimd_maxh(tcg_ctx, tcg_res[pass], tcg_op1, + tcg_op2, fpst); break; case 0x18: /* FMINNMP */ - gen_helper_advsimd_minnumh(tcg_ctx, tcg_res[pass], tcg_op1, tcg_op2, - fpst); + gen_helper_advsimd_minnumh(tcg_ctx, tcg_res[pass], tcg_op1, + tcg_op2, fpst); break; case 0x1e: /* FMINP */ - gen_helper_advsimd_minh(tcg_ctx, tcg_res[pass], tcg_op1, tcg_op2, fpst); + gen_helper_advsimd_minh(tcg_ctx, tcg_res[pass], tcg_op1, + tcg_op2, fpst); break; default: g_assert_not_reached(); @@ -11906,68 +12486,84 @@ static void disas_simd_three_reg_same_fp16(DisasContext *s, uint32_t insn) switch (fpopcode) { case 0x0: /* FMAXNM */ - gen_helper_advsimd_maxnumh(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst); + gen_helper_advsimd_maxnumh(tcg_ctx, tcg_res, tcg_op1, tcg_op2, + fpst); break; case 0x1: /* FMLA */ read_vec_element_i32(s, tcg_res, rd, pass, MO_16); - gen_helper_advsimd_muladdh(tcg_ctx, tcg_res, tcg_op1, tcg_op2, tcg_res, - fpst); + gen_helper_advsimd_muladdh(tcg_ctx, tcg_res, tcg_op1, tcg_op2, + tcg_res, fpst); break; case 0x2: /* FADD */ - gen_helper_advsimd_addh(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst); + gen_helper_advsimd_addh(tcg_ctx, tcg_res, tcg_op1, tcg_op2, + fpst); break; case 0x3: /* FMULX */ - gen_helper_advsimd_mulxh(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst); + gen_helper_advsimd_mulxh(tcg_ctx, tcg_res, tcg_op1, tcg_op2, + fpst); break; case 0x4: /* FCMEQ */ - gen_helper_advsimd_ceq_f16(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst); + gen_helper_advsimd_ceq_f16(tcg_ctx, tcg_res, tcg_op1, tcg_op2, + fpst); break; case 0x6: /* FMAX */ - gen_helper_advsimd_maxh(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst); + gen_helper_advsimd_maxh(tcg_ctx, tcg_res, tcg_op1, tcg_op2, + fpst); break; case 0x7: /* FRECPS */ gen_helper_recpsf_f16(tcg_ctx, tcg_res, 
tcg_op1, tcg_op2, fpst); break; case 0x8: /* FMINNM */ - gen_helper_advsimd_minnumh(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst); + gen_helper_advsimd_minnumh(tcg_ctx, tcg_res, tcg_op1, tcg_op2, + fpst); break; case 0x9: /* FMLS */ /* As usual for ARM, separate negation for fused multiply-add */ tcg_gen_xori_i32(tcg_ctx, tcg_op1, tcg_op1, 0x8000); read_vec_element_i32(s, tcg_res, rd, pass, MO_16); - gen_helper_advsimd_muladdh(tcg_ctx, tcg_res, tcg_op1, tcg_op2, tcg_res, - fpst); + gen_helper_advsimd_muladdh(tcg_ctx, tcg_res, tcg_op1, tcg_op2, + tcg_res, fpst); break; case 0xa: /* FSUB */ - gen_helper_advsimd_subh(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst); + gen_helper_advsimd_subh(tcg_ctx, tcg_res, tcg_op1, tcg_op2, + fpst); break; case 0xe: /* FMIN */ - gen_helper_advsimd_minh(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst); + gen_helper_advsimd_minh(tcg_ctx, tcg_res, tcg_op1, tcg_op2, + fpst); break; case 0xf: /* FRSQRTS */ - gen_helper_rsqrtsf_f16(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst); + gen_helper_rsqrtsf_f16(tcg_ctx, tcg_res, tcg_op1, tcg_op2, + fpst); break; case 0x13: /* FMUL */ - gen_helper_advsimd_mulh(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst); + gen_helper_advsimd_mulh(tcg_ctx, tcg_res, tcg_op1, tcg_op2, + fpst); break; case 0x14: /* FCMGE */ - gen_helper_advsimd_cge_f16(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst); + gen_helper_advsimd_cge_f16(tcg_ctx, tcg_res, tcg_op1, tcg_op2, + fpst); break; case 0x15: /* FACGE */ - gen_helper_advsimd_acge_f16(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst); + gen_helper_advsimd_acge_f16(tcg_ctx, tcg_res, tcg_op1, tcg_op2, + fpst); break; case 0x17: /* FDIV */ - gen_helper_advsimd_divh(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst); + gen_helper_advsimd_divh(tcg_ctx, tcg_res, tcg_op1, tcg_op2, + fpst); break; case 0x1a: /* FABD */ - gen_helper_advsimd_subh(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst); + gen_helper_advsimd_subh(tcg_ctx, tcg_res, tcg_op1, tcg_op2, + fpst); tcg_gen_andi_i32(tcg_ctx, tcg_res, tcg_res, 0x7fff); break; case 0x1c: /* FCMGT */ - gen_helper_advsimd_cgt_f16(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst); + gen_helper_advsimd_cgt_f16(tcg_ctx, tcg_res, tcg_op1, tcg_op2, + fpst); break; case 0x1d: /* FACGT */ - gen_helper_advsimd_acgt_f16(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst); + gen_helper_advsimd_acgt_f16(tcg_ctx, tcg_res, tcg_op1, tcg_op2, + fpst); break; default: fprintf(stderr, "%s: insn %#04x, fpop %#2x @ %#" PRIx64 "\n", @@ -12028,9 +12624,8 @@ static void disas_simd_three_reg_same_extra(DisasContext *s, uint32_t insn) case 0x1b: /* FCMLA, #270 */ case 0x1c: /* FCADD, #90 */ case 0x1e: /* FCADD, #270 */ - if (size == 0 - || (size == 1 && !dc_isar_feature(aa64_fp16, s)) - || (size == 3 && !is_q)) { + if (size == 0 || (size == 1 && !dc_isar_feature(aa64_fp16, s)) || + (size == 3 && !is_q)) { unallocated_encoding(s); return; } @@ -12050,29 +12645,11 @@ static void disas_simd_three_reg_same_extra(DisasContext *s, uint32_t insn) switch (opcode) { case 0x0: /* SQRDMLAH (vector) */ - switch (size) { - case 1: - gen_gvec_op3_env(s, is_q, rd, rn, rm, gen_helper_gvec_qrdmlah_s16); - break; - case 2: - gen_gvec_op3_env(s, is_q, rd, rn, rm, gen_helper_gvec_qrdmlah_s32); - break; - default: - g_assert_not_reached(); - } + gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_sqrdmlah_qc, size); return; case 0x1: /* SQRDMLSH (vector) */ - switch (size) { - case 1: - gen_gvec_op3_env(s, is_q, rd, rn, rm, gen_helper_gvec_qrdmlsh_s16); - break; - case 2: - gen_gvec_op3_env(s, is_q, rd, rn, rm, gen_helper_gvec_qrdmlsh_s32); - break; - default: - 
g_assert_not_reached(); - } + gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_sqrdmlsh_qc, size); return; case 0x2: /* SDOT / UDOT */ @@ -12149,7 +12726,8 @@ static void handle_2misc_widening(DisasContext *s, int opcode, bool is_q, tcg_res[pass] = tcg_temp_new_i64(tcg_ctx); read_vec_element_i32(s, tcg_op, rn, srcelt + pass, MO_32); - gen_helper_vfp_fcvtds(tcg_ctx, tcg_res[pass], tcg_op, tcg_ctx->cpu_env); + gen_helper_vfp_fcvtds(tcg_ctx, tcg_res[pass], tcg_op, + tcg_ctx->cpu_env); tcg_temp_free_i32(tcg_ctx, tcg_op); } for (pass = 0; pass < 2; pass++) { @@ -12167,8 +12745,8 @@ static void handle_2misc_widening(DisasContext *s, int opcode, bool is_q, tcg_res[pass] = tcg_temp_new_i32(tcg_ctx); read_vec_element_i32(s, tcg_res[pass], rn, srcelt + pass, MO_16); - gen_helper_vfp_fcvt_f16_to_f32(tcg_ctx, tcg_res[pass], tcg_res[pass], - fpst, ahp); + gen_helper_vfp_fcvt_f16_to_f32(tcg_ctx, tcg_res[pass], + tcg_res[pass], fpst, ahp); } for (pass = 0; pass < 4; pass++) { write_vec_element_i32(s, tcg_res[pass], rd, pass, MO_32); @@ -12180,8 +12758,8 @@ static void handle_2misc_widening(DisasContext *s, int opcode, bool is_q, } } -static void handle_rev(DisasContext *s, int opcode, bool u, - bool is_q, int size, int rn, int rd) +static void handle_rev(DisasContext *s, int opcode, bool u, bool is_q, int size, + int rn, int rd) { TCGContext *tcg_ctx = s->uc->tcg_ctx; int op = (opcode << 1) | u; @@ -12237,10 +12815,11 @@ static void handle_rev(DisasContext *s, int opcode, bool u, int off = e_rev * esize; read_vec_element(s, tcg_rn, rn, i, size); if (off >= 64) { - tcg_gen_deposit_i64(tcg_ctx, tcg_rd_hi, tcg_rd_hi, - tcg_rn, off - 64, esize); + tcg_gen_deposit_i64(tcg_ctx, tcg_rd_hi, tcg_rd_hi, tcg_rn, + off - 64, esize); } else { - tcg_gen_deposit_i64(tcg_ctx, tcg_rd, tcg_rd, tcg_rn, off, esize); + tcg_gen_deposit_i64(tcg_ctx, tcg_rd, tcg_rd, tcg_rn, off, + esize); } } write_vec_element(s, tcg_rd, rd, 0, MO_64); @@ -12290,10 +12869,10 @@ static void handle_2misc_pairwise(DisasContext *s, int opcode, bool u, } else { for (pass = 0; pass < maxpass; pass++) { TCGv_i64 tcg_op = tcg_temp_new_i64(tcg_ctx); - NeonGenOneOpFn *genfn; - static NeonGenOneOpFn * const fns[2][2] = { - { gen_helper_neon_addlp_s8, gen_helper_neon_addlp_u8 }, - { gen_helper_neon_addlp_s16, gen_helper_neon_addlp_u16 }, + NeonGenOne64OpFn *genfn; + static NeonGenOne64OpFn *const fns[2][2] = { + {gen_helper_neon_addlp_s8, gen_helper_neon_addlp_u8}, + {gen_helper_neon_addlp_s16, gen_helper_neon_addlp_u16}, }; genfn = fns[size][u]; @@ -12334,7 +12913,7 @@ static void handle_shll(DisasContext *s, bool is_q, int size, int rn, int rd) TCGv_i64 tcg_res[2]; for (pass = 0; pass < 2; pass++) { - static NeonGenWidenFn * const widenfns[3] = { + static NeonGenWidenFn *const widenfns[3] = { gen_helper_neon_widen_u8, gen_helper_neon_widen_u16, tcg_gen_extu_i32_i64, @@ -12477,8 +13056,7 @@ static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn) case 0x1c: case 0x1d: case 0x1e: - case 0x1f: - { + case 0x1f: { /* Floating point: U, size[1] and opcode indicate operation; * size[0] indicates single or double precision. 
*/ @@ -12618,7 +13196,6 @@ static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn) unallocated_encoding(s); return; } - need_fpstatus = true; break; case 0x1e: /* FRINT32Z */ case 0x1f: /* FRINT64Z */ @@ -12667,6 +13244,23 @@ static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn) return; } break; + case 0x8: /* CMGT, CMGE */ + if (u) { + gen_gvec_fn2(s, is_q, rd, rn, gen_gvec_cge0, size); + } else { + gen_gvec_fn2(s, is_q, rd, rn, gen_gvec_cgt0, size); + } + return; + case 0x9: /* CMEQ, CMLE */ + if (u) { + gen_gvec_fn2(s, is_q, rd, rn, gen_gvec_cle0, size); + } else { + gen_gvec_fn2(s, is_q, rd, rn, gen_gvec_ceq0, size); + } + return; + case 0xa: /* CMLT */ + gen_gvec_fn2(s, is_q, rd, rn, gen_gvec_clt0, size); + return; case 0xb: if (u) { /* ABS, NEG */ gen_gvec_fn2(s, is_q, rd, rn, tcg_gen_gvec_neg, size); @@ -12690,8 +13284,8 @@ static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn) read_vec_element(s, tcg_op, rn, pass, MO_64); - handle_2misc_64(s, opcode, u, tcg_res, tcg_op, - tcg_rmode, tcg_fpstatus); + handle_2misc_64(s, opcode, u, tcg_res, tcg_op, tcg_rmode, + tcg_fpstatus); write_vec_element(s, tcg_res, rd, pass, MO_64); @@ -12704,29 +13298,12 @@ static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn) for (pass = 0; pass < (is_q ? 4 : 2); pass++) { TCGv_i32 tcg_op = tcg_temp_new_i32(tcg_ctx); TCGv_i32 tcg_res = tcg_temp_new_i32(tcg_ctx); - TCGCond cond; read_vec_element_i32(s, tcg_op, rn, pass, MO_32); if (size == 2) { /* Special cases for 32 bit elements */ switch (opcode) { - case 0xa: /* CMLT */ - /* 32 bit integer comparison against zero, result is - * test ? (2^32 - 1) : 0. We implement via setcond(test) - * and inverting. - */ - cond = TCG_COND_LT; - do_cmop: - tcg_gen_setcondi_i32(tcg_ctx, cond, tcg_res, tcg_op, 0); - tcg_gen_neg_i32(tcg_ctx, tcg_res, tcg_res); - break; - case 0x8: /* CMGT, CMGE */ - cond = u ? TCG_COND_GE : TCG_COND_GT; - goto do_cmop; - case 0x9: /* CMEQ, CMLE */ - cond = u ? 
TCG_COND_LE : TCG_COND_EQ; - goto do_cmop; case 0x4: /* CLS */ if (u) { tcg_gen_clzi_i32(tcg_ctx, tcg_res, tcg_op, 32); @@ -12736,9 +13313,11 @@ static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn) break; case 0x7: /* SQABS, SQNEG */ if (u) { - gen_helper_neon_qneg_s32(tcg_ctx, tcg_res, tcg_ctx->cpu_env, tcg_op); + gen_helper_neon_qneg_s32(tcg_ctx, tcg_res, + tcg_ctx->cpu_env, tcg_op); } else { - gen_helper_neon_qabs_s32(tcg_ctx, tcg_res, tcg_ctx->cpu_env, tcg_op); + gen_helper_neon_qabs_s32(tcg_ctx, tcg_res, + tcg_ctx->cpu_env, tcg_op); } break; case 0x2f: /* FABS */ @@ -12748,7 +13327,8 @@ static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn) gen_helper_vfp_negs(tcg_ctx, tcg_res, tcg_op); break; case 0x7f: /* FSQRT */ - gen_helper_vfp_sqrts(tcg_ctx, tcg_res, tcg_op, tcg_ctx->cpu_env); + gen_helper_vfp_sqrts(tcg_ctx, tcg_res, tcg_op, + tcg_ctx->cpu_env); break; case 0x1a: /* FCVTNS */ case 0x1b: /* FCVTMS */ @@ -12757,8 +13337,8 @@ static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn) case 0x3b: /* FCVTZS */ { TCGv_i32 tcg_shift = tcg_const_i32(tcg_ctx, 0); - gen_helper_vfp_tosls(tcg_ctx, tcg_res, tcg_op, - tcg_shift, tcg_fpstatus); + gen_helper_vfp_tosls(tcg_ctx, tcg_res, tcg_op, tcg_shift, + tcg_fpstatus); tcg_temp_free_i32(tcg_ctx, tcg_shift); break; } @@ -12769,8 +13349,8 @@ static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn) case 0x7b: /* FCVTZU */ { TCGv_i32 tcg_shift = tcg_const_i32(tcg_ctx, 0); - gen_helper_vfp_touls(tcg_ctx, tcg_res, tcg_op, - tcg_shift, tcg_fpstatus); + gen_helper_vfp_touls(tcg_ctx, tcg_res, tcg_op, tcg_shift, + tcg_fpstatus); tcg_temp_free_i32(tcg_ctx, tcg_shift); break; } @@ -12783,18 +13363,21 @@ static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn) gen_helper_rints(tcg_ctx, tcg_res, tcg_op, tcg_fpstatus); break; case 0x59: /* FRINTX */ - gen_helper_rints_exact(tcg_ctx, tcg_res, tcg_op, tcg_fpstatus); + gen_helper_rints_exact(tcg_ctx, tcg_res, tcg_op, + tcg_fpstatus); break; case 0x7c: /* URSQRTE */ - gen_helper_rsqrte_u32(tcg_ctx, tcg_res, tcg_op, tcg_fpstatus); + gen_helper_rsqrte_u32(tcg_ctx, tcg_res, tcg_op); break; case 0x1e: /* FRINT32Z */ case 0x5e: /* FRINT32X */ - gen_helper_frint32_s(tcg_ctx, tcg_res, tcg_op, tcg_fpstatus); + gen_helper_frint32_s(tcg_ctx, tcg_res, tcg_op, + tcg_fpstatus); break; case 0x1f: /* FRINT64Z */ case 0x5f: /* FRINT64X */ - gen_helper_frint64_s(tcg_ctx, tcg_res, tcg_op, tcg_fpstatus); + gen_helper_frint64_s(tcg_ctx, tcg_res, tcg_op, + tcg_fpstatus); break; default: g_assert_not_reached(); @@ -12815,44 +13398,14 @@ static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn) case 0x7: /* SQABS, SQNEG */ { NeonGenOneOpEnvFn *genfn; - static NeonGenOneOpEnvFn * const fns[2][2] = { - { gen_helper_neon_qabs_s8, gen_helper_neon_qneg_s8 }, - { gen_helper_neon_qabs_s16, gen_helper_neon_qneg_s16 }, + static NeonGenOneOpEnvFn *const fns[2][2] = { + {gen_helper_neon_qabs_s8, gen_helper_neon_qneg_s8}, + {gen_helper_neon_qabs_s16, gen_helper_neon_qneg_s16}, }; genfn = fns[size][u]; genfn(tcg_ctx, tcg_res, tcg_ctx->cpu_env, tcg_op); break; } - case 0x8: /* CMGT, CMGE */ - case 0x9: /* CMEQ, CMLE */ - case 0xa: /* CMLT */ - { - static NeonGenTwoOpFn * const fns[3][2] = { - { gen_helper_neon_cgt_s8, gen_helper_neon_cgt_s16 }, - { gen_helper_neon_cge_s8, gen_helper_neon_cge_s16 }, - { gen_helper_neon_ceq_u8, gen_helper_neon_ceq_u16 }, - }; - NeonGenTwoOpFn *genfn; - int comp; - bool reverse; - TCGv_i32 tcg_zero = tcg_const_i32(tcg_ctx, 0); - - /* comp = index 
into [CMGT, CMGE, CMEQ, CMLE, CMLT] */ - comp = (opcode - 0x8) * 2 + u; - /* ...but LE, LT are implemented as reverse GE, GT */ - reverse = (comp > 2); - if (reverse) { - comp = 4 - comp; - } - genfn = fns[comp][size]; - if (reverse) { - genfn(tcg_ctx, tcg_res, tcg_zero, tcg_op); - } else { - genfn(tcg_ctx, tcg_res, tcg_op, tcg_zero); - } - tcg_temp_free_i32(tcg_ctx, tcg_zero); - break; - } case 0x4: /* CLS, CLZ */ if (u) { if (size == 0) { @@ -12957,8 +13510,7 @@ static void disas_simd_two_reg_misc_fp16(DisasContext *s, uint32_t insn) } handle_simd_intfp_conv(s, rd, rn, elements, !u, 0, MO_16); return; - } - break; + } break; case 0x2c: /* FCMGT (zero) */ case 0x2d: /* FCMEQ (zero) */ case 0x2e: /* FCMLT (zero) */ @@ -13051,7 +13603,6 @@ static void disas_simd_two_reg_misc_fp16(DisasContext *s, uint32_t insn) g_assert_not_reached(); } - /* Check additional constraints for the scalar encoding */ if (is_scalar) { if (!is_q) { @@ -13088,7 +13639,8 @@ static void disas_simd_two_reg_misc_fp16(DisasContext *s, uint32_t insn) case 0x1c: /* FCVTAS */ case 0x3a: /* FCVTPS */ case 0x3b: /* FCVTZS */ - gen_helper_advsimd_f16tosinth(tcg_ctx, tcg_res, tcg_op, tcg_fpstatus); + gen_helper_advsimd_f16tosinth(tcg_ctx, tcg_res, tcg_op, + tcg_fpstatus); break; case 0x3d: /* FRECPE */ gen_helper_recpe_f16(tcg_ctx, tcg_res, tcg_op, tcg_fpstatus); @@ -13101,7 +13653,8 @@ static void disas_simd_two_reg_misc_fp16(DisasContext *s, uint32_t insn) case 0x5c: /* FCVTAU */ case 0x7a: /* FCVTPU */ case 0x7b: /* FCVTZU */ - gen_helper_advsimd_f16touinth(tcg_ctx, tcg_res, tcg_op, tcg_fpstatus); + gen_helper_advsimd_f16touinth(tcg_ctx, tcg_res, tcg_op, + tcg_fpstatus); break; case 0x6f: /* FNEG */ tcg_gen_xori_i32(tcg_ctx, tcg_res, tcg_op, 0x8000); @@ -13132,7 +13685,8 @@ static void disas_simd_two_reg_misc_fp16(DisasContext *s, uint32_t insn) case 0x1c: /* FCVTAS */ case 0x3a: /* FCVTPS */ case 0x3b: /* FCVTZS */ - gen_helper_advsimd_f16tosinth(tcg_ctx, tcg_res, tcg_op, tcg_fpstatus); + gen_helper_advsimd_f16tosinth(tcg_ctx, tcg_res, tcg_op, + tcg_fpstatus); break; case 0x3d: /* FRECPE */ gen_helper_recpe_f16(tcg_ctx, tcg_res, tcg_op, tcg_fpstatus); @@ -13142,7 +13696,8 @@ static void disas_simd_two_reg_misc_fp16(DisasContext *s, uint32_t insn) case 0x5c: /* FCVTAU */ case 0x7a: /* FCVTPU */ case 0x7b: /* FCVTZU */ - gen_helper_advsimd_f16touinth(tcg_ctx, tcg_res, tcg_op, tcg_fpstatus); + gen_helper_advsimd_f16touinth(tcg_ctx, tcg_res, tcg_op, + tcg_fpstatus); break; case 0x18: /* FRINTN */ case 0x19: /* FRINTM */ @@ -13150,10 +13705,12 @@ static void disas_simd_two_reg_misc_fp16(DisasContext *s, uint32_t insn) case 0x39: /* FRINTZ */ case 0x58: /* FRINTA */ case 0x79: /* FRINTI */ - gen_helper_advsimd_rinth(tcg_ctx, tcg_res, tcg_op, tcg_fpstatus); + gen_helper_advsimd_rinth(tcg_ctx, tcg_res, tcg_op, + tcg_fpstatus); break; case 0x59: /* FRINTX */ - gen_helper_advsimd_rinth_exact(tcg_ctx, tcg_res, tcg_op, tcg_fpstatus); + gen_helper_advsimd_rinth_exact(tcg_ctx, tcg_res, tcg_op, + tcg_fpstatus); break; case 0x2f: /* FABS */ tcg_gen_andi_i32(tcg_ctx, tcg_res, tcg_op, 0x7fff); @@ -13390,40 +13947,38 @@ static void disas_simd_indexed(DisasContext *s, uint32_t insn) case 0x1e: /* UDOT */ gen_gvec_op3_ool(s, is_q, rd, rn, rm, index, u ? 
gen_helper_gvec_udot_idx_b - : gen_helper_gvec_sdot_idx_b); + : gen_helper_gvec_sdot_idx_b); return; case 0x11: /* FCMLA #0 */ case 0x13: /* FCMLA #90 */ case 0x15: /* FCMLA #180 */ case 0x17: /* FCMLA #270 */ - { - int rot = extract32(insn, 13, 2); - int data = (index << 2) | rot; - tcg_gen_gvec_3_ptr(tcg_ctx, vec_full_reg_offset(s, rd), - vec_full_reg_offset(s, rn), - vec_full_reg_offset(s, rm), fpst, - is_q ? 16 : 8, vec_full_reg_size(s), data, - size == MO_64 - ? gen_helper_gvec_fcmlas_idx - : gen_helper_gvec_fcmlah_idx); - tcg_temp_free_ptr(tcg_ctx, fpst); - } + { + int rot = extract32(insn, 13, 2); + int data = (index << 2) | rot; + tcg_gen_gvec_3_ptr(tcg_ctx, vec_full_reg_offset(s, rd), + vec_full_reg_offset(s, rn), + vec_full_reg_offset(s, rm), fpst, is_q ? 16 : 8, + vec_full_reg_size(s), data, + size == MO_64 ? gen_helper_gvec_fcmlas_idx + : gen_helper_gvec_fcmlah_idx); + tcg_temp_free_ptr(tcg_ctx, fpst); + } return; case 0x00: /* FMLAL */ case 0x04: /* FMLSL */ case 0x18: /* FMLAL2 */ case 0x1c: /* FMLSL2 */ - { - int is_s = extract32(opcode, 2, 1); - int is_2 = u; - int data = (index << 2) | (is_2 << 1) | is_s; - tcg_gen_gvec_3_ptr(tcg_ctx, vec_full_reg_offset(s, rd), - vec_full_reg_offset(s, rn), - vec_full_reg_offset(s, rm), tcg_ctx->cpu_env, - is_q ? 16 : 8, vec_full_reg_size(s), - data, gen_helper_gvec_fmlal_idx_a64); - } + { + int is_s = extract32(opcode, 2, 1); + int is_2 = u; + int data = (index << 2) | (is_2 << 1) | is_s; + tcg_gen_gvec_3_ptr( + tcg_ctx, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn), + vec_full_reg_offset(s, rm), tcg_ctx->cpu_env, is_q ? 16 : 8, + vec_full_reg_size(s), data, gen_helper_gvec_fmlal_idx_a64); + } return; } @@ -13448,7 +14003,8 @@ static void disas_simd_indexed(DisasContext *s, uint32_t insn) /* fall through */ case 0x01: /* FMLA */ read_vec_element(s, tcg_res, rd, pass, MO_64); - gen_helper_vfp_muladdd(tcg_ctx, tcg_res, tcg_op, tcg_idx, tcg_res, fpst); + gen_helper_vfp_muladdd(tcg_ctx, tcg_res, tcg_op, tcg_idx, + tcg_res, fpst); break; case 0x09: /* FMUL */ gen_helper_vfp_muld(tcg_ctx, tcg_res, tcg_op, tcg_idx, fpst); @@ -13502,9 +14058,9 @@ static void disas_simd_indexed(DisasContext *s, uint32_t insn) case 0x10: /* MLA */ case 0x14: /* MLS */ { - static NeonGenTwoOpFn * const fns[2][2] = { - { gen_helper_neon_add_u16, gen_helper_neon_sub_u16 }, - { tcg_gen_add_i32, tcg_gen_sub_i32 }, + static NeonGenTwoOpFn *const fns[2][2] = { + {gen_helper_neon_add_u16, gen_helper_neon_sub_u16}, + {tcg_gen_add_i32, tcg_gen_sub_i32}, }; NeonGenTwoOpFn *genfn; bool is_sub = opcode == 0x4; @@ -13534,11 +14090,11 @@ static void disas_simd_indexed(DisasContext *s, uint32_t insn) tcg_gen_xori_i32(tcg_ctx, tcg_op, tcg_op, 0x80008000); } if (is_scalar) { - gen_helper_advsimd_muladdh(tcg_ctx, tcg_res, tcg_op, tcg_idx, - tcg_res, fpst); + gen_helper_advsimd_muladdh(tcg_ctx, tcg_res, tcg_op, + tcg_idx, tcg_res, fpst); } else { - gen_helper_advsimd_muladd2h(tcg_ctx, tcg_res, tcg_op, tcg_idx, - tcg_res, fpst); + gen_helper_advsimd_muladd2h(tcg_ctx, tcg_res, tcg_op, + tcg_idx, tcg_res, fpst); } break; case 2: @@ -13566,7 +14122,8 @@ static void disas_simd_indexed(DisasContext *s, uint32_t insn) } break; case 2: - gen_helper_vfp_muls(tcg_ctx, tcg_res, tcg_op, tcg_idx, fpst); + gen_helper_vfp_muls(tcg_ctx, tcg_res, tcg_op, tcg_idx, + fpst); break; default: g_assert_not_reached(); @@ -13584,7 +14141,8 @@ static void disas_simd_indexed(DisasContext *s, uint32_t insn) } break; case 2: - gen_helper_vfp_mulxs(tcg_ctx, tcg_res, tcg_op, tcg_idx, fpst); + 
gen_helper_vfp_mulxs(tcg_ctx, tcg_res, tcg_op, tcg_idx, + fpst); break; default: g_assert_not_reached(); @@ -13592,42 +14150,46 @@ static void disas_simd_indexed(DisasContext *s, uint32_t insn) break; case 0x0c: /* SQDMULH */ if (size == 1) { - gen_helper_neon_qdmulh_s16(tcg_ctx, tcg_res, tcg_ctx->cpu_env, - tcg_op, tcg_idx); + gen_helper_neon_qdmulh_s16( + tcg_ctx, tcg_res, tcg_ctx->cpu_env, tcg_op, tcg_idx); } else { - gen_helper_neon_qdmulh_s32(tcg_ctx, tcg_res, tcg_ctx->cpu_env, - tcg_op, tcg_idx); + gen_helper_neon_qdmulh_s32( + tcg_ctx, tcg_res, tcg_ctx->cpu_env, tcg_op, tcg_idx); } break; case 0x0d: /* SQRDMULH */ if (size == 1) { - gen_helper_neon_qrdmulh_s16(tcg_ctx, tcg_res, tcg_ctx->cpu_env, - tcg_op, tcg_idx); + gen_helper_neon_qrdmulh_s16( + tcg_ctx, tcg_res, tcg_ctx->cpu_env, tcg_op, tcg_idx); } else { - gen_helper_neon_qrdmulh_s32(tcg_ctx, tcg_res, tcg_ctx->cpu_env, - tcg_op, tcg_idx); + gen_helper_neon_qrdmulh_s32( + tcg_ctx, tcg_res, tcg_ctx->cpu_env, tcg_op, tcg_idx); } break; case 0x1d: /* SQRDMLAH */ read_vec_element_i32(s, tcg_res, rd, pass, is_scalar ? size : MO_32); if (size == 1) { - gen_helper_neon_qrdmlah_s16(tcg_ctx, tcg_res, tcg_ctx->cpu_env, - tcg_op, tcg_idx, tcg_res); + gen_helper_neon_qrdmlah_s16(tcg_ctx, tcg_res, + tcg_ctx->cpu_env, tcg_op, + tcg_idx, tcg_res); } else { - gen_helper_neon_qrdmlah_s32(tcg_ctx, tcg_res, tcg_ctx->cpu_env, - tcg_op, tcg_idx, tcg_res); + gen_helper_neon_qrdmlah_s32(tcg_ctx, tcg_res, + tcg_ctx->cpu_env, tcg_op, + tcg_idx, tcg_res); } break; case 0x1f: /* SQRDMLSH */ read_vec_element_i32(s, tcg_res, rd, pass, is_scalar ? size : MO_32); if (size == 1) { - gen_helper_neon_qrdmlsh_s16(tcg_ctx, tcg_res, tcg_ctx->cpu_env, - tcg_op, tcg_idx, tcg_res); + gen_helper_neon_qrdmlsh_s16(tcg_ctx, tcg_res, + tcg_ctx->cpu_env, tcg_op, + tcg_idx, tcg_res); } else { - gen_helper_neon_qrdmlsh_s32(tcg_ctx, tcg_res, tcg_ctx->cpu_env, - tcg_op, tcg_idx, tcg_res); + gen_helper_neon_qrdmlsh_s32(tcg_ctx, tcg_res, + tcg_ctx->cpu_env, tcg_op, + tcg_idx, tcg_res); } break; default: @@ -13689,7 +14251,8 @@ static void disas_simd_indexed(DisasContext *s, uint32_t insn) if (satop) { /* saturating, doubling */ - gen_helper_neon_addl_saturate_s64(tcg_ctx, tcg_passres, tcg_ctx->cpu_env, + gen_helper_neon_addl_saturate_s64(tcg_ctx, tcg_passres, + tcg_ctx->cpu_env, tcg_passres, tcg_passres); } @@ -13702,18 +14265,20 @@ static void disas_simd_indexed(DisasContext *s, uint32_t insn) switch (opcode) { case 0x2: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */ - tcg_gen_add_i64(tcg_ctx, tcg_res[pass], tcg_res[pass], tcg_passres); + tcg_gen_add_i64(tcg_ctx, tcg_res[pass], tcg_res[pass], + tcg_passres); break; case 0x6: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */ - tcg_gen_sub_i64(tcg_ctx, tcg_res[pass], tcg_res[pass], tcg_passres); + tcg_gen_sub_i64(tcg_ctx, tcg_res[pass], tcg_res[pass], + tcg_passres); break; case 0x7: /* SQDMLSL, SQDMLSL2 */ tcg_gen_neg_i64(tcg_ctx, tcg_passres, tcg_passres); /* fall through */ case 0x3: /* SQDMLAL, SQDMLAL2 */ - gen_helper_neon_addl_saturate_s64(tcg_ctx, tcg_res[pass], tcg_ctx->cpu_env, - tcg_res[pass], - tcg_passres); + gen_helper_neon_addl_saturate_s64( + tcg_ctx, tcg_res[pass], tcg_ctx->cpu_env, tcg_res[pass], + tcg_passres); break; default: g_assert_not_reached(); @@ -13744,8 +14309,8 @@ static void disas_simd_indexed(DisasContext *s, uint32_t insn) if (is_scalar) { read_vec_element_i32(s, tcg_op, rn, pass, size); } else { - read_vec_element_i32(s, tcg_op, rn, - pass + (is_q * 2), MO_32); + read_vec_element_i32(s, tcg_op, rn, pass + (is_q * 2), + 
MO_32); } tcg_res[pass] = tcg_temp_new_i64(tcg_ctx); @@ -13758,12 +14323,15 @@ static void disas_simd_indexed(DisasContext *s, uint32_t insn) } if (memop & MO_SIGN) { - gen_helper_neon_mull_s16(tcg_ctx, tcg_passres, tcg_op, tcg_idx); + gen_helper_neon_mull_s16(tcg_ctx, tcg_passres, tcg_op, + tcg_idx); } else { - gen_helper_neon_mull_u16(tcg_ctx, tcg_passres, tcg_op, tcg_idx); + gen_helper_neon_mull_u16(tcg_ctx, tcg_passres, tcg_op, + tcg_idx); } if (satop) { - gen_helper_neon_addl_saturate_s32(tcg_ctx, tcg_passres, tcg_ctx->cpu_env, + gen_helper_neon_addl_saturate_s32(tcg_ctx, tcg_passres, + tcg_ctx->cpu_env, tcg_passres, tcg_passres); } tcg_temp_free_i32(tcg_ctx, tcg_op); @@ -13777,20 +14345,20 @@ static void disas_simd_indexed(DisasContext *s, uint32_t insn) switch (opcode) { case 0x2: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */ - gen_helper_neon_addl_u32(tcg_ctx, tcg_res[pass], tcg_res[pass], - tcg_passres); + gen_helper_neon_addl_u32(tcg_ctx, tcg_res[pass], + tcg_res[pass], tcg_passres); break; case 0x6: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */ - gen_helper_neon_subl_u32(tcg_ctx, tcg_res[pass], tcg_res[pass], - tcg_passres); + gen_helper_neon_subl_u32(tcg_ctx, tcg_res[pass], + tcg_res[pass], tcg_passres); break; case 0x7: /* SQDMLSL, SQDMLSL2 */ gen_helper_neon_negl_u32(tcg_ctx, tcg_passres, tcg_passres); /* fall through */ case 0x3: /* SQDMLAL, SQDMLAL2 */ - gen_helper_neon_addl_saturate_s32(tcg_ctx, tcg_res[pass], tcg_ctx->cpu_env, - tcg_res[pass], - tcg_passres); + gen_helper_neon_addl_saturate_s32( + tcg_ctx, tcg_res[pass], tcg_ctx->cpu_env, tcg_res[pass], + tcg_passres); break; default: g_assert_not_reached(); @@ -13827,15 +14395,13 @@ static void disas_simd_indexed(DisasContext *s, uint32_t insn) */ static void disas_crypto_aes(DisasContext *s, uint32_t insn) { - TCGContext *tcg_ctx = s->uc->tcg_ctx; int size = extract32(insn, 22, 2); int opcode = extract32(insn, 12, 5); int rn = extract32(insn, 5, 5); int rd = extract32(insn, 0, 5); int decrypt; - TCGv_ptr tcg_rd_ptr, tcg_rn_ptr; - TCGv_i32 tcg_decrypt; - CryptoThreeOpIntFn *genfn; + gen_helper_gvec_2 *genfn2 = NULL; + gen_helper_gvec_3 *genfn3 = NULL; if (!dc_isar_feature(aa64_aes, s) || size != 0) { unallocated_encoding(s); @@ -13845,19 +14411,19 @@ static void disas_crypto_aes(DisasContext *s, uint32_t insn) switch (opcode) { case 0x4: /* AESE */ decrypt = 0; - genfn = gen_helper_crypto_aese; + genfn3 = gen_helper_crypto_aese; break; case 0x6: /* AESMC */ decrypt = 0; - genfn = gen_helper_crypto_aesmc; + genfn2 = gen_helper_crypto_aesmc; break; case 0x5: /* AESD */ decrypt = 1; - genfn = gen_helper_crypto_aese; + genfn3 = gen_helper_crypto_aese; break; case 0x7: /* AESIMC */ decrypt = 1; - genfn = gen_helper_crypto_aesmc; + genfn2 = gen_helper_crypto_aesmc; break; default: unallocated_encoding(s); @@ -13868,15 +14434,11 @@ static void disas_crypto_aes(DisasContext *s, uint32_t insn) return; } - tcg_rd_ptr = vec_full_reg_ptr(s, rd); - tcg_rn_ptr = vec_full_reg_ptr(s, rn); - tcg_decrypt = tcg_const_i32(tcg_ctx, decrypt); - - genfn(tcg_ctx, tcg_rd_ptr, tcg_rn_ptr, tcg_decrypt); - - tcg_temp_free_ptr(tcg_ctx, tcg_rd_ptr); - tcg_temp_free_ptr(tcg_ctx, tcg_rn_ptr); - tcg_temp_free_i32(tcg_ctx, tcg_decrypt); + if (genfn2) { + gen_gvec_op2_ool(s, true, rd, rn, decrypt, genfn2); + } else { + gen_gvec_op3_ool(s, true, rd, rd, rn, decrypt, genfn3); + } } /* Crypto three-reg SHA @@ -13887,14 +14449,12 @@ static void disas_crypto_aes(DisasContext *s, uint32_t insn) */ static void disas_crypto_three_reg_sha(DisasContext *s, uint32_t insn) { - TCGContext 
*tcg_ctx = s->uc->tcg_ctx; int size = extract32(insn, 22, 2); int opcode = extract32(insn, 12, 3); int rm = extract32(insn, 16, 5); int rn = extract32(insn, 5, 5); int rd = extract32(insn, 0, 5); - CryptoThreeOpFn *genfn; - TCGv_ptr tcg_rd_ptr, tcg_rn_ptr, tcg_rm_ptr; + gen_helper_gvec_3 *genfn; bool feature; if (size != 0) { @@ -13904,10 +14464,19 @@ static void disas_crypto_three_reg_sha(DisasContext *s, uint32_t insn) switch (opcode) { case 0: /* SHA1C */ + genfn = gen_helper_crypto_sha1c; + feature = dc_isar_feature(aa64_sha1, s); + break; case 1: /* SHA1P */ + genfn = gen_helper_crypto_sha1p; + feature = dc_isar_feature(aa64_sha1, s); + break; case 2: /* SHA1M */ + genfn = gen_helper_crypto_sha1m; + feature = dc_isar_feature(aa64_sha1, s); + break; case 3: /* SHA1SU0 */ - genfn = NULL; + genfn = gen_helper_crypto_sha1su0; feature = dc_isar_feature(aa64_sha1, s); break; case 4: /* SHA256H */ @@ -13936,23 +14505,7 @@ static void disas_crypto_three_reg_sha(DisasContext *s, uint32_t insn) return; } - tcg_rd_ptr = vec_full_reg_ptr(s, rd); - tcg_rn_ptr = vec_full_reg_ptr(s, rn); - tcg_rm_ptr = vec_full_reg_ptr(s, rm); - - if (genfn) { - genfn(tcg_ctx, tcg_rd_ptr, tcg_rn_ptr, tcg_rm_ptr); - } else { - TCGv_i32 tcg_opcode = tcg_const_i32(tcg_ctx, opcode); - - gen_helper_crypto_sha1_3reg(tcg_ctx, tcg_rd_ptr, tcg_rn_ptr, - tcg_rm_ptr, tcg_opcode); - tcg_temp_free_i32(tcg_ctx, tcg_opcode); - } - - tcg_temp_free_ptr(tcg_ctx, tcg_rd_ptr); - tcg_temp_free_ptr(tcg_ctx, tcg_rn_ptr); - tcg_temp_free_ptr(tcg_ctx, tcg_rm_ptr); + gen_gvec_op3_ool(s, true, rd, rn, rm, 0, genfn); } /* Crypto two-reg SHA @@ -13963,14 +14516,12 @@ static void disas_crypto_three_reg_sha(DisasContext *s, uint32_t insn) */ static void disas_crypto_two_reg_sha(DisasContext *s, uint32_t insn) { - TCGContext *tcg_ctx = s->uc->tcg_ctx; int size = extract32(insn, 22, 2); int opcode = extract32(insn, 12, 5); int rn = extract32(insn, 5, 5); int rd = extract32(insn, 0, 5); - CryptoTwoOpFn *genfn; + gen_helper_gvec_2 *genfn; bool feature; - TCGv_ptr tcg_rd_ptr, tcg_rn_ptr; if (size != 0) { unallocated_encoding(s); @@ -14003,14 +14554,36 @@ static void disas_crypto_two_reg_sha(DisasContext *s, uint32_t insn) if (!fp_access_check(s)) { return; } + gen_gvec_op2_ool(s, true, rd, rn, 0, genfn); +} - tcg_rd_ptr = vec_full_reg_ptr(s, rd); - tcg_rn_ptr = vec_full_reg_ptr(s, rn); +static void gen_rax1_i64(TCGContext *tcg_ctx, TCGv_i64 d, TCGv_i64 n, + TCGv_i64 m) +{ + tcg_gen_rotli_i64(tcg_ctx, d, m, 1); + tcg_gen_xor_i64(tcg_ctx, d, d, n); +} - genfn(tcg_ctx, tcg_rd_ptr, tcg_rn_ptr); +static void gen_rax1_vec(TCGContext *tcg_ctx, unsigned vece, TCGv_vec d, + TCGv_vec n, TCGv_vec m) +{ + tcg_gen_rotli_vec(tcg_ctx, vece, d, m, 1); + tcg_gen_xor_vec(tcg_ctx, vece, d, d, n); +} - tcg_temp_free_ptr(tcg_ctx, tcg_rd_ptr); - tcg_temp_free_ptr(tcg_ctx, tcg_rn_ptr); +void gen_gvec_rax1(TCGContext *tcg_ctx, unsigned vece, uint32_t rd_ofs, + uint32_t rn_ofs, uint32_t rm_ofs, uint32_t opr_sz, + uint32_t max_sz) +{ + static const TCGOpcode vecop_list[] = {INDEX_op_rotli_vec, 0}; + static const GVecGen3 op = { + .fni8 = gen_rax1_i64, + .fniv = gen_rax1_vec, + .opt_opc = vecop_list, + .fno = gen_helper_crypto_rax1, + .vece = MO_64, + }; + tcg_gen_gvec_3(tcg_ctx, rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &op); } /* Crypto three-reg SHA512 @@ -14021,32 +14594,32 @@ static void disas_crypto_two_reg_sha(DisasContext *s, uint32_t insn) */ static void disas_crypto_three_reg_sha512(DisasContext *s, uint32_t insn) { - TCGContext *tcg_ctx = s->uc->tcg_ctx; int opcode = 
extract32(insn, 10, 2); - int o = extract32(insn, 14, 1); + int o = extract32(insn, 14, 1); int rm = extract32(insn, 16, 5); int rn = extract32(insn, 5, 5); int rd = extract32(insn, 0, 5); bool feature; - CryptoThreeOpFn *genfn; + gen_helper_gvec_3 *oolfn = NULL; + GVecGen3Fn *gvecfn = NULL; if (o == 0) { switch (opcode) { case 0: /* SHA512H */ feature = dc_isar_feature(aa64_sha512, s); - genfn = gen_helper_crypto_sha512h; + oolfn = gen_helper_crypto_sha512h; break; case 1: /* SHA512H2 */ feature = dc_isar_feature(aa64_sha512, s); - genfn = gen_helper_crypto_sha512h2; + oolfn = gen_helper_crypto_sha512h2; break; case 2: /* SHA512SU1 */ feature = dc_isar_feature(aa64_sha512, s); - genfn = gen_helper_crypto_sha512su1; + oolfn = gen_helper_crypto_sha512su1; break; case 3: /* RAX1 */ feature = dc_isar_feature(aa64_sha3, s); - genfn = NULL; + gvecfn = gen_gvec_rax1; break; default: g_assert_not_reached(); @@ -14055,15 +14628,15 @@ static void disas_crypto_three_reg_sha512(DisasContext *s, uint32_t insn) switch (opcode) { case 0: /* SM3PARTW1 */ feature = dc_isar_feature(aa64_sm3, s); - genfn = gen_helper_crypto_sm3partw1; + oolfn = gen_helper_crypto_sm3partw1; break; case 1: /* SM3PARTW2 */ feature = dc_isar_feature(aa64_sm3, s); - genfn = gen_helper_crypto_sm3partw2; + oolfn = gen_helper_crypto_sm3partw2; break; case 2: /* SM4EKEY */ feature = dc_isar_feature(aa64_sm4, s); - genfn = gen_helper_crypto_sm4ekey; + oolfn = gen_helper_crypto_sm4ekey; break; default: unallocated_encoding(s); @@ -14080,41 +14653,10 @@ static void disas_crypto_three_reg_sha512(DisasContext *s, uint32_t insn) return; } - if (genfn) { - TCGv_ptr tcg_rd_ptr, tcg_rn_ptr, tcg_rm_ptr; - - tcg_rd_ptr = vec_full_reg_ptr(s, rd); - tcg_rn_ptr = vec_full_reg_ptr(s, rn); - tcg_rm_ptr = vec_full_reg_ptr(s, rm); - - genfn(tcg_ctx, tcg_rd_ptr, tcg_rn_ptr, tcg_rm_ptr); - - tcg_temp_free_ptr(tcg_ctx, tcg_rd_ptr); - tcg_temp_free_ptr(tcg_ctx, tcg_rn_ptr); - tcg_temp_free_ptr(tcg_ctx, tcg_rm_ptr); + if (oolfn) { + gen_gvec_op3_ool(s, true, rd, rn, rm, 0, oolfn); } else { - TCGv_i64 tcg_op1, tcg_op2, tcg_res[2]; - int pass; - - tcg_op1 = tcg_temp_new_i64(tcg_ctx); - tcg_op2 = tcg_temp_new_i64(tcg_ctx); - tcg_res[0] = tcg_temp_new_i64(tcg_ctx); - tcg_res[1] = tcg_temp_new_i64(tcg_ctx); - - for (pass = 0; pass < 2; pass++) { - read_vec_element(s, tcg_op1, rn, pass, MO_64); - read_vec_element(s, tcg_op2, rm, pass, MO_64); - - tcg_gen_rotli_i64(tcg_ctx, tcg_res[pass], tcg_op2, 1); - tcg_gen_xor_i64(tcg_ctx, tcg_res[pass], tcg_res[pass], tcg_op1); - } - write_vec_element(s, tcg_res[0], rd, 0, MO_64); - write_vec_element(s, tcg_res[1], rd, 1, MO_64); - - tcg_temp_free_i64(tcg_ctx, tcg_op1); - tcg_temp_free_i64(tcg_ctx, tcg_op2); - tcg_temp_free_i64(tcg_ctx, tcg_res[0]); - tcg_temp_free_i64(tcg_ctx, tcg_res[1]); + gen_gvec_fn3(s, true, rd, rn, rm, gvecfn, MO_64); } } @@ -14126,22 +14668,17 @@ static void disas_crypto_three_reg_sha512(DisasContext *s, uint32_t insn) */ static void disas_crypto_two_reg_sha512(DisasContext *s, uint32_t insn) { - TCGContext *tcg_ctx = s->uc->tcg_ctx; int opcode = extract32(insn, 10, 2); int rn = extract32(insn, 5, 5); int rd = extract32(insn, 0, 5); - TCGv_ptr tcg_rd_ptr, tcg_rn_ptr; bool feature; - CryptoTwoOpFn *genfn; switch (opcode) { case 0: /* SHA512SU0 */ feature = dc_isar_feature(aa64_sha512, s); - genfn = gen_helper_crypto_sha512su0; break; case 1: /* SM4E */ feature = dc_isar_feature(aa64_sm4, s); - genfn = gen_helper_crypto_sm4e; break; default: unallocated_encoding(s); @@ -14157,13 +14694,16 @@ static 
void disas_crypto_two_reg_sha512(DisasContext *s, uint32_t insn) return; } - tcg_rd_ptr = vec_full_reg_ptr(s, rd); - tcg_rn_ptr = vec_full_reg_ptr(s, rn); - - genfn(tcg_ctx, tcg_rd_ptr, tcg_rn_ptr); - - tcg_temp_free_ptr(tcg_ctx, tcg_rd_ptr); - tcg_temp_free_ptr(tcg_ctx, tcg_rn_ptr); + switch (opcode) { + case 0: /* SHA512SU0 */ + gen_gvec_op2_ool(s, true, rd, rn, 0, gen_helper_crypto_sha512su0); + break; + case 1: /* SM4E */ + gen_gvec_op3_ool(s, true, rd, rd, rn, 0, gen_helper_crypto_sm4e); + break; + default: + g_assert_not_reached(); + } } /* Crypto four-register @@ -14321,14 +14861,17 @@ static void disas_crypto_xar(DisasContext *s, uint32_t insn) */ static void disas_crypto_three_reg_imm2(DisasContext *s, uint32_t insn) { - TCGContext *tcg_ctx = s->uc->tcg_ctx; + static gen_helper_gvec_3 *const fns[4] = { + gen_helper_crypto_sm3tt1a, + gen_helper_crypto_sm3tt1b, + gen_helper_crypto_sm3tt2a, + gen_helper_crypto_sm3tt2b, + }; int opcode = extract32(insn, 10, 2); int imm2 = extract32(insn, 12, 2); int rm = extract32(insn, 16, 5); int rn = extract32(insn, 5, 5); int rd = extract32(insn, 0, 5); - TCGv_ptr tcg_rd_ptr, tcg_rn_ptr, tcg_rm_ptr; - TCGv_i32 tcg_imm2, tcg_opcode; if (!dc_isar_feature(aa64_sm3, s)) { unallocated_encoding(s); @@ -14339,20 +14882,7 @@ static void disas_crypto_three_reg_imm2(DisasContext *s, uint32_t insn) return; } - tcg_rd_ptr = vec_full_reg_ptr(s, rd); - tcg_rn_ptr = vec_full_reg_ptr(s, rn); - tcg_rm_ptr = vec_full_reg_ptr(s, rm); - tcg_imm2 = tcg_const_i32(tcg_ctx, imm2); - tcg_opcode = tcg_const_i32(tcg_ctx, opcode); - - gen_helper_crypto_sm3tt(tcg_ctx, tcg_rd_ptr, tcg_rn_ptr, tcg_rm_ptr, tcg_imm2, - tcg_opcode); - - tcg_temp_free_ptr(tcg_ctx, tcg_rd_ptr); - tcg_temp_free_ptr(tcg_ctx, tcg_rn_ptr); - tcg_temp_free_ptr(tcg_ctx, tcg_rm_ptr); - tcg_temp_free_i32(tcg_ctx, tcg_imm2); - tcg_temp_free_i32(tcg_ctx, tcg_opcode); + gen_gvec_op3_ool(s, true, rd, rn, rm, imm2, fns[opcode]); } /* C3.6 Data processing - SIMD, inc Crypto @@ -14362,40 +14892,39 @@ static void disas_crypto_three_reg_imm2(DisasContext *s, uint32_t insn) */ static const AArch64DecodeTable data_proc_simd[] = { /* pattern , mask , fn */ - { 0x0e200400, 0x9f200400, disas_simd_three_reg_same }, - { 0x0e008400, 0x9f208400, disas_simd_three_reg_same_extra }, - { 0x0e200000, 0x9f200c00, disas_simd_three_reg_diff }, - { 0x0e200800, 0x9f3e0c00, disas_simd_two_reg_misc }, - { 0x0e300800, 0x9f3e0c00, disas_simd_across_lanes }, - { 0x0e000400, 0x9fe08400, disas_simd_copy }, - { 0x0f000000, 0x9f000400, disas_simd_indexed }, /* vector indexed */ + {0x0e200400, 0x9f200400, disas_simd_three_reg_same}, + {0x0e008400, 0x9f208400, disas_simd_three_reg_same_extra}, + {0x0e200000, 0x9f200c00, disas_simd_three_reg_diff}, + {0x0e200800, 0x9f3e0c00, disas_simd_two_reg_misc}, + {0x0e300800, 0x9f3e0c00, disas_simd_across_lanes}, + {0x0e000400, 0x9fe08400, disas_simd_copy}, + {0x0f000000, 0x9f000400, disas_simd_indexed}, /* vector indexed */ /* simd_mod_imm decode is a subset of simd_shift_imm, so must precede it */ - { 0x0f000400, 0x9ff80400, disas_simd_mod_imm }, - { 0x0f000400, 0x9f800400, disas_simd_shift_imm }, - { 0x0e000000, 0xbf208c00, disas_simd_tb }, - { 0x0e000800, 0xbf208c00, disas_simd_zip_trn }, - { 0x2e000000, 0xbf208400, disas_simd_ext }, - { 0x5e200400, 0xdf200400, disas_simd_scalar_three_reg_same }, - { 0x5e008400, 0xdf208400, disas_simd_scalar_three_reg_same_extra }, - { 0x5e200000, 0xdf200c00, disas_simd_scalar_three_reg_diff }, - { 0x5e200800, 0xdf3e0c00, disas_simd_scalar_two_reg_misc }, - { 
0x5e300800, 0xdf3e0c00, disas_simd_scalar_pairwise }, - { 0x5e000400, 0xdfe08400, disas_simd_scalar_copy }, - { 0x5f000000, 0xdf000400, disas_simd_indexed }, /* scalar indexed */ - { 0x5f000400, 0xdf800400, disas_simd_scalar_shift_imm }, - { 0x4e280800, 0xff3e0c00, disas_crypto_aes }, - { 0x5e000000, 0xff208c00, disas_crypto_three_reg_sha }, - { 0x5e280800, 0xff3e0c00, disas_crypto_two_reg_sha }, - { 0xce608000, 0xffe0b000, disas_crypto_three_reg_sha512 }, - { 0xcec08000, 0xfffff000, disas_crypto_two_reg_sha512 }, - { 0xce000000, 0xff808000, disas_crypto_four_reg }, - { 0xce800000, 0xffe00000, disas_crypto_xar }, - { 0xce408000, 0xffe0c000, disas_crypto_three_reg_imm2 }, - { 0x0e400400, 0x9f60c400, disas_simd_three_reg_same_fp16 }, - { 0x0e780800, 0x8f7e0c00, disas_simd_two_reg_misc_fp16 }, - { 0x5e400400, 0xdf60c400, disas_simd_scalar_three_reg_same_fp16 }, - { 0x00000000, 0x00000000, NULL } -}; + {0x0f000400, 0x9ff80400, disas_simd_mod_imm}, + {0x0f000400, 0x9f800400, disas_simd_shift_imm}, + {0x0e000000, 0xbf208c00, disas_simd_tb}, + {0x0e000800, 0xbf208c00, disas_simd_zip_trn}, + {0x2e000000, 0xbf208400, disas_simd_ext}, + {0x5e200400, 0xdf200400, disas_simd_scalar_three_reg_same}, + {0x5e008400, 0xdf208400, disas_simd_scalar_three_reg_same_extra}, + {0x5e200000, 0xdf200c00, disas_simd_scalar_three_reg_diff}, + {0x5e200800, 0xdf3e0c00, disas_simd_scalar_two_reg_misc}, + {0x5e300800, 0xdf3e0c00, disas_simd_scalar_pairwise}, + {0x5e000400, 0xdfe08400, disas_simd_scalar_copy}, + {0x5f000000, 0xdf000400, disas_simd_indexed}, /* scalar indexed */ + {0x5f000400, 0xdf800400, disas_simd_scalar_shift_imm}, + {0x4e280800, 0xff3e0c00, disas_crypto_aes}, + {0x5e000000, 0xff208c00, disas_crypto_three_reg_sha}, + {0x5e280800, 0xff3e0c00, disas_crypto_two_reg_sha}, + {0xce608000, 0xffe0b000, disas_crypto_three_reg_sha512}, + {0xcec08000, 0xfffff000, disas_crypto_two_reg_sha512}, + {0xce000000, 0xff808000, disas_crypto_four_reg}, + {0xce800000, 0xffe00000, disas_crypto_xar}, + {0xce408000, 0xffe0c000, disas_crypto_three_reg_imm2}, + {0x0e400400, 0x9f60c400, disas_simd_three_reg_same_fp16}, + {0x0e780800, 0x8f7e0c00, disas_simd_two_reg_misc_fp16}, + {0x5e400400, 0xdf60c400, disas_simd_scalar_three_reg_same_fp16}, + {0x00000000, 0x00000000, NULL}}; static void disas_data_proc_simd(DisasContext *s, uint32_t insn) { @@ -14447,7 +14976,7 @@ static bool is_guarded_page(CPUARMState *env, DisasContext *s) * table entry even for that case. */ return (tlb_hit(s->uc, entry->addr_code, addr) && - env_tlb(env)->d[mmu_idx].iotlb[index].attrs.target_tlb_bit0); + arm_tlb_bti_gp(&env_tlb(env)->d[mmu_idx].iotlb[index].attrs)); } /** @@ -14551,9 +15080,8 @@ static void disas_a64_insn(CPUARMState *env, DisasContext *s) * everything else. This allows us to handle this now * instead of waiting until the insn is otherwise decoded. 
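* A non-zero BTYPE records that the previous instruction was an indirect
* branch; on a guarded page the branch target must be an instruction that
* btype_destination_ok() accepts, otherwise the Branch Target exception
* is raised.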
*/ - if (s->btype != 0 - && s->guarded_page - && !btype_destination_ok(insn, s->bt, s->btype)) { + if (s->btype != 0 && s->guarded_page && + !btype_destination_ok(insn, s->bt, s->btype)) { gen_exception_insn(s, s->pc_curr, EXCP_UDEF, syn_btitrap(s->btype), default_exception_el(s)); @@ -14566,7 +15094,9 @@ static void disas_a64_insn(CPUARMState *env, DisasContext *s) } switch (extract32(insn, 25, 4)) { - case 0x0: case 0x1: case 0x3: /* UNALLOCATED */ + case 0x0: + case 0x1: + case 0x3: /* UNALLOCATED */ unallocated_encoding(s); break; case 0x2: @@ -14574,24 +15104,26 @@ static void disas_a64_insn(CPUARMState *env, DisasContext *s) unallocated_encoding(s); } break; - case 0x8: case 0x9: /* Data processing - immediate */ + case 0x8: + case 0x9: /* Data processing - immediate */ disas_data_proc_imm(s, insn); break; - case 0xa: case 0xb: /* Branch, exception generation and system insns */ + case 0xa: + case 0xb: /* Branch, exception generation and system insns */ disas_b_exc_sys(s, insn); break; case 0x4: case 0x6: case 0xc: - case 0xe: /* Loads and stores */ + case 0xe: /* Loads and stores */ disas_ldst(s, insn); break; case 0x5: - case 0xd: /* Data processing - register */ + case 0xd: /* Data processing - register */ disas_data_proc_reg(s, insn); break; case 0x7: - case 0xf: /* Data processing - SIMD and floating point */ + case 0xf: /* Data processing - SIMD and floating point */ disas_data_proc_simd_fp(s, insn); break; default: @@ -14630,8 +15162,8 @@ static void aarch64_tr_init_disas_context(DisasContextBase *dcbase, /* If we are coming from secure EL0 in a system with a 32-bit EL3, then * there is no secure EL1, so we route exceptions to EL3. */ - dc->secure_routed_to_el3 = arm_feature(env, ARM_FEATURE_EL3) && - !arm_el_is_aa64(env, 3); + dc->secure_routed_to_el3 = + arm_feature(env, ARM_FEATURE_EL3) && !arm_el_is_aa64(env, 3); dc->thumb = 0; dc->sctlr_b = 0; dc->be_data = FIELD_EX32(tb_flags, TBFLAG_ANY, BE_DATA) ? MO_BE : MO_LE; @@ -14640,7 +15172,7 @@ static void aarch64_tr_init_disas_context(DisasContextBase *dcbase, core_mmu_idx = FIELD_EX32(tb_flags, TBFLAG_ANY, MMUIDX); dc->mmu_idx = core_to_aa64_mmu_idx(core_mmu_idx); dc->tbii = FIELD_EX32(tb_flags, TBFLAG_A64, TBII); - dc->tbid = FIELD_EX32(tb_flags, TBFLAG_A64, TBID); + dc->tcma = FIELD_EX32(tb_flags, TBFLAG_A64, TCMA); dc->current_el = arm_mmu_idx_to_el(dc->mmu_idx); dc->user = (dc->current_el == 0); dc->fp_excp_el = FIELD_EX32(tb_flags, TBFLAG_ANY, FPEXC_EL); @@ -14650,10 +15182,14 @@ static void aarch64_tr_init_disas_context(DisasContextBase *dcbase, dc->bt = FIELD_EX32(tb_flags, TBFLAG_A64, BT); dc->btype = FIELD_EX32(tb_flags, TBFLAG_A64, BTYPE); dc->unpriv = FIELD_EX32(tb_flags, TBFLAG_A64, UNPRIV); + dc->ata = FIELD_EX32(tb_flags, TBFLAG_A64, ATA); + dc->mte_active[0] = FIELD_EX32(tb_flags, TBFLAG_A64, MTE_ACTIVE); + dc->mte_active[1] = FIELD_EX32(tb_flags, TBFLAG_A64, MTE0_ACTIVE); dc->vec_len = 0; dc->vec_stride = 0; dc->cp_regs = arm_cpu->cp_regs; dc->features = env->features; + dc->dcz_blocksize = arm_cpu->dcz_blocksize; /* Single step state. 
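* (This models the AArch64 debug architecture software-step state machine,
* tracked through SS_ACTIVE and PSTATE.SS.)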
The code-generation logic here is: * SS_ACTIVE == 0: @@ -14691,9 +15227,7 @@ static void aarch64_tr_init_disas_context(DisasContextBase *dcbase, init_tmp_a64_array(dc); } -static void aarch64_tr_tb_start(DisasContextBase *db, CPUState *cpu) -{ -} +static void aarch64_tr_tb_start(DisasContextBase *db, CPUState *cpu) {} static void aarch64_tr_insn_start(DisasContextBase *dcbase, CPUState *cpu) { @@ -14794,12 +15328,15 @@ static void aarch64_tr_tb_stop(DisasContextBase *dcbase, CPUState *cpu) gen_goto_tb(dc, 1, dc->base.pc_next); break; default: - case DISAS_UPDATE: + case DISAS_UPDATE_EXIT: gen_a64_set_pc_im(tcg_ctx, dc->base.pc_next); /* fall through */ case DISAS_EXIT: tcg_gen_exit_tb(tcg_ctx, NULL, 0); break; + case DISAS_UPDATE_NOCHAIN: + gen_a64_set_pc_im(tcg_ctx, dc->base.pc_next); + /* fall through */ case DISAS_JUMP: tcg_gen_lookup_and_goto_ptr(tcg_ctx); break; @@ -14814,8 +15351,7 @@ static void aarch64_tr_tb_stop(DisasContextBase *dcbase, CPUState *cpu) gen_a64_set_pc_im(tcg_ctx, dc->base.pc_next); gen_helper_yield(tcg_ctx, tcg_ctx->cpu_env); break; - case DISAS_WFI: - { + case DISAS_WFI: { /* This is a special case because we don't want to just halt the CPU * if trying to debug across a WFI. */ @@ -14842,10 +15378,9 @@ static void aarch64_sync_pc(DisasContextBase *db, CPUState *cpu) const TranslatorOps aarch64_translator_ops = { .init_disas_context = aarch64_tr_init_disas_context, - .tb_start = aarch64_tr_tb_start, - .insn_start = aarch64_tr_insn_start, - .breakpoint_check = aarch64_tr_breakpoint_check, - .translate_insn = aarch64_tr_translate_insn, - .tb_stop = aarch64_tr_tb_stop, - .pc_sync = aarch64_sync_pc -}; + .tb_start = aarch64_tr_tb_start, + .insn_start = aarch64_tr_insn_start, + .breakpoint_check = aarch64_tr_breakpoint_check, + .translate_insn = aarch64_tr_translate_insn, + .tb_stop = aarch64_tr_tb_stop, + .pc_sync = aarch64_sync_pc}; diff --git a/qemu/target/arm/translate-a64.h b/qemu/target/arm/translate-a64.h index 6092d1b02c..23bb6d490d 100644 --- a/qemu/target/arm/translate-a64.h +++ b/qemu/target/arm/translate-a64.h @@ -31,6 +31,7 @@ typedef struct TCGContext TCGContext; } while (0) TCGv_i64 new_tmp_a64(DisasContext *s); +TCGv_i64 new_tmp_a64_local(DisasContext *s); TCGv_i64 new_tmp_a64_zero(DisasContext *s); TCGv_i64 cpu_reg(DisasContext *s, int reg); TCGv_i64 cpu_reg_sp(DisasContext *s, int reg); @@ -41,6 +42,11 @@ TCGv_ptr get_fpstatus_ptr(TCGContext *tcg_ctx, bool); bool logic_imm_decode_wmask(uint64_t *result, unsigned int immn, unsigned int imms, unsigned int immr); bool sve_access_check(DisasContext *s); +TCGv_i64 clean_data_tbi(DisasContext *s, TCGv_i64 addr); +TCGv_i64 gen_mte_check1(DisasContext *s, TCGv_i64 addr, bool is_write, + bool tag_checked, int log2_size); +TCGv_i64 gen_mte_checkN(DisasContext *s, TCGv_i64 addr, bool is_write, + bool tag_checked, int count, int log2_esize); /* We should have at some point before trying to access an FP register * done the necessary access check, so assert that @@ -117,13 +123,7 @@ static inline int vec_full_reg_size(DisasContext *s) bool disas_sve(DisasContext *, uint32_t); -/* Note that the gvec expanders operate on offsets + sizes. 
*/ -typedef void GVecGen2Fn(TCGContext *, unsigned, uint32_t, uint32_t, uint32_t, uint32_t); -typedef void GVecGen2iFn(TCGContext *, unsigned, uint32_t, uint32_t, int64_t, - uint32_t, uint32_t); -typedef void GVecGen3Fn(TCGContext *, unsigned, uint32_t, uint32_t, - uint32_t, uint32_t, uint32_t); -typedef void GVecGen4Fn(TCGContext *, unsigned, uint32_t, uint32_t, uint32_t, - uint32_t, uint32_t, uint32_t); +void gen_gvec_rax1(TCGContext *tcg_ctx, unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz); #endif /* TARGET_ARM_TRANSLATE_A64_H */ diff --git a/qemu/target/arm/translate-neon.inc.c b/qemu/target/arm/translate-neon.inc.c new file mode 100644 index 0000000000..d3e353a2a4 --- /dev/null +++ b/qemu/target/arm/translate-neon.inc.c @@ -0,0 +1,4276 @@ +/* + * ARM translation: AArch32 Neon instructions + * + * Copyright (c) 2003 Fabrice Bellard + * Copyright (c) 2005-2007 CodeSourcery + * Copyright (c) 2007 OpenedHand, Ltd. + * Copyright (c) 2020 Linaro, Ltd. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + */ + +/* + * This file is intended to be included from translate.c; it uses + * some macros and definitions provided by that file. + * It might be possible to convert it to a standalone .c file eventually. + */ + +static inline int plus1(DisasContext *s, int x) +{ + return x + 1; +} + +static inline int rsub_64(DisasContext *s, int x) +{ + return 64 - x; +} + +static inline int rsub_32(DisasContext *s, int x) +{ + return 32 - x; +} +static inline int rsub_16(DisasContext *s, int x) +{ + return 16 - x; +} +static inline int rsub_8(DisasContext *s, int x) +{ + return 8 - x; +} + +/* Include the generated Neon decoder */ +#include "decode-neon-dp.inc.c" +#include "decode-neon-ls.inc.c" +#include "decode-neon-shared.inc.c" + +/* Return the offset of a 2**SIZE piece of a NEON register, at index ELE, + * where 0 is the least significant end of the register. + */ +static inline long +neon_element_offset(int reg, int element, MemOp size) +{ + int element_size = 1 << size; + int ofs = element * element_size; +#ifdef HOST_WORDS_BIGENDIAN + /* Calculate the offset assuming fully little-endian, + * then XOR to account for the order of the 8-byte units. 
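+ * For example, with 16-bit elements on a big-endian host, element 0
+ * sits at byte offset 6 within its 8-byte unit, so ofs 0 becomes 6 here.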
+ */ + if (element_size < 8) { + ofs ^= 8 - element_size; + } +#endif + return neon_reg_offset(reg, 0) + ofs; +} + +static void neon_load_element(TCGContext *tcg_ctx, TCGv_i32 var, int reg, int ele, MemOp mop) +{ + long offset = neon_element_offset(reg, ele, mop & MO_SIZE); + + switch (mop) { + case MO_UB: + tcg_gen_ld8u_i32(tcg_ctx, var, tcg_ctx->cpu_env, offset); + break; + case MO_UW: + tcg_gen_ld16u_i32(tcg_ctx, var, tcg_ctx->cpu_env, offset); + break; + case MO_UL: + tcg_gen_ld_i32(tcg_ctx, var, tcg_ctx->cpu_env, offset); + break; + default: + g_assert_not_reached(); + } +} + +static void neon_load_element64(TCGContext *tcg_ctx, TCGv_i64 var, int reg, int ele, MemOp mop) +{ + long offset = neon_element_offset(reg, ele, mop & MO_SIZE); + + switch (mop) { + case MO_UB: + tcg_gen_ld8u_i64(tcg_ctx, var, tcg_ctx->cpu_env, offset); + break; + case MO_UW: + tcg_gen_ld16u_i64(tcg_ctx, var, tcg_ctx->cpu_env, offset); + break; + case MO_UL: + tcg_gen_ld32u_i64(tcg_ctx, var, tcg_ctx->cpu_env, offset); + break; + case MO_Q: + tcg_gen_ld_i64(tcg_ctx, var, tcg_ctx->cpu_env, offset); + break; + default: + g_assert_not_reached(); + } +} + +static void neon_store_element(TCGContext *tcg_ctx, int reg, int ele, MemOp size, TCGv_i32 var) +{ + long offset = neon_element_offset(reg, ele, size); + + switch (size) { + case MO_8: + tcg_gen_st8_i32(tcg_ctx, var, tcg_ctx->cpu_env, offset); + break; + case MO_16: + tcg_gen_st16_i32(tcg_ctx, var, tcg_ctx->cpu_env, offset); + break; + case MO_32: + tcg_gen_st_i32(tcg_ctx, var, tcg_ctx->cpu_env, offset); + break; + default: + g_assert_not_reached(); + } +} + +static void neon_store_element64(TCGContext *tcg_ctx, int reg, int ele, MemOp size, TCGv_i64 var) +{ + long offset = neon_element_offset(reg, ele, size); + + switch (size) { + case MO_8: + tcg_gen_st8_i64(tcg_ctx, var, tcg_ctx->cpu_env, offset); + break; + case MO_16: + tcg_gen_st16_i64(tcg_ctx, var, tcg_ctx->cpu_env, offset); + break; + case MO_32: + tcg_gen_st32_i64(tcg_ctx, var, tcg_ctx->cpu_env, offset); + break; + case MO_64: + tcg_gen_st_i64(tcg_ctx, var, tcg_ctx->cpu_env, offset); + break; + default: + g_assert_not_reached(); + } +} + +static bool trans_VCMLA(DisasContext *s, arg_VCMLA *a) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + + int opr_sz; + TCGv_ptr fpst; + gen_helper_gvec_3_ptr *fn_gvec_ptr; + + if (!dc_isar_feature(aa32_vcma, s) + || (!a->size && !dc_isar_feature(aa32_fp16_arith, s))) { + return false; + } + + /* UNDEF accesses to D16-D31 if they don't exist. */ + if (!dc_isar_feature(aa32_simd_r32, s) && + ((a->vd | a->vn | a->vm) & 0x10)) { + return false; + } + + if ((a->vn | a->vm | a->vd) & a->q) { + return false; + } + + if (!vfp_access_check(s)) { + return true; + } + + opr_sz = (1 + a->q) * 8; + fpst = get_fpstatus_ptr(tcg_ctx, 1); + fn_gvec_ptr = a->size ? gen_helper_gvec_fcmlas : gen_helper_gvec_fcmlah; + tcg_gen_gvec_3_ptr(tcg_ctx, + vfp_reg_offset(1, a->vd), + vfp_reg_offset(1, a->vn), + vfp_reg_offset(1, a->vm), + fpst, opr_sz, opr_sz, a->rot, + fn_gvec_ptr); + tcg_temp_free_ptr(tcg_ctx, fpst); + return true; +} + +static bool trans_VCADD(DisasContext *s, arg_VCADD *a) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + + int opr_sz; + TCGv_ptr fpst; + gen_helper_gvec_3_ptr *fn_gvec_ptr; + + if (!dc_isar_feature(aa32_vcma, s) + || (!a->size && !dc_isar_feature(aa32_fp16_arith, s))) { + return false; + } + + /* UNDEF accesses to D16-D31 if they don't exist. 
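+ * (aa32_simd_r32 is true only when MVFR0.SIMDReg reports 32 doubleword
+ * registers.)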
*/ + if (!dc_isar_feature(aa32_simd_r32, s) && + ((a->vd | a->vn | a->vm) & 0x10)) { + return false; + } + + if ((a->vn | a->vm | a->vd) & a->q) { + return false; + } + + if (!vfp_access_check(s)) { + return true; + } + + opr_sz = (1 + a->q) * 8; + fpst = get_fpstatus_ptr(tcg_ctx, 1); + fn_gvec_ptr = a->size ? gen_helper_gvec_fcadds : gen_helper_gvec_fcaddh; + tcg_gen_gvec_3_ptr(tcg_ctx, + vfp_reg_offset(1, a->vd), + vfp_reg_offset(1, a->vn), + vfp_reg_offset(1, a->vm), + fpst, opr_sz, opr_sz, a->rot, + fn_gvec_ptr); + tcg_temp_free_ptr(tcg_ctx, fpst); + return true; +} + +static bool trans_VDOT(DisasContext *s, arg_VDOT *a) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + + int opr_sz; + gen_helper_gvec_3 *fn_gvec; + + if (!dc_isar_feature(aa32_dp, s)) { + return false; + } + + /* UNDEF accesses to D16-D31 if they don't exist. */ + if (!dc_isar_feature(aa32_simd_r32, s) && + ((a->vd | a->vn | a->vm) & 0x10)) { + return false; + } + + if ((a->vn | a->vm | a->vd) & a->q) { + return false; + } + + if (!vfp_access_check(s)) { + return true; + } + + opr_sz = (1 + a->q) * 8; + fn_gvec = a->u ? gen_helper_gvec_udot_b : gen_helper_gvec_sdot_b; + tcg_gen_gvec_3_ool(tcg_ctx, + vfp_reg_offset(1, a->vd), + vfp_reg_offset(1, a->vn), + vfp_reg_offset(1, a->vm), + opr_sz, opr_sz, 0, fn_gvec); + return true; +} + +static bool trans_VFML(DisasContext *s, arg_VFML *a) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + + int opr_sz; + + if (!dc_isar_feature(aa32_fhm, s)) { + return false; + } + + /* UNDEF accesses to D16-D31 if they don't exist. */ + if (!dc_isar_feature(aa32_simd_r32, s) && + (a->vd & 0x10)) { + return false; + } + + if (a->vd & a->q) { + return false; + } + + if (!vfp_access_check(s)) { + return true; + } + + opr_sz = (1 + a->q) * 8; + tcg_gen_gvec_3_ptr(tcg_ctx, + vfp_reg_offset(1, a->vd), + vfp_reg_offset(a->q, a->vn), + vfp_reg_offset(a->q, a->vm), + tcg_ctx->cpu_env, opr_sz, opr_sz, a->s, /* is_2 == 0 */ + gen_helper_gvec_fmlal_a32); + return true; +} + +static bool trans_VCMLA_scalar(DisasContext *s, arg_VCMLA_scalar *a) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + + gen_helper_gvec_3_ptr *fn_gvec_ptr; + int opr_sz; + TCGv_ptr fpst; + + if (!dc_isar_feature(aa32_vcma, s)) { + return false; + } + if (a->size == 0 && !dc_isar_feature(aa32_fp16_arith, s)) { + return false; + } + + /* UNDEF accesses to D16-D31 if they don't exist. */ + if (!dc_isar_feature(aa32_simd_r32, s) && + ((a->vd | a->vn | a->vm) & 0x10)) { + return false; + } + + if ((a->vd | a->vn) & a->q) { + return false; + } + + if (!vfp_access_check(s)) { + return true; + } + + fn_gvec_ptr = (a->size ? gen_helper_gvec_fcmlas_idx + : gen_helper_gvec_fcmlah_idx); + opr_sz = (1 + a->q) * 8; + fpst = get_fpstatus_ptr(tcg_ctx, 1); + tcg_gen_gvec_3_ptr(tcg_ctx, + vfp_reg_offset(1, a->vd), + vfp_reg_offset(1, a->vn), + vfp_reg_offset(1, a->vm), + fpst, opr_sz, opr_sz, + (a->index << 2) | a->rot, fn_gvec_ptr); + tcg_temp_free_ptr(tcg_ctx, fpst); + return true; +} + +static bool trans_VDOT_scalar(DisasContext *s, arg_VDOT_scalar *a) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + + gen_helper_gvec_3 *fn_gvec; + int opr_sz; + TCGv_ptr fpst; + + if (!dc_isar_feature(aa32_dp, s)) { + return false; + } + + /* UNDEF accesses to D16-D31 if they don't exist. */ + if (!dc_isar_feature(aa32_simd_r32, s) && + ((a->vd | a->vn) & 0x10)) { + return false; + } + + if ((a->vd | a->vn) & a->q) { + return false; + } + + if (!vfp_access_check(s)) { + return true; + } + + fn_gvec = a->u ? 
gen_helper_gvec_udot_idx_b : gen_helper_gvec_sdot_idx_b; + opr_sz = (1 + a->q) * 8; + fpst = get_fpstatus_ptr(tcg_ctx, 1); + tcg_gen_gvec_3_ool(tcg_ctx, + vfp_reg_offset(1, a->vd), + vfp_reg_offset(1, a->vn), + vfp_reg_offset(1, a->rm), + opr_sz, opr_sz, a->index, fn_gvec); + tcg_temp_free_ptr(tcg_ctx, fpst); + return true; +} + +static bool trans_VFML_scalar(DisasContext *s, arg_VFML_scalar *a) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + + int opr_sz; + + if (!dc_isar_feature(aa32_fhm, s)) { + return false; + } + + /* UNDEF accesses to D16-D31 if they don't exist. */ + if (!dc_isar_feature(aa32_simd_r32, s) && + ((a->vd & 0x10) || (a->q && (a->vn & 0x10)))) { + return false; + } + + if (a->vd & a->q) { + return false; + } + + if (!vfp_access_check(s)) { + return true; + } + + opr_sz = (1 + a->q) * 8; + tcg_gen_gvec_3_ptr(tcg_ctx, + vfp_reg_offset(1, a->vd), + vfp_reg_offset(a->q, a->vn), + vfp_reg_offset(a->q, a->rm), + tcg_ctx->cpu_env, opr_sz, opr_sz, + (a->index << 2) | a->s, /* is_2 == 0 */ + gen_helper_gvec_fmlal_idx_a32); + return true; +} + +static struct { + int nregs; + int interleave; + int spacing; +} const neon_ls_element_type[11] = { + {1, 4, 1}, + {1, 4, 2}, + {4, 1, 1}, + {2, 2, 2}, + {1, 3, 1}, + {1, 3, 2}, + {3, 1, 1}, + {1, 1, 1}, + {1, 2, 1}, + {1, 2, 2}, + {2, 1, 1} +}; + +static void gen_neon_ldst_base_update(TCGContext *tcg_ctx, DisasContext *s, int rm, int rn, + int stride) +{ + if (rm != 15) { + TCGv_i32 base; + + base = load_reg(s, rn); + if (rm == 13) { + tcg_gen_addi_i32(tcg_ctx, base, base, stride); + } else { + TCGv_i32 index; + index = load_reg(s, rm); + tcg_gen_add_i32(tcg_ctx, base, base, index); + tcg_temp_free_i32(tcg_ctx, index); + } + store_reg(s, rn, base); + } +} + +static bool trans_VLDST_multiple(DisasContext *s, arg_VLDST_multiple *a) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + + /* Neon load/store multiple structures */ + int nregs, interleave, spacing, reg, n; + MemOp endian = s->be_data; + int mmu_idx = get_mem_index(s); + int size = a->size; + TCGv_i64 tmp64; + TCGv_i32 addr, tmp; + + if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { + return false; + } + + /* UNDEF accesses to D16-D31 if they don't exist */ + if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) { + return false; + } + if (a->itype > 10) { + return false; + } + /* Catch UNDEF cases for bad values of align field */ + switch (a->itype & 0xc) { + case 4: + if (a->align >= 2) { + return false; + } + break; + case 8: + if (a->align == 3) { + return false; + } + break; + default: + break; + } + nregs = neon_ls_element_type[a->itype].nregs; + interleave = neon_ls_element_type[a->itype].interleave; + spacing = neon_ls_element_type[a->itype].spacing; + if (size == 3 && (interleave | spacing) != 1) { + return false; + } + + if (!vfp_access_check(s)) { + return true; + } + + /* For our purposes, bytes are always little-endian. */ + if (size == 0) { + endian = MO_LE; + } + /* + * Consecutive little-endian elements from a single register + * can be promoted to a larger little-endian operation. 
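+ * For example, eight consecutive 8-bit elements can be loaded with one
+ * 64-bit access, which is why size is forced to 3 (MO_64) below.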
+ */ + if (interleave == 1 && endian == MO_LE) { + size = 3; + } + tmp64 = tcg_temp_new_i64(tcg_ctx); + addr = tcg_temp_new_i32(tcg_ctx); + tmp = tcg_const_i32(tcg_ctx, 1 << size); + load_reg_var(s, addr, a->rn); + for (reg = 0; reg < nregs; reg++) { + for (n = 0; n < 8 >> size; n++) { + int xs; + for (xs = 0; xs < interleave; xs++) { + int tt = a->vd + reg + spacing * xs; + + if (a->l) { + gen_aa32_ld_i64(s, tmp64, addr, mmu_idx, endian | size); + neon_store_element64(tcg_ctx, tt, n, size, tmp64); + } else { + neon_load_element64(tcg_ctx, tmp64, tt, n, size); + gen_aa32_st_i64(s, tmp64, addr, mmu_idx, endian | size); + } + tcg_gen_add_i32(tcg_ctx, addr, addr, tmp); + } + } + } + tcg_temp_free_i32(tcg_ctx, addr); + tcg_temp_free_i32(tcg_ctx, tmp); + tcg_temp_free_i64(tcg_ctx, tmp64); + + gen_neon_ldst_base_update( + tcg_ctx, s, a->rm, a->rn, nregs * interleave * 8); + return true; +} + +static bool trans_VLD_all_lanes(DisasContext *s, arg_VLD_all_lanes *a) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + + /* Neon load single structure to all lanes */ + int reg, stride, vec_size; + int vd = a->vd; + int size = a->size; + int nregs = a->n + 1; + TCGv_i32 addr, tmp; + + if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { + return false; + } + + /* UNDEF accesses to D16-D31 if they don't exist */ + if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) { + return false; + } + + if (size == 3) { + if (nregs != 4 || a->a == 0) { + return false; + } + /* For VLD4 size == 3 a == 1 means 32 bits at 16 byte alignment */ + size = 2; + } + if (nregs == 1 && a->a == 1 && size == 0) { + return false; + } + if (nregs == 3 && a->a == 1) { + return false; + } + + if (!vfp_access_check(s)) { + return true; + } + + /* + * VLD1 to all lanes: T bit indicates how many Dregs to write. + * VLD2/3/4 to all lanes: T bit indicates register stride. + */ + stride = a->t ? 2 : 1; + vec_size = nregs == 1 ? stride * 8 : 8; + + tmp = tcg_temp_new_i32(tcg_ctx); + addr = tcg_temp_new_i32(tcg_ctx); + load_reg_var(s, addr, a->rn); + for (reg = 0; reg < nregs; reg++) { + gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s), + s->be_data | size); + if ((vd & 1) && vec_size == 16) { + /* + * We cannot write 16 bytes at once because the + * destination is unaligned. + */ + tcg_gen_gvec_dup_i32(tcg_ctx, size, neon_reg_offset(vd, 0), + 8, 8, tmp); + tcg_gen_gvec_mov(tcg_ctx, 0, neon_reg_offset(vd + 1, 0), + neon_reg_offset(vd, 0), 8, 8); + } else { + tcg_gen_gvec_dup_i32(tcg_ctx, size, neon_reg_offset(vd, 0), + vec_size, vec_size, tmp); + } + tcg_gen_addi_i32(tcg_ctx, addr, addr, 1 << size); + vd += stride; + } + tcg_temp_free_i32(tcg_ctx, tmp); + tcg_temp_free_i32(tcg_ctx, addr); + + gen_neon_ldst_base_update(tcg_ctx, s, a->rm, a->rn, (1 << size) * nregs); + + return true; +} + +static bool trans_VLDST_single(DisasContext *s, arg_VLDST_single *a) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + + /* Neon load/store single structure to one lane */ + int reg; + int nregs = a->n + 1; + int vd = a->vd; + TCGv_i32 addr, tmp; + + if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { + return false; + } + + /* UNDEF accesses to D16-D31 if they don't exist */ + if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) { + return false; + } + + /* Catch the UNDEF cases. This is unavoidably a bit messy. 
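+ * The architected alignment and size restrictions differ for each of
+ * VLD1/VST1 through VLD4/VST4 to one lane, hence the per-nregs checks below.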
*/ + switch (nregs) { + case 1: + if (((a->align & (1 << a->size)) != 0) || + (a->size == 2 && ((a->align & 3) == 1 || (a->align & 3) == 2))) { + return false; + } + break; + case 3: + if ((a->align & 1) != 0) { + return false; + } + /* fall through */ + case 2: + if (a->size == 2 && (a->align & 2) != 0) { + return false; + } + break; + case 4: + if ((a->size == 2) && ((a->align & 3) == 3)) { + return false; + } + break; + default: + abort(); + } + if ((vd + a->stride * (nregs - 1)) > 31) { + /* + * Attempts to write off the end of the register file are + * UNPREDICTABLE; we choose to UNDEF because otherwise we would + * access off the end of the array that holds the register data. + */ + return false; + } + + if (!vfp_access_check(s)) { + return true; + } + + tmp = tcg_temp_new_i32(tcg_ctx); + addr = tcg_temp_new_i32(tcg_ctx); + load_reg_var(s, addr, a->rn); + /* + * TODO: if we implemented alignment exceptions, we should check + * addr against the alignment encoded in a->align here. + */ + for (reg = 0; reg < nregs; reg++) { + if (a->l) { + gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s), + s->be_data | a->size); + neon_store_element(tcg_ctx, vd, a->reg_idx, a->size, tmp); + } else { /* Store */ + neon_load_element(tcg_ctx, tmp, vd, a->reg_idx, a->size); + gen_aa32_st_i32(s, tmp, addr, get_mem_index(s), + s->be_data | a->size); + } + vd += a->stride; + tcg_gen_addi_i32(tcg_ctx, addr, addr, 1 << a->size); + } + tcg_temp_free_i32(tcg_ctx, addr); + tcg_temp_free_i32(tcg_ctx, tmp); + + gen_neon_ldst_base_update(tcg_ctx, s, a->rm, a->rn, (1 << a->size) * nregs); + + return true; +} + +static bool do_3same(DisasContext *s, arg_3same *a, GVecGen3Fn fn) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + + int vec_size = a->q ? 16 : 8; + int rd_ofs = neon_reg_offset(a->vd, 0); + int rn_ofs = neon_reg_offset(a->vn, 0); + int rm_ofs = neon_reg_offset(a->vm, 0); + + if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { + return false; + } + + /* UNDEF accesses to D16-D31 if they don't exist. */ + if (!dc_isar_feature(aa32_simd_r32, s) && + ((a->vd | a->vn | a->vm) & 0x10)) { + return false; + } + + if ((a->vn | a->vm | a->vd) & a->q) { + return false; + } + + if (!vfp_access_check(s)) { + return true; + } + + fn(tcg_ctx, a->size, rd_ofs, rn_ofs, rm_ofs, vec_size, vec_size); + return true; +} + +#define DO_3SAME(INSN, FUNC) \ + static bool trans_##INSN##_3s(DisasContext *s, arg_3same *a) \ + { \ + return do_3same(s, a, FUNC); \ + } + +DO_3SAME(VADD, tcg_gen_gvec_add) +DO_3SAME(VSUB, tcg_gen_gvec_sub) +DO_3SAME(VAND, tcg_gen_gvec_and) +DO_3SAME(VBIC, tcg_gen_gvec_andc) +DO_3SAME(VORR, tcg_gen_gvec_or) +DO_3SAME(VORN, tcg_gen_gvec_orc) +DO_3SAME(VEOR, tcg_gen_gvec_xor) +DO_3SAME(VSHL_S, gen_gvec_sshl) +DO_3SAME(VSHL_U, gen_gvec_ushl) +DO_3SAME(VQADD_S, gen_gvec_sqadd_qc) +DO_3SAME(VQADD_U, gen_gvec_uqadd_qc) +DO_3SAME(VQSUB_S, gen_gvec_sqsub_qc) +DO_3SAME(VQSUB_U, gen_gvec_uqsub_qc) + +/* These insns are all gvec_bitsel but with the inputs in various orders. 
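+ * VBSL selects with Vd as the mask, VBIT inserts Vn bits where Vm is set,
+ * and VBIF inserts Vn bits where Vm is clear; O1/O2/O3 simply permute the
+ * operands passed to tcg_gen_gvec_bitsel().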
*/ +#define DO_3SAME_BITSEL(INSN, O1, O2, O3) \ + static void gen_##INSN##_3s(TCGContext *tcg_ctx, unsigned vece, \ + uint32_t rd_ofs, \ + uint32_t rn_ofs, \ + uint32_t rm_ofs, \ + uint32_t oprsz, uint32_t maxsz) \ + { \ + tcg_gen_gvec_bitsel(tcg_ctx, vece, rd_ofs, O1, O2, O3, oprsz, maxsz); \ + } \ + DO_3SAME(INSN, gen_##INSN##_3s) + +DO_3SAME_BITSEL(VBSL, rd_ofs, rn_ofs, rm_ofs) +DO_3SAME_BITSEL(VBIT, rm_ofs, rn_ofs, rd_ofs) +DO_3SAME_BITSEL(VBIF, rm_ofs, rd_ofs, rn_ofs) + +#define DO_3SAME_NO_SZ_3(INSN, FUNC) \ + static bool trans_##INSN##_3s(DisasContext *s, arg_3same *a) \ + { \ + if (a->size == 3) { \ + return false; \ + } \ + return do_3same(s, a, FUNC); \ + } + +DO_3SAME_NO_SZ_3(VMAX_S, tcg_gen_gvec_smax) +DO_3SAME_NO_SZ_3(VMAX_U, tcg_gen_gvec_umax) +DO_3SAME_NO_SZ_3(VMIN_S, tcg_gen_gvec_smin) +DO_3SAME_NO_SZ_3(VMIN_U, tcg_gen_gvec_umin) +DO_3SAME_NO_SZ_3(VMUL, tcg_gen_gvec_mul) +DO_3SAME_NO_SZ_3(VMLA, gen_gvec_mla) +DO_3SAME_NO_SZ_3(VMLS, gen_gvec_mls) +DO_3SAME_NO_SZ_3(VTST, gen_gvec_cmtst) +DO_3SAME_NO_SZ_3(VABD_S, gen_gvec_sabd) +DO_3SAME_NO_SZ_3(VABA_S, gen_gvec_saba) +DO_3SAME_NO_SZ_3(VABD_U, gen_gvec_uabd) +DO_3SAME_NO_SZ_3(VABA_U, gen_gvec_uaba) + +#define DO_3SAME_CMP(INSN, COND) \ + static void gen_##INSN##_3s(TCGContext *tcg_ctx, \ + unsigned vece, uint32_t rd_ofs, \ + uint32_t rn_ofs, uint32_t rm_ofs, \ + uint32_t oprsz, uint32_t maxsz) \ + { \ + tcg_gen_gvec_cmp(tcg_ctx, COND, vece, rd_ofs, rn_ofs, rm_ofs, oprsz, maxsz); \ + } \ + DO_3SAME_NO_SZ_3(INSN, gen_##INSN##_3s) + +DO_3SAME_CMP(VCGT_S, TCG_COND_GT) +DO_3SAME_CMP(VCGT_U, TCG_COND_GTU) +DO_3SAME_CMP(VCGE_S, TCG_COND_GE) +DO_3SAME_CMP(VCGE_U, TCG_COND_GEU) +DO_3SAME_CMP(VCEQ, TCG_COND_EQ) + +#define WRAP_OOL_FN(WRAPNAME, FUNC) \ + static void WRAPNAME(TCGContext *tcg_ctx, \ + unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, \ + uint32_t rm_ofs, uint32_t oprsz, uint32_t maxsz) \ + { \ + tcg_gen_gvec_3_ool(tcg_ctx, rd_ofs, rn_ofs, rm_ofs, oprsz, maxsz, 0, FUNC); \ + } + +WRAP_OOL_FN(gen_VMUL_p_3s, gen_helper_gvec_pmul_b) + +static bool trans_VMUL_p_3s(DisasContext *s, arg_3same *a) +{ + if (a->size != 0) { + return false; + } + return do_3same(s, a, gen_VMUL_p_3s); +} + +#define DO_VQRDMLAH(INSN, FUNC) \ + static bool trans_##INSN##_3s(DisasContext *s, arg_3same *a) \ + { \ + if (!dc_isar_feature(aa32_rdm, s)) { \ + return false; \ + } \ + if (a->size != 1 && a->size != 2) { \ + return false; \ + } \ + return do_3same(s, a, FUNC); \ + } + +DO_VQRDMLAH(VQRDMLAH, gen_gvec_sqrdmlah_qc) +DO_VQRDMLAH(VQRDMLSH, gen_gvec_sqrdmlsh_qc) + +#define DO_SHA1(NAME, FUNC) \ + WRAP_OOL_FN(gen_##NAME##_3s, FUNC) \ + static bool trans_##NAME##_3s(DisasContext *s, arg_3same *a) \ + { \ + if (!dc_isar_feature(aa32_sha1, s)) { \ + return false; \ + } \ + return do_3same(s, a, gen_##NAME##_3s); \ + } + +DO_SHA1(SHA1C, gen_helper_crypto_sha1c) +DO_SHA1(SHA1P, gen_helper_crypto_sha1p) +DO_SHA1(SHA1M, gen_helper_crypto_sha1m) +DO_SHA1(SHA1SU0, gen_helper_crypto_sha1su0) + +#define DO_SHA2(NAME, FUNC) \ + WRAP_OOL_FN(gen_##NAME##_3s, FUNC) \ + static bool trans_##NAME##_3s(DisasContext *s, arg_3same *a) \ + { \ + if (!dc_isar_feature(aa32_sha2, s)) { \ + return false; \ + } \ + return do_3same(s, a, gen_##NAME##_3s); \ + } + +DO_SHA2(SHA256H, gen_helper_crypto_sha256h) +DO_SHA2(SHA256H2, gen_helper_crypto_sha256h2) +DO_SHA2(SHA256SU1, gen_helper_crypto_sha256su1) + +#define DO_3SAME_64(INSN, FUNC) \ + static void gen_##INSN##_3s(TCGContext *tcg_ctx, \ + unsigned vece, uint32_t rd_ofs, \ + uint32_t rn_ofs, uint32_t rm_ofs, \ + uint32_t oprsz, 
uint32_t maxsz) \ + { \ + static const GVecGen3 op = { .fni8 = FUNC }; \ + tcg_gen_gvec_3(tcg_ctx, rd_ofs, rn_ofs, rm_ofs, oprsz, maxsz, &op); \ + } \ + DO_3SAME(INSN, gen_##INSN##_3s) + +#define DO_3SAME_64_ENV(INSN, FUNC) \ + static void gen_##INSN##_elt(TCGContext *tcg_ctx, \ + TCGv_i64 d, TCGv_i64 n, TCGv_i64 m) \ + { \ + FUNC(tcg_ctx, d, tcg_ctx->cpu_env, n, m); \ + } \ + DO_3SAME_64(INSN, gen_##INSN##_elt) + +DO_3SAME_64(VRSHL_S64, gen_helper_neon_rshl_s64) +DO_3SAME_64(VRSHL_U64, gen_helper_neon_rshl_u64) +DO_3SAME_64_ENV(VQSHL_S64, gen_helper_neon_qshl_s64) +DO_3SAME_64_ENV(VQSHL_U64, gen_helper_neon_qshl_u64) +DO_3SAME_64_ENV(VQRSHL_S64, gen_helper_neon_qrshl_s64) +DO_3SAME_64_ENV(VQRSHL_U64, gen_helper_neon_qrshl_u64) + +#define DO_3SAME_32(INSN, FUNC) \ + static void gen_##INSN##_3s(TCGContext *tcg_ctx, \ + unsigned vece, uint32_t rd_ofs, \ + uint32_t rn_ofs, uint32_t rm_ofs, \ + uint32_t oprsz, uint32_t maxsz) \ + { \ + static const GVecGen3 ops[4] = { \ + { .fni4 = gen_helper_neon_##FUNC##8 }, \ + { .fni4 = gen_helper_neon_##FUNC##16 }, \ + { .fni4 = gen_helper_neon_##FUNC##32 }, \ + { 0 }, \ + }; \ + tcg_gen_gvec_3(tcg_ctx, rd_ofs, rn_ofs, rm_ofs, oprsz, maxsz, &ops[vece]); \ + } \ + static bool trans_##INSN##_3s(DisasContext *s, arg_3same *a) \ + { \ + if (a->size > 2) { \ + return false; \ + } \ + return do_3same(s, a, gen_##INSN##_3s); \ + } + +/* + * Some helper functions need to be passed the tcg_ctx->cpu_env. In order + * to use those with the gvec APIs like tcg_gen_gvec_3(tcg_ctx, ) we need + * to create wrapper functions whose prototype is a NeonGenTwoopfn(tcg_ctx, ) + * and which call a NeonGenTwoOpEnvFn(). + */ +#define WRAP_ENV_FN(WRAPNAME, FUNC) \ + static void WRAPNAME(TCGContext *tcg_ctx, \ + TCGv_i32 d, TCGv_i32 n, TCGv_i32 m) \ + { \ + FUNC(tcg_ctx, d, tcg_ctx->cpu_env, n, m); \ + } + +#define DO_3SAME_32_ENV(INSN, FUNC) \ + WRAP_ENV_FN(gen_##INSN##_tramp8, gen_helper_neon_##FUNC##8); \ + WRAP_ENV_FN(gen_##INSN##_tramp16, gen_helper_neon_##FUNC##16); \ + WRAP_ENV_FN(gen_##INSN##_tramp32, gen_helper_neon_##FUNC##32); \ + static void gen_##INSN##_3s(TCGContext *tcg_ctx, \ + unsigned vece, uint32_t rd_ofs, \ + uint32_t rn_ofs, uint32_t rm_ofs, \ + uint32_t oprsz, uint32_t maxsz) \ + { \ + static const GVecGen3 ops[4] = { \ + { .fni4 = gen_##INSN##_tramp8 }, \ + { .fni4 = gen_##INSN##_tramp16 }, \ + { .fni4 = gen_##INSN##_tramp32 }, \ + { 0 }, \ + }; \ + tcg_gen_gvec_3(tcg_ctx, rd_ofs, rn_ofs, rm_ofs, oprsz, maxsz, &ops[vece]); \ + } \ + static bool trans_##INSN##_3s(DisasContext *s, arg_3same *a) \ + { \ + if (a->size > 2) { \ + return false; \ + } \ + return do_3same(s, a, gen_##INSN##_3s); \ + } + +DO_3SAME_32(VHADD_S, hadd_s) +DO_3SAME_32(VHADD_U, hadd_u) +DO_3SAME_32(VHSUB_S, hsub_s) +DO_3SAME_32(VHSUB_U, hsub_u) +DO_3SAME_32(VRHADD_S, rhadd_s) +DO_3SAME_32(VRHADD_U, rhadd_u) +DO_3SAME_32(VRSHL_S, rshl_s) +DO_3SAME_32(VRSHL_U, rshl_u) + +DO_3SAME_32_ENV(VQSHL_S, qshl_s) +DO_3SAME_32_ENV(VQSHL_U, qshl_u) +DO_3SAME_32_ENV(VQRSHL_S, qrshl_s) +DO_3SAME_32_ENV(VQRSHL_U, qrshl_u) + +static bool do_3same_pair(DisasContext *s, arg_3same *a, NeonGenTwoOpFn *fn) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + /* Operations handled pairwise 32 bits at a time */ + TCGv_i32 tmp, tmp2, tmp3; + + if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { + return false; + } + + /* UNDEF accesses to D16-D31 if they don't exist. 
*/ + if (!dc_isar_feature(aa32_simd_r32, s) && + ((a->vd | a->vn | a->vm) & 0x10)) { + return false; + } + + if (a->size == 3) { + return false; + } + + if (!vfp_access_check(s)) { + return true; + } + + assert(a->q == 0); /* enforced by decode patterns */ + + /* + * Note that we have to be careful not to clobber the source operands + * in the "vm == vd" case by storing the result of the first pass too + * early. Since Q is 0 there are always just two passes, so instead + * of a complicated loop over each pass we just unroll. + */ + tmp = neon_load_reg(tcg_ctx, a->vn, 0); + tmp2 = neon_load_reg(tcg_ctx, a->vn, 1); + fn(tcg_ctx, tmp, tmp, tmp2); + tcg_temp_free_i32(tcg_ctx, tmp2); + + tmp3 = neon_load_reg(tcg_ctx, a->vm, 0); + tmp2 = neon_load_reg(tcg_ctx, a->vm, 1); + fn(tcg_ctx, tmp3, tmp3, tmp2); + tcg_temp_free_i32(tcg_ctx, tmp2); + + neon_store_reg(tcg_ctx, a->vd, 0, tmp); + neon_store_reg(tcg_ctx, a->vd, 1, tmp3); + return true; +} + +#define DO_3SAME_PAIR(INSN, func) \ + static bool trans_##INSN##_3s(DisasContext *s, arg_3same *a) \ + { \ + static NeonGenTwoOpFn * const fns[] = { \ + gen_helper_neon_##func##8, \ + gen_helper_neon_##func##16, \ + gen_helper_neon_##func##32, \ + }; \ + if (a->size > 2) { \ + return false; \ + } \ + return do_3same_pair(s, a, fns[a->size]); \ + } + +/* 32-bit pairwise ops end up the same as the elementwise versions. */ +#define gen_helper_neon_pmax_s32 tcg_gen_smax_i32 +#define gen_helper_neon_pmax_u32 tcg_gen_umax_i32 +#define gen_helper_neon_pmin_s32 tcg_gen_smin_i32 +#define gen_helper_neon_pmin_u32 tcg_gen_umin_i32 +#define gen_helper_neon_padd_u32 tcg_gen_add_i32 + +DO_3SAME_PAIR(VPMAX_S, pmax_s) +DO_3SAME_PAIR(VPMIN_S, pmin_s) +DO_3SAME_PAIR(VPMAX_U, pmax_u) +DO_3SAME_PAIR(VPMIN_U, pmin_u) +DO_3SAME_PAIR(VPADD, padd_u) + +#define DO_3SAME_VQDMULH(INSN, FUNC) \ + WRAP_ENV_FN(gen_##INSN##_tramp16, gen_helper_neon_##FUNC##_s16); \ + WRAP_ENV_FN(gen_##INSN##_tramp32, gen_helper_neon_##FUNC##_s32); \ + static void gen_##INSN##_3s(TCGContext *tcg_ctx, \ + unsigned vece, uint32_t rd_ofs, \ + uint32_t rn_ofs, uint32_t rm_ofs, \ + uint32_t oprsz, uint32_t maxsz) \ + { \ + static const GVecGen3 ops[2] = { \ + { .fni4 = gen_##INSN##_tramp16 }, \ + { .fni4 = gen_##INSN##_tramp32 }, \ + }; \ + tcg_gen_gvec_3(tcg_ctx, rd_ofs, rn_ofs, rm_ofs, oprsz, maxsz, &ops[vece - 1]); \ + } \ + static bool trans_##INSN##_3s(DisasContext *s, arg_3same *a) \ + { \ + if (a->size != 1 && a->size != 2) { \ + return false; \ + } \ + return do_3same(s, a, gen_##INSN##_3s); \ + } + +DO_3SAME_VQDMULH(VQDMULH, qdmulh) +DO_3SAME_VQDMULH(VQRDMULH, qrdmulh) + +static bool do_3same_fp(DisasContext *s, arg_3same *a, VFPGen3OpSPFn *fn, + bool reads_vd) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + + /* + * FP operations handled elementwise 32 bits at a time. + * If reads_vd is true then the old value of Vd will be + * loaded before calling the callback function. This is + * used for multiply-accumulate type operations. + */ + TCGv_i32 tmp, tmp2; + int pass; + + if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { + return false; + } + + /* UNDEF accesses to D16-D31 if they don't exist. */ + if (!dc_isar_feature(aa32_simd_r32, s) && + ((a->vd | a->vn | a->vm) & 0x10)) { + return false; + } + + if ((a->vn | a->vm | a->vd) & a->q) { + return false; + } + + if (!vfp_access_check(s)) { + return true; + } + + TCGv_ptr fpstatus = get_fpstatus_ptr(tcg_ctx, 1); + for (pass = 0; pass < (a->q ? 
4 : 2); pass++) { + tmp = neon_load_reg(tcg_ctx, a->vn, pass); + tmp2 = neon_load_reg(tcg_ctx, a->vm, pass); + if (reads_vd) { + TCGv_i32 tmp_rd = neon_load_reg(tcg_ctx, a->vd, pass); + fn(tcg_ctx, tmp_rd, tmp, tmp2, fpstatus); + neon_store_reg(tcg_ctx, a->vd, pass, tmp_rd); + tcg_temp_free_i32(tcg_ctx, tmp); + } else { + fn(tcg_ctx, tmp, tmp, tmp2, fpstatus); + neon_store_reg(tcg_ctx, a->vd, pass, tmp); + } + tcg_temp_free_i32(tcg_ctx, tmp2); + } + tcg_temp_free_ptr(tcg_ctx, fpstatus); + return true; +} + +/* + * For all the functions using this macro, size == 1 means fp16, + * which is an architecture extension we don't implement yet. + */ +#define DO_3S_FP_GVEC(INSN,FUNC) \ + static void gen_##INSN##_3s(TCGContext *tcg_ctx, \ + unsigned vece, uint32_t rd_ofs, \ + uint32_t rn_ofs, uint32_t rm_ofs, \ + uint32_t oprsz, uint32_t maxsz) \ + { \ + TCGv_ptr fpst = get_fpstatus_ptr(tcg_ctx, 1); \ + tcg_gen_gvec_3_ptr(tcg_ctx, rd_ofs, rn_ofs, rm_ofs, fpst, \ + oprsz, maxsz, 0, FUNC); \ + tcg_temp_free_ptr(tcg_ctx, fpst); \ + } \ + static bool trans_##INSN##_fp_3s(DisasContext *s, arg_3same *a) \ + { \ + if (a->size != 0) { \ + /* TODO fp16 support */ \ + return false; \ + } \ + return do_3same(s, a, gen_##INSN##_3s); \ + } + + +DO_3S_FP_GVEC(VADD, gen_helper_gvec_fadd_s) +DO_3S_FP_GVEC(VSUB, gen_helper_gvec_fsub_s) +DO_3S_FP_GVEC(VABD, gen_helper_gvec_fabd_s) +DO_3S_FP_GVEC(VMUL, gen_helper_gvec_fmul_s) + +/* + * For all the functions using this macro, size == 1 means fp16, + * which is an architecture extension we don't implement yet. + */ +#define DO_3S_FP(INSN,FUNC,READS_VD) \ + static bool trans_##INSN##_fp_3s(DisasContext *s, arg_3same *a) \ + { \ + if (a->size != 0) { \ + /* TODO fp16 support */ \ + return false; \ + } \ + return do_3same_fp(s, a, FUNC, READS_VD); \ + } + +DO_3S_FP(VCEQ, gen_helper_neon_ceq_f32, false) +DO_3S_FP(VCGE, gen_helper_neon_cge_f32, false) +DO_3S_FP(VCGT, gen_helper_neon_cgt_f32, false) +DO_3S_FP(VACGE, gen_helper_neon_acge_f32, false) +DO_3S_FP(VACGT, gen_helper_neon_acgt_f32, false) +DO_3S_FP(VMAX, gen_helper_vfp_maxs, false) +DO_3S_FP(VMIN, gen_helper_vfp_mins, false) + +static void gen_VMLA_fp_3s(TCGContext *tcg_ctx, TCGv_i32 vd, TCGv_i32 vn, + TCGv_i32 vm, TCGv_ptr fpstatus) +{ + gen_helper_vfp_muls(tcg_ctx, vn, vn, vm, fpstatus); + gen_helper_vfp_adds(tcg_ctx, vd, vd, vn, fpstatus); +} + +static void gen_VMLS_fp_3s(TCGContext *tcg_ctx, TCGv_i32 vd, TCGv_i32 vn, + TCGv_i32 vm, TCGv_ptr fpstatus) +{ + gen_helper_vfp_muls(tcg_ctx, vn, vn, vm, fpstatus); + gen_helper_vfp_subs(tcg_ctx, vd, vd, vn, fpstatus); +} + +DO_3S_FP(VMLA, gen_VMLA_fp_3s, true) +DO_3S_FP(VMLS, gen_VMLS_fp_3s, true) + +static bool trans_VMAXNM_fp_3s(DisasContext *s, arg_3same *a) +{ + if (!arm_dc_feature(s, ARM_FEATURE_V8)) { + return false; + } + + if (a->size != 0) { + /* TODO fp16 support */ + return false; + } + + return do_3same_fp(s, a, gen_helper_vfp_maxnums, false); +} + +static bool trans_VMINNM_fp_3s(DisasContext *s, arg_3same *a) +{ + if (!arm_dc_feature(s, ARM_FEATURE_V8)) { + return false; + } + + if (a->size != 0) { + /* TODO fp16 support */ + return false; + } + + return do_3same_fp(s, a, gen_helper_vfp_minnums, false); +} + +WRAP_ENV_FN(gen_VRECPS_tramp, gen_helper_recps_f32) + +static void gen_VRECPS_fp_3s(TCGContext *tcg_ctx, unsigned vece, uint32_t rd_ofs, + uint32_t rn_ofs, uint32_t rm_ofs, + uint32_t oprsz, uint32_t maxsz) +{ + static const GVecGen3 ops = { .fni4 = gen_VRECPS_tramp }; + tcg_gen_gvec_3(tcg_ctx, rd_ofs, rn_ofs, rm_ofs, oprsz, maxsz, &ops); +} + +static 
bool trans_VRECPS_fp_3s(DisasContext *s, arg_3same *a) +{ + if (a->size != 0) { + /* TODO fp16 support */ + return false; + } + + return do_3same(s, a, gen_VRECPS_fp_3s); +} + +WRAP_ENV_FN(gen_VRSQRTS_tramp, gen_helper_rsqrts_f32) + +static void gen_VRSQRTS_fp_3s(TCGContext *tcg_ctx, unsigned vece, uint32_t rd_ofs, + uint32_t rn_ofs, uint32_t rm_ofs, + uint32_t oprsz, uint32_t maxsz) +{ + static const GVecGen3 ops = { .fni4 = gen_VRSQRTS_tramp }; + tcg_gen_gvec_3(tcg_ctx, rd_ofs, rn_ofs, rm_ofs, oprsz, maxsz, &ops); +} + +static bool trans_VRSQRTS_fp_3s(DisasContext *s, arg_3same *a) +{ + if (a->size != 0) { + /* TODO fp16 support */ + return false; + } + + return do_3same(s, a, gen_VRSQRTS_fp_3s); +} + +static void gen_VFMA_fp_3s(TCGContext *tcg_ctx, TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, + TCGv_ptr fpstatus) +{ + gen_helper_vfp_muladds(tcg_ctx, vd, vn, vm, vd, fpstatus); +} + +static bool trans_VFMA_fp_3s(DisasContext *s, arg_3same *a) +{ + if (!dc_isar_feature(aa32_simdfmac, s)) { + return false; + } + + if (a->size != 0) { + /* TODO fp16 support */ + return false; + } + + return do_3same_fp(s, a, gen_VFMA_fp_3s, true); +} + +static void gen_VFMS_fp_3s(TCGContext *tcg_ctx, TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, + TCGv_ptr fpstatus) +{ + gen_helper_vfp_negs(tcg_ctx, vn, vn); + gen_helper_vfp_muladds(tcg_ctx, vd, vn, vm, vd, fpstatus); +} + +static bool trans_VFMS_fp_3s(DisasContext *s, arg_3same *a) +{ + if (!dc_isar_feature(aa32_simdfmac, s)) { + return false; + } + + if (a->size != 0) { + /* TODO fp16 support */ + return false; + } + + return do_3same_fp(s, a, gen_VFMS_fp_3s, true); +} + +static bool do_3same_fp_pair(DisasContext *s, arg_3same *a, VFPGen3OpSPFn *fn) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + + /* FP operations handled pairwise 32 bits at a time */ + TCGv_i32 tmp, tmp2, tmp3; + TCGv_ptr fpstatus; + + if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { + return false; + } + + /* UNDEF accesses to D16-D31 if they don't exist. */ + if (!dc_isar_feature(aa32_simd_r32, s) && + ((a->vd | a->vn | a->vm) & 0x10)) { + return false; + } + + if (!vfp_access_check(s)) { + return true; + } + + assert(a->q == 0); /* enforced by decode patterns */ + + /* + * Note that we have to be careful not to clobber the source operands + * in the "vm == vd" case by storing the result of the first pass too + * early. Since Q is 0 there are always just two passes, so instead + * of a complicated loop over each pass we just unroll. + */ + fpstatus = get_fpstatus_ptr(tcg_ctx, 1); + tmp = neon_load_reg(tcg_ctx, a->vn, 0); + tmp2 = neon_load_reg(tcg_ctx, a->vn, 1); + fn(tcg_ctx, tmp, tmp, tmp2, fpstatus); + tcg_temp_free_i32(tcg_ctx, tmp2); + + tmp3 = neon_load_reg(tcg_ctx, a->vm, 0); + tmp2 = neon_load_reg(tcg_ctx, a->vm, 1); + fn(tcg_ctx, tmp3, tmp3, tmp2, fpstatus); + tcg_temp_free_i32(tcg_ctx, tmp2); + tcg_temp_free_ptr(tcg_ctx, fpstatus); + + neon_store_reg(tcg_ctx, a->vd, 0, tmp); + neon_store_reg(tcg_ctx, a->vd, 1, tmp3); + return true; +} + +/* + * For all the functions using this macro, size == 1 means fp16, + * which is an architecture extension we don't implement yet. 
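+ * (The size == 1 encodings are added by the Armv8.2 half-precision
+ * extension, FEAT_FP16.)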
+ */ +#define DO_3S_FP_PAIR(INSN,FUNC) \ + static bool trans_##INSN##_fp_3s(DisasContext *s, arg_3same *a) \ + { \ + if (a->size != 0) { \ + /* TODO fp16 support */ \ + return false; \ + } \ + return do_3same_fp_pair(s, a, FUNC); \ + } + +DO_3S_FP_PAIR(VPADD, gen_helper_vfp_adds) +DO_3S_FP_PAIR(VPMAX, gen_helper_vfp_maxs) +DO_3S_FP_PAIR(VPMIN, gen_helper_vfp_mins) + +static bool do_vector_2sh(DisasContext *s, arg_2reg_shift *a, GVecGen2iFn *fn) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + + /* Handle a 2-reg-shift insn which can be vectorized. */ + int vec_size = a->q ? 16 : 8; + int rd_ofs = neon_reg_offset(a->vd, 0); + int rm_ofs = neon_reg_offset(a->vm, 0); + + if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { + return false; + } + + /* UNDEF accesses to D16-D31 if they don't exist. */ + if (!dc_isar_feature(aa32_simd_r32, s) && + ((a->vd | a->vm) & 0x10)) { + return false; + } + + if ((a->vm | a->vd) & a->q) { + return false; + } + + if (!vfp_access_check(s)) { + return true; + } + + fn(tcg_ctx, a->size, rd_ofs, rm_ofs, a->shift, vec_size, vec_size); + return true; +} + +#define DO_2SH(INSN, FUNC) \ + static bool trans_##INSN##_2sh(DisasContext *s, arg_2reg_shift *a) \ + { \ + return do_vector_2sh(s, a, FUNC); \ + } \ + +DO_2SH(VSHL, tcg_gen_gvec_shli) +DO_2SH(VSLI, gen_gvec_sli) +DO_2SH(VSRI, gen_gvec_sri) +DO_2SH(VSRA_S, gen_gvec_ssra) +DO_2SH(VSRA_U, gen_gvec_usra) +DO_2SH(VRSHR_S, gen_gvec_srshr) +DO_2SH(VRSHR_U, gen_gvec_urshr) +DO_2SH(VRSRA_S, gen_gvec_srsra) +DO_2SH(VRSRA_U, gen_gvec_ursra) + +static bool trans_VSHR_S_2sh(DisasContext *s, arg_2reg_shift *a) +{ + /* Signed shift out of range results in all-sign-bits */ + a->shift = MIN(a->shift, (8 << a->size) - 1); + return do_vector_2sh(s, a, tcg_gen_gvec_sari); +} + +static void gen_zero_rd_2sh(TCGContext *tcg_ctx, unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs, + int64_t shift, uint32_t oprsz, uint32_t maxsz) +{ + tcg_gen_gvec_dup_imm(tcg_ctx, vece, rd_ofs, oprsz, maxsz, 0); +} + +static bool trans_VSHR_U_2sh(DisasContext *s, arg_2reg_shift *a) +{ + /* Shift out of range is architecturally valid and results in zero. */ + if (a->shift >= (8 << a->size)) { + return do_vector_2sh(s, a, gen_zero_rd_2sh); + } else { + return do_vector_2sh(s, a, tcg_gen_gvec_shri); + } +} + +static bool do_2shift_env_64(DisasContext *s, arg_2reg_shift *a, + NeonGenTwo64OpEnvFn *fn) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + + /* + * 2-reg-and-shift operations, size == 3 case, where the + * function needs to be passed tcg_ctx->cpu_env. + */ + TCGv_i64 constimm; + int pass; + + if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { + return false; + } + + /* UNDEF accesses to D16-D31 if they don't exist. */ + if (!dc_isar_feature(aa32_simd_r32, s) && + ((a->vd | a->vm) & 0x10)) { + return false; + } + + if ((a->vm | a->vd) & a->q) { + return false; + } + + if (!vfp_access_check(s)) { + return true; + } + + /* + * To avoid excessive duplication of ops we implement shift + * by immediate using the variable shift operations. 
+ */ + constimm = tcg_const_i64(tcg_ctx, dup_const(a->size, a->shift)); + + for (pass = 0; pass < a->q + 1; pass++) { + TCGv_i64 tmp = tcg_temp_new_i64(tcg_ctx); + + neon_load_reg64(tcg_ctx, tmp, a->vm + pass); + fn(tcg_ctx, tmp, tcg_ctx->cpu_env, tmp, constimm); + neon_store_reg64(tcg_ctx, tmp, a->vd + pass); + tcg_temp_free_i64(tcg_ctx, tmp); + } + tcg_temp_free_i64(tcg_ctx, constimm); + return true; +} + +static bool do_2shift_env_32(DisasContext *s, arg_2reg_shift *a, + NeonGenTwoOpEnvFn *fn) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + + /* + * 2-reg-and-shift operations, size < 3 case, where the + * helper needs to be passed tcg_ctx->cpu_env. + */ + TCGv_i32 constimm; + int pass; + + if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { + return false; + } + + /* UNDEF accesses to D16-D31 if they don't exist. */ + if (!dc_isar_feature(aa32_simd_r32, s) && + ((a->vd | a->vm) & 0x10)) { + return false; + } + + if ((a->vm | a->vd) & a->q) { + return false; + } + + if (!vfp_access_check(s)) { + return true; + } + + /* + * To avoid excessive duplication of ops we implement shift + * by immediate using the variable shift operations. + */ + constimm = tcg_const_i32(tcg_ctx, dup_const(a->size, a->shift)); + + for (pass = 0; pass < (a->q ? 4 : 2); pass++) { + TCGv_i32 tmp = neon_load_reg(tcg_ctx, a->vm, pass); + fn(tcg_ctx, tmp, tcg_ctx->cpu_env, tmp, constimm); + neon_store_reg(tcg_ctx, a->vd, pass, tmp); + } + tcg_temp_free_i32(tcg_ctx, constimm); + return true; +} + +#define DO_2SHIFT_ENV(INSN, FUNC) \ + static bool trans_##INSN##_64_2sh(DisasContext *s, arg_2reg_shift *a) \ + { \ + return do_2shift_env_64(s, a, gen_helper_neon_##FUNC##64); \ + } \ + static bool trans_##INSN##_2sh(DisasContext *s, arg_2reg_shift *a) \ + { \ + static NeonGenTwoOpEnvFn * const fns[] = { \ + gen_helper_neon_##FUNC##8, \ + gen_helper_neon_##FUNC##16, \ + gen_helper_neon_##FUNC##32, \ + }; \ + assert(a->size < ARRAY_SIZE(fns)); \ + return do_2shift_env_32(s, a, fns[a->size]); \ + } + +DO_2SHIFT_ENV(VQSHLU, qshlu_s) +DO_2SHIFT_ENV(VQSHL_U, qshl_u) +DO_2SHIFT_ENV(VQSHL_S, qshl_s) + +static bool do_2shift_narrow_64(DisasContext *s, arg_2reg_shift *a, + NeonGenTwo64OpFn *shiftfn, + NeonGenNarrowEnvFn *narrowfn) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + + /* 2-reg-and-shift narrowing-shift operations, size == 3 case */ + TCGv_i64 constimm, rm1, rm2; + TCGv_i32 rd; + + if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { + return false; + } + + /* UNDEF accesses to D16-D31 if they don't exist. */ + if (!dc_isar_feature(aa32_simd_r32, s) && + ((a->vd | a->vm) & 0x10)) { + return false; + } + + if (a->vm & 1) { + return false; + } + + if (!vfp_access_check(s)) { + return true; + } + + /* + * This is always a right shift, and the shiftfn is always a + * left-shift helper, which thus needs the negated shift count. 
+ */ + constimm = tcg_const_i64(tcg_ctx, -a->shift); + rm1 = tcg_temp_new_i64(tcg_ctx); + rm2 = tcg_temp_new_i64(tcg_ctx); + + /* Load both inputs first to avoid potential overwrite if rm == rd */ + neon_load_reg64(tcg_ctx, rm1, a->vm); + neon_load_reg64(tcg_ctx, rm2, a->vm + 1); + + shiftfn(tcg_ctx, rm1, rm1, constimm); + rd = tcg_temp_new_i32(tcg_ctx); + narrowfn(tcg_ctx, rd, tcg_ctx->cpu_env, rm1); + neon_store_reg(tcg_ctx, a->vd, 0, rd); + + shiftfn(tcg_ctx, rm2, rm2, constimm); + rd = tcg_temp_new_i32(tcg_ctx); + narrowfn(tcg_ctx, rd, tcg_ctx->cpu_env, rm2); + neon_store_reg(tcg_ctx, a->vd, 1, rd); + + tcg_temp_free_i64(tcg_ctx, rm1); + tcg_temp_free_i64(tcg_ctx, rm2); + tcg_temp_free_i64(tcg_ctx, constimm); + + return true; +} + +static bool do_2shift_narrow_32(DisasContext *s, arg_2reg_shift *a, + NeonGenTwoOpFn *shiftfn, + NeonGenNarrowEnvFn *narrowfn) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + + /* 2-reg-and-shift narrowing-shift operations, size < 3 case */ + TCGv_i32 constimm, rm1, rm2, rm3, rm4; + TCGv_i64 rtmp; + uint32_t imm; + + if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { + return false; + } + + /* UNDEF accesses to D16-D31 if they don't exist. */ + if (!dc_isar_feature(aa32_simd_r32, s) && + ((a->vd | a->vm) & 0x10)) { + return false; + } + + if (a->vm & 1) { + return false; + } + + if (!vfp_access_check(s)) { + return true; + } + + /* + * This is always a right shift, and the shiftfn is always a + * left-shift helper, which thus needs the negated shift count + * duplicated into each lane of the immediate value. + */ + if (a->size == 1) { + imm = (uint16_t)(-a->shift); + imm |= imm << 16; + } else { + /* size == 2 */ + imm = -a->shift; + } + constimm = tcg_const_i32(tcg_ctx, imm); + + /* Load all inputs first to avoid potential overwrite */ + rm1 = neon_load_reg(tcg_ctx, a->vm, 0); + rm2 = neon_load_reg(tcg_ctx, a->vm, 1); + rm3 = neon_load_reg(tcg_ctx, a->vm + 1, 0); + rm4 = neon_load_reg(tcg_ctx, a->vm + 1, 1); + rtmp = tcg_temp_new_i64(tcg_ctx); + + shiftfn(tcg_ctx, rm1, rm1, constimm); + shiftfn(tcg_ctx, rm2, rm2, constimm); + + tcg_gen_concat_i32_i64(tcg_ctx, rtmp, rm1, rm2); + tcg_temp_free_i32(tcg_ctx, rm2); + + narrowfn(tcg_ctx, rm1, tcg_ctx->cpu_env, rtmp); + neon_store_reg(tcg_ctx, a->vd, 0, rm1); + + shiftfn(tcg_ctx, rm3, rm3, constimm); + shiftfn(tcg_ctx, rm4, rm4, constimm); + tcg_temp_free_i32(tcg_ctx, constimm); + + tcg_gen_concat_i32_i64(tcg_ctx, rtmp, rm3, rm4); + tcg_temp_free_i32(tcg_ctx, rm4); + + narrowfn(tcg_ctx, rm3, tcg_ctx->cpu_env, rtmp); + tcg_temp_free_i64(tcg_ctx, rtmp); + neon_store_reg(tcg_ctx, a->vd, 1, rm3); + return true; +} + +#define DO_2SN_64(INSN, FUNC, NARROWFUNC) \ + static bool trans_##INSN##_2sh(DisasContext *s, arg_2reg_shift *a) \ + { \ + return do_2shift_narrow_64(s, a, FUNC, NARROWFUNC); \ + } +#define DO_2SN_32(INSN, FUNC, NARROWFUNC) \ + static bool trans_##INSN##_2sh(DisasContext *s, arg_2reg_shift *a) \ + { \ + return do_2shift_narrow_32(s, a, FUNC, NARROWFUNC); \ + } + +static void gen_neon_narrow_u32(TCGContext *tcg_ctx, TCGv_i32 dest, TCGv_ptr env, TCGv_i64 src) +{ + tcg_gen_extrl_i64_i32(tcg_ctx, dest, src); +} + +static void gen_neon_narrow_u16(TCGContext *tcg_ctx, TCGv_i32 dest, TCGv_ptr env, TCGv_i64 src) +{ + gen_helper_neon_narrow_u16(tcg_ctx, dest, src); +} + +static void gen_neon_narrow_u8(TCGContext *tcg_ctx, TCGv_i32 dest, TCGv_ptr env, TCGv_i64 src) +{ + gen_helper_neon_narrow_u8(tcg_ctx, dest, src); +} + +DO_2SN_64(VSHRN_64, gen_ushl_i64, gen_neon_narrow_u32) +DO_2SN_32(VSHRN_32, gen_ushl_i32, 
gen_neon_narrow_u16) +DO_2SN_32(VSHRN_16, gen_helper_neon_shl_u16, gen_neon_narrow_u8) + +DO_2SN_64(VRSHRN_64, gen_helper_neon_rshl_u64, gen_neon_narrow_u32) +DO_2SN_32(VRSHRN_32, gen_helper_neon_rshl_u32, gen_neon_narrow_u16) +DO_2SN_32(VRSHRN_16, gen_helper_neon_rshl_u16, gen_neon_narrow_u8) + +DO_2SN_64(VQSHRUN_64, gen_sshl_i64, gen_helper_neon_unarrow_sat32) +DO_2SN_32(VQSHRUN_32, gen_sshl_i32, gen_helper_neon_unarrow_sat16) +DO_2SN_32(VQSHRUN_16, gen_helper_neon_shl_s16, gen_helper_neon_unarrow_sat8) + +DO_2SN_64(VQRSHRUN_64, gen_helper_neon_rshl_s64, gen_helper_neon_unarrow_sat32) +DO_2SN_32(VQRSHRUN_32, gen_helper_neon_rshl_s32, gen_helper_neon_unarrow_sat16) +DO_2SN_32(VQRSHRUN_16, gen_helper_neon_rshl_s16, gen_helper_neon_unarrow_sat8) +DO_2SN_64(VQSHRN_S64, gen_sshl_i64, gen_helper_neon_narrow_sat_s32) +DO_2SN_32(VQSHRN_S32, gen_sshl_i32, gen_helper_neon_narrow_sat_s16) +DO_2SN_32(VQSHRN_S16, gen_helper_neon_shl_s16, gen_helper_neon_narrow_sat_s8) + +DO_2SN_64(VQRSHRN_S64, gen_helper_neon_rshl_s64, gen_helper_neon_narrow_sat_s32) +DO_2SN_32(VQRSHRN_S32, gen_helper_neon_rshl_s32, gen_helper_neon_narrow_sat_s16) +DO_2SN_32(VQRSHRN_S16, gen_helper_neon_rshl_s16, gen_helper_neon_narrow_sat_s8) + +DO_2SN_64(VQSHRN_U64, gen_ushl_i64, gen_helper_neon_narrow_sat_u32) +DO_2SN_32(VQSHRN_U32, gen_ushl_i32, gen_helper_neon_narrow_sat_u16) +DO_2SN_32(VQSHRN_U16, gen_helper_neon_shl_u16, gen_helper_neon_narrow_sat_u8) + +DO_2SN_64(VQRSHRN_U64, gen_helper_neon_rshl_u64, gen_helper_neon_narrow_sat_u32) +DO_2SN_32(VQRSHRN_U32, gen_helper_neon_rshl_u32, gen_helper_neon_narrow_sat_u16) +DO_2SN_32(VQRSHRN_U16, gen_helper_neon_rshl_u16, gen_helper_neon_narrow_sat_u8) + +static bool do_vshll_2sh(DisasContext *s, arg_2reg_shift *a, + NeonGenWidenFn *widenfn, bool u) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + + TCGv_i64 tmp; + TCGv_i32 rm0, rm1; + uint64_t widen_mask = 0; + + if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { + return false; + } + + /* UNDEF accesses to D16-D31 if they don't exist. */ + if (!dc_isar_feature(aa32_simd_r32, s) && + ((a->vd | a->vm) & 0x10)) { + return false; + } + + if (a->vd & 1) { + return false; + } + + if (!vfp_access_check(s)) { + return true; + } + + /* + * This is a widen-and-shift operation. The shift is always less + * than the width of the source type, so after widening the input + * vector we can simply shift the whole 64-bit widened register, + * and then clear the potential overflow bits resulting from left + * bits of the narrow input appearing as right bits of the left + * neighbour narrow input. Calculate a mask of bits to clear. 
+ */ + if ((a->shift != 0) && (a->size < 2 || u)) { + int esize = 8 << a->size; + widen_mask = MAKE_64BIT_MASK(0, esize); + widen_mask >>= esize - a->shift; + widen_mask = dup_const(a->size + 1, widen_mask); + } + + rm0 = neon_load_reg(tcg_ctx, a->vm, 0); + rm1 = neon_load_reg(tcg_ctx, a->vm, 1); + tmp = tcg_temp_new_i64(tcg_ctx); + + widenfn(tcg_ctx, tmp, rm0); + tcg_temp_free_i32(tcg_ctx, rm0); + if (a->shift != 0) { + tcg_gen_shli_i64(tcg_ctx, tmp, tmp, a->shift); + tcg_gen_andi_i64(tcg_ctx, tmp, tmp, ~widen_mask); + } + neon_store_reg64(tcg_ctx, tmp, a->vd); + + widenfn(tcg_ctx, tmp, rm1); + tcg_temp_free_i32(tcg_ctx, rm1); + if (a->shift != 0) { + tcg_gen_shli_i64(tcg_ctx, tmp, tmp, a->shift); + tcg_gen_andi_i64(tcg_ctx, tmp, tmp, ~widen_mask); + } + neon_store_reg64(tcg_ctx, tmp, a->vd + 1); + tcg_temp_free_i64(tcg_ctx, tmp); + return true; +} + +static bool trans_VSHLL_S_2sh(DisasContext *s, arg_2reg_shift *a) +{ + static NeonGenWidenFn * const widenfn[] = { + gen_helper_neon_widen_s8, + gen_helper_neon_widen_s16, + tcg_gen_ext_i32_i64, + }; + return do_vshll_2sh(s, a, widenfn[a->size], false); +} + +static bool trans_VSHLL_U_2sh(DisasContext *s, arg_2reg_shift *a) +{ + static NeonGenWidenFn * const widenfn[] = { + gen_helper_neon_widen_u8, + gen_helper_neon_widen_u16, + tcg_gen_extu_i32_i64, + }; + return do_vshll_2sh(s, a, widenfn[a->size], true); +} + +static bool do_fp_2sh(DisasContext *s, arg_2reg_shift *a, + NeonGenTwoSingleOpFn *fn) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + + /* FP operations in 2-reg-and-shift group */ + TCGv_i32 tmp, shiftv; + TCGv_ptr fpstatus; + int pass; + + if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { + return false; + } + + /* UNDEF accesses to D16-D31 if they don't exist. */ + if (!dc_isar_feature(aa32_simd_r32, s) && + ((a->vd | a->vm) & 0x10)) { + return false; + } + + if ((a->vm | a->vd) & a->q) { + return false; + } + + if (!vfp_access_check(s)) { + return true; + } + + fpstatus = get_fpstatus_ptr(tcg_ctx, 1); + shiftv = tcg_const_i32(tcg_ctx, a->shift); + for (pass = 0; pass < (a->q ? 4 : 2); pass++) { + tmp = neon_load_reg(tcg_ctx, a->vm, pass); + fn(tcg_ctx, tmp, tmp, shiftv, fpstatus); + neon_store_reg(tcg_ctx, a->vd, pass, tmp); + } + tcg_temp_free_ptr(tcg_ctx, fpstatus); + tcg_temp_free_i32(tcg_ctx, shiftv); + return true; +} + +#define DO_FP_2SH(INSN, FUNC) \ + static bool trans_##INSN##_2sh(DisasContext *s, arg_2reg_shift *a) \ + { \ + return do_fp_2sh(s, a, FUNC); \ + } + +DO_FP_2SH(VCVT_SF, gen_helper_vfp_sltos) +DO_FP_2SH(VCVT_UF, gen_helper_vfp_ultos) +DO_FP_2SH(VCVT_FS, gen_helper_vfp_tosls_round_to_zero) +DO_FP_2SH(VCVT_FU, gen_helper_vfp_touls_round_to_zero) + +static uint64_t asimd_imm_const(uint32_t imm, int cmode, int op) +{ + /* + * Expand the encoded constant. + * Note that cmode = 2,3,4,5,6,7,10,11,12,13 imm=0 is UNPREDICTABLE. + * We choose to not special-case this and will behave as if a + * valid constant encoding of 0 had been given. + * cmode = 15 op = 1 must UNDEF; we assume decode has handled that. + */ + switch (cmode) { + case 0: case 1: + /* no-op */ + break; + case 2: case 3: + imm <<= 8; + break; + case 4: case 5: + imm <<= 16; + break; + case 6: case 7: + imm <<= 24; + break; + case 8: case 9: + imm |= imm << 16; + break; + case 10: case 11: + imm = (imm << 8) | (imm << 24); + break; + case 12: + imm = (imm << 8) | 0xff; + break; + case 13: + imm = (imm << 16) | 0xffff; + break; + case 14: + if (op) { + /* + * This is the only case where the top and bottom 32 bits + * of the encoded constant differ. 
+ */ + uint64_t imm64 = 0; + int n; + + for (n = 0; n < 8; n++) { + if (imm & (1 << n)) { + imm64 |= (0xffULL << (n * 8)); + } + } + return imm64; + } + imm |= (imm << 8) | (imm << 16) | (imm << 24); + break; + case 15: + imm = ((imm & 0x80) << 24) | ((imm & 0x3f) << 19) + | ((imm & 0x40) ? (0x1f << 25) : (1 << 30)); + break; + } + if (op) { + imm = ~imm; + } + return dup_const(MO_32, imm); +} + +static bool do_1reg_imm(DisasContext *s, arg_1reg_imm *a, + GVecGen2iFn *fn) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + + uint64_t imm; + int reg_ofs, vec_size; + + if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { + return false; + } + + /* UNDEF accesses to D16-D31 if they don't exist. */ + if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) { + return false; + } + + if (a->vd & a->q) { + return false; + } + + if (!vfp_access_check(s)) { + return true; + } + + reg_ofs = neon_reg_offset(a->vd, 0); + vec_size = a->q ? 16 : 8; + imm = asimd_imm_const(a->imm, a->cmode, a->op); + + fn(tcg_ctx, MO_64, reg_ofs, reg_ofs, imm, vec_size, vec_size); + return true; +} + +static void gen_VMOV_1r(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, uint32_t aofs, + int64_t c, uint32_t oprsz, uint32_t maxsz) +{ + tcg_gen_gvec_dup_imm(tcg_ctx, MO_64, dofs, oprsz, maxsz, c); +} + +static bool trans_Vimm_1r(DisasContext *s, arg_1reg_imm *a) +{ + /* Handle decode of cmode/op here between VORR/VBIC/VMOV */ + GVecGen2iFn *fn; + + if ((a->cmode & 1) && a->cmode < 12) { + /* for op=1, the imm will be inverted, so BIC becomes AND. */ + fn = a->op ? tcg_gen_gvec_andi : tcg_gen_gvec_ori; + } else { + /* There is one unallocated cmode/op combination in this space */ + if (a->cmode == 15 && a->op == 1) { + return false; + } + fn = gen_VMOV_1r; + } + return do_1reg_imm(s, a, fn); +} + +static bool do_prewiden_3d(DisasContext *s, arg_3diff *a, + NeonGenWidenFn *widenfn, + NeonGenTwo64OpFn *opfn, + bool src1_wide) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + + /* 3-regs different lengths, prewidening case (VADDL/VSUBL/VAADW/VSUBW) */ + TCGv_i64 rn0_64, rn1_64, rm_64; + TCGv_i32 rm; + + if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { + return false; + } + + /* UNDEF accesses to D16-D31 if they don't exist. */ + if (!dc_isar_feature(aa32_simd_r32, s) && + ((a->vd | a->vn | a->vm) & 0x10)) { + return false; + } + + if (!widenfn || !opfn) { + /* size == 3 case, which is an entirely different insn group */ + return false; + } + + if ((a->vd & 1) || (src1_wide && (a->vn & 1))) { + return false; + } + + if (!vfp_access_check(s)) { + return true; + } + + rn0_64 = tcg_temp_new_i64(tcg_ctx); + rn1_64 = tcg_temp_new_i64(tcg_ctx); + rm_64 = tcg_temp_new_i64(tcg_ctx); + + if (src1_wide) { + neon_load_reg64(tcg_ctx, rn0_64, a->vn); + } else { + TCGv_i32 tmp = neon_load_reg(tcg_ctx, a->vn, 0); + widenfn(tcg_ctx, rn0_64, tmp); + tcg_temp_free_i32(tcg_ctx, tmp); + } + rm = neon_load_reg(tcg_ctx, a->vm, 0); + + widenfn(tcg_ctx, rm_64, rm); + tcg_temp_free_i32(tcg_ctx, rm); + opfn(tcg_ctx, rn0_64, rn0_64, rm_64); + + /* + * Load second pass inputs before storing the first pass result, to + * avoid incorrect results if a narrow input overlaps with the result. 
+ */ + if (src1_wide) { + neon_load_reg64(tcg_ctx, rn1_64, a->vn + 1); + } else { + TCGv_i32 tmp = neon_load_reg(tcg_ctx, a->vn, 1); + widenfn(tcg_ctx, rn1_64, tmp); + tcg_temp_free_i32(tcg_ctx, tmp); + } + rm = neon_load_reg(tcg_ctx, a->vm, 1); + + neon_store_reg64(tcg_ctx, rn0_64, a->vd); + + widenfn(tcg_ctx, rm_64, rm); + tcg_temp_free_i32(tcg_ctx, rm); + opfn(tcg_ctx, rn1_64, rn1_64, rm_64); + neon_store_reg64(tcg_ctx, rn1_64, a->vd + 1); + + tcg_temp_free_i64(tcg_ctx, rn0_64); + tcg_temp_free_i64(tcg_ctx, rn1_64); + tcg_temp_free_i64(tcg_ctx, rm_64); + + return true; +} + +#define DO_PREWIDEN(INSN, S, EXT, OP, SRC1WIDE) \ + static bool trans_##INSN##_3d(DisasContext *s, arg_3diff *a) \ + { \ + static NeonGenWidenFn * const widenfn[] = { \ + gen_helper_neon_widen_##S##8, \ + gen_helper_neon_widen_##S##16, \ + tcg_gen_##EXT##_i32_i64, \ + NULL, \ + }; \ + static NeonGenTwo64OpFn * const addfn[] = { \ + gen_helper_neon_##OP##l_u16, \ + gen_helper_neon_##OP##l_u32, \ + tcg_gen_##OP##_i64, \ + NULL, \ + }; \ + return do_prewiden_3d(s, a, widenfn[a->size], \ + addfn[a->size], SRC1WIDE); \ + } + +DO_PREWIDEN(VADDL_S, s, ext, add, false) +DO_PREWIDEN(VADDL_U, u, extu, add, false) +DO_PREWIDEN(VSUBL_S, s, ext, sub, false) +DO_PREWIDEN(VSUBL_U, u, extu, sub, false) +DO_PREWIDEN(VADDW_S, s, ext, add, true) +DO_PREWIDEN(VADDW_U, u, extu, add, true) +DO_PREWIDEN(VSUBW_S, s, ext, sub, true) +DO_PREWIDEN(VSUBW_U, u, extu, sub, true) + +static bool do_narrow_3d(DisasContext *s, arg_3diff *a, + NeonGenTwo64OpFn *opfn, NeonGenNarrowFn *narrowfn) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + + /* 3-regs different lengths, narrowing (VADDHN/VSUBHN/VRADDHN/VRSUBHN) */ + TCGv_i64 rn_64, rm_64; + TCGv_i32 rd0, rd1; + + if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { + return false; + } + + /* UNDEF accesses to D16-D31 if they don't exist. 
*/ + if (!dc_isar_feature(aa32_simd_r32, s) && + ((a->vd | a->vn | a->vm) & 0x10)) { + return false; + } + + if (!opfn || !narrowfn) { + /* size == 3 case, which is an entirely different insn group */ + return false; + } + + if ((a->vn | a->vm) & 1) { + return false; + } + + if (!vfp_access_check(s)) { + return true; + } + + rn_64 = tcg_temp_new_i64(tcg_ctx); + rm_64 = tcg_temp_new_i64(tcg_ctx); + rd0 = tcg_temp_new_i32(tcg_ctx); + rd1 = tcg_temp_new_i32(tcg_ctx); + + neon_load_reg64(tcg_ctx, rn_64, a->vn); + neon_load_reg64(tcg_ctx, rm_64, a->vm); + + opfn(tcg_ctx, rn_64, rn_64, rm_64); + + narrowfn(tcg_ctx, rd0, rn_64); + + neon_load_reg64(tcg_ctx, rn_64, a->vn + 1); + neon_load_reg64(tcg_ctx, rm_64, a->vm + 1); + + opfn(tcg_ctx, rn_64, rn_64, rm_64); + + narrowfn(tcg_ctx, rd1, rn_64); + + neon_store_reg(tcg_ctx, a->vd, 0, rd0); + neon_store_reg(tcg_ctx, a->vd, 1, rd1); + + tcg_temp_free_i64(tcg_ctx, rn_64); + tcg_temp_free_i64(tcg_ctx, rm_64); + + return true; +} + +#define DO_NARROW_3D(INSN, OP, NARROWTYPE, EXTOP) \ + static bool trans_##INSN##_3d(DisasContext *s, arg_3diff *a) \ + { \ + static NeonGenTwo64OpFn * const addfn[] = { \ + gen_helper_neon_##OP##l_u16, \ + gen_helper_neon_##OP##l_u32, \ + tcg_gen_##OP##_i64, \ + NULL, \ + }; \ + static NeonGenNarrowFn * const narrowfn[] = { \ + gen_helper_neon_##NARROWTYPE##_high_u8, \ + gen_helper_neon_##NARROWTYPE##_high_u16, \ + EXTOP, \ + NULL, \ + }; \ + return do_narrow_3d(s, a, addfn[a->size], narrowfn[a->size]); \ + } + +static void gen_narrow_round_high_u32(TCGContext *tcg_ctx, TCGv_i32 rd, TCGv_i64 rn) +{ + tcg_gen_addi_i64(tcg_ctx, rn, rn, 1u << 31); + tcg_gen_extrh_i64_i32(tcg_ctx, rd, rn); +} + +DO_NARROW_3D(VADDHN, add, narrow, tcg_gen_extrh_i64_i32) +DO_NARROW_3D(VSUBHN, sub, narrow, tcg_gen_extrh_i64_i32) +DO_NARROW_3D(VRADDHN, add, narrow_round, gen_narrow_round_high_u32) +DO_NARROW_3D(VRSUBHN, sub, narrow_round, gen_narrow_round_high_u32) + +static bool do_long_3d(DisasContext *s, arg_3diff *a, + NeonGenTwoOpWidenFn *opfn, + NeonGenTwo64OpFn *accfn) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + + /* + * 3-regs different lengths, long operations. + * These perform an operation on two inputs that returns a double-width + * result, and then possibly perform an accumulation operation of + * that result into the double-width destination. + */ + TCGv_i64 rd0, rd1, tmp; + TCGv_i32 rn, rm; + + if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { + return false; + } + + /* UNDEF accesses to D16-D31 if they don't exist. 
*/ + if (!dc_isar_feature(aa32_simd_r32, s) && + ((a->vd | a->vn | a->vm) & 0x10)) { + return false; + } + + if (!opfn) { + /* size == 3 case, which is an entirely different insn group */ + return false; + } + + if (a->vd & 1) { + return false; + } + + if (!vfp_access_check(s)) { + return true; + } + + rd0 = tcg_temp_new_i64(tcg_ctx); + rd1 = tcg_temp_new_i64(tcg_ctx); + + rn = neon_load_reg(tcg_ctx, a->vn, 0); + rm = neon_load_reg(tcg_ctx, a->vm, 0); + opfn(tcg_ctx, rd0, rn, rm); + tcg_temp_free_i32(tcg_ctx, rn); + tcg_temp_free_i32(tcg_ctx, rm); + + rn = neon_load_reg(tcg_ctx, a->vn, 1); + rm = neon_load_reg(tcg_ctx, a->vm, 1); + opfn(tcg_ctx, rd1, rn, rm); + tcg_temp_free_i32(tcg_ctx, rn); + tcg_temp_free_i32(tcg_ctx, rm); + + /* Don't store results until after all loads: they might overlap */ + if (accfn) { + tmp = tcg_temp_new_i64(tcg_ctx); + neon_load_reg64(tcg_ctx, tmp, a->vd); + accfn(tcg_ctx, tmp, tmp, rd0); + neon_store_reg64(tcg_ctx, tmp, a->vd); + neon_load_reg64(tcg_ctx, tmp, a->vd + 1); + accfn(tcg_ctx, tmp, tmp, rd1); + neon_store_reg64(tcg_ctx, tmp, a->vd + 1); + tcg_temp_free_i64(tcg_ctx, tmp); + } else { + neon_store_reg64(tcg_ctx, rd0, a->vd); + neon_store_reg64(tcg_ctx, rd1, a->vd + 1); + } + + tcg_temp_free_i64(tcg_ctx, rd0); + tcg_temp_free_i64(tcg_ctx, rd1); + + return true; +} + +static bool trans_VABDL_S_3d(DisasContext *s, arg_3diff *a) +{ + static NeonGenTwoOpWidenFn * const opfn[] = { + gen_helper_neon_abdl_s16, + gen_helper_neon_abdl_s32, + gen_helper_neon_abdl_s64, + NULL, + }; + + return do_long_3d(s, a, opfn[a->size], NULL); +} + +static bool trans_VABDL_U_3d(DisasContext *s, arg_3diff *a) +{ + static NeonGenTwoOpWidenFn * const opfn[] = { + gen_helper_neon_abdl_u16, + gen_helper_neon_abdl_u32, + gen_helper_neon_abdl_u64, + NULL, + }; + + return do_long_3d(s, a, opfn[a->size], NULL); +} + +static bool trans_VABAL_S_3d(DisasContext *s, arg_3diff *a) +{ + static NeonGenTwoOpWidenFn * const opfn[] = { + gen_helper_neon_abdl_s16, + gen_helper_neon_abdl_s32, + gen_helper_neon_abdl_s64, + NULL, + }; + static NeonGenTwo64OpFn * const addfn[] = { + gen_helper_neon_addl_u16, + gen_helper_neon_addl_u32, + tcg_gen_add_i64, + NULL, + }; + + return do_long_3d(s, a, opfn[a->size], addfn[a->size]); +} + +static bool trans_VABAL_U_3d(DisasContext *s, arg_3diff *a) +{ + static NeonGenTwoOpWidenFn * const opfn[] = { + gen_helper_neon_abdl_u16, + gen_helper_neon_abdl_u32, + gen_helper_neon_abdl_u64, + NULL, + }; + static NeonGenTwo64OpFn * const addfn[] = { + gen_helper_neon_addl_u16, + gen_helper_neon_addl_u32, + tcg_gen_add_i64, + NULL, + }; + + return do_long_3d(s, a, opfn[a->size], addfn[a->size]); +} + +static void gen_mull_s32(TCGContext *tcg_ctx, TCGv_i64 rd, TCGv_i32 rn, TCGv_i32 rm) +{ + TCGv_i32 lo = tcg_temp_new_i32(tcg_ctx); + TCGv_i32 hi = tcg_temp_new_i32(tcg_ctx); + + tcg_gen_muls2_i32(tcg_ctx, lo, hi, rn, rm); + tcg_gen_concat_i32_i64(tcg_ctx, rd, lo, hi); + + tcg_temp_free_i32(tcg_ctx, lo); + tcg_temp_free_i32(tcg_ctx, hi); +} + +static void gen_mull_u32(TCGContext *tcg_ctx, TCGv_i64 rd, TCGv_i32 rn, TCGv_i32 rm) +{ + TCGv_i32 lo = tcg_temp_new_i32(tcg_ctx); + TCGv_i32 hi = tcg_temp_new_i32(tcg_ctx); + + tcg_gen_mulu2_i32(tcg_ctx, lo, hi, rn, rm); + tcg_gen_concat_i32_i64(tcg_ctx, rd, lo, hi); + + tcg_temp_free_i32(tcg_ctx, lo); + tcg_temp_free_i32(tcg_ctx, hi); +} + +static bool trans_VMULL_S_3d(DisasContext *s, arg_3diff *a) +{ + static NeonGenTwoOpWidenFn * const opfn[] = { + gen_helper_neon_mull_s8, + gen_helper_neon_mull_s16, + gen_mull_s32, + NULL, + }; 
+ + return do_long_3d(s, a, opfn[a->size], NULL); +} + +static bool trans_VMULL_U_3d(DisasContext *s, arg_3diff *a) +{ + static NeonGenTwoOpWidenFn * const opfn[] = { + gen_helper_neon_mull_u8, + gen_helper_neon_mull_u16, + gen_mull_u32, + NULL, + }; + + return do_long_3d(s, a, opfn[a->size], NULL); +} + +#define DO_VMLAL(INSN,MULL,ACC) \ + static bool trans_##INSN##_3d(DisasContext *s, arg_3diff *a) \ + { \ + static NeonGenTwoOpWidenFn * const opfn[] = { \ + gen_helper_neon_##MULL##8, \ + gen_helper_neon_##MULL##16, \ + gen_##MULL##32, \ + NULL, \ + }; \ + static NeonGenTwo64OpFn * const accfn[] = { \ + gen_helper_neon_##ACC##l_u16, \ + gen_helper_neon_##ACC##l_u32, \ + tcg_gen_##ACC##_i64, \ + NULL, \ + }; \ + return do_long_3d(s, a, opfn[a->size], accfn[a->size]); \ + } + +DO_VMLAL(VMLAL_S,mull_s,add) +DO_VMLAL(VMLAL_U,mull_u,add) +DO_VMLAL(VMLSL_S,mull_s,sub) +DO_VMLAL(VMLSL_U,mull_u,sub) + +static void gen_VQDMULL_16(TCGContext *tcg_ctx, TCGv_i64 rd, TCGv_i32 rn, TCGv_i32 rm) +{ + gen_helper_neon_mull_s16(tcg_ctx, rd, rn, rm); + gen_helper_neon_addl_saturate_s32(tcg_ctx, rd, tcg_ctx->cpu_env, rd, rd); +} + +static void gen_VQDMULL_32(TCGContext *tcg_ctx, TCGv_i64 rd, TCGv_i32 rn, TCGv_i32 rm) +{ + gen_mull_s32(tcg_ctx, rd, rn, rm); + gen_helper_neon_addl_saturate_s64(tcg_ctx, rd, tcg_ctx->cpu_env, rd, rd); +} + +static bool trans_VQDMULL_3d(DisasContext *s, arg_3diff *a) +{ + static NeonGenTwoOpWidenFn * const opfn[] = { + NULL, + gen_VQDMULL_16, + gen_VQDMULL_32, + NULL, + }; + + return do_long_3d(s, a, opfn[a->size], NULL); +} + +static void gen_VQDMLAL_acc_16(TCGContext *tcg_ctx, TCGv_i64 rd, TCGv_i64 rn, TCGv_i64 rm) +{ + gen_helper_neon_addl_saturate_s32(tcg_ctx, rd, tcg_ctx->cpu_env, rn, rm); +} + +static void gen_VQDMLAL_acc_32(TCGContext *tcg_ctx, TCGv_i64 rd, TCGv_i64 rn, TCGv_i64 rm) +{ + gen_helper_neon_addl_saturate_s64(tcg_ctx, rd, tcg_ctx->cpu_env, rn, rm); +} + +static bool trans_VQDMLAL_3d(DisasContext *s, arg_3diff *a) +{ + static NeonGenTwoOpWidenFn * const opfn[] = { + NULL, + gen_VQDMULL_16, + gen_VQDMULL_32, + NULL, + }; + static NeonGenTwo64OpFn * const accfn[] = { + NULL, + gen_VQDMLAL_acc_16, + gen_VQDMLAL_acc_32, + NULL, + }; + + return do_long_3d(s, a, opfn[a->size], accfn[a->size]); +} + +static void gen_VQDMLSL_acc_16(TCGContext *tcg_ctx, TCGv_i64 rd, TCGv_i64 rn, TCGv_i64 rm) +{ + gen_helper_neon_negl_u32(tcg_ctx, rm, rm); + gen_helper_neon_addl_saturate_s32(tcg_ctx, rd, tcg_ctx->cpu_env, rn, rm); +} + +static void gen_VQDMLSL_acc_32(TCGContext *tcg_ctx, TCGv_i64 rd, TCGv_i64 rn, TCGv_i64 rm) +{ + tcg_gen_neg_i64(tcg_ctx, rm, rm); + gen_helper_neon_addl_saturate_s64(tcg_ctx, rd, tcg_ctx->cpu_env, rn, rm); +} + +static bool trans_VQDMLSL_3d(DisasContext *s, arg_3diff *a) +{ + static NeonGenTwoOpWidenFn * const opfn[] = { + NULL, + gen_VQDMULL_16, + gen_VQDMULL_32, + NULL, + }; + static NeonGenTwo64OpFn * const accfn[] = { + NULL, + gen_VQDMLSL_acc_16, + gen_VQDMLSL_acc_32, + NULL, + }; + + return do_long_3d(s, a, opfn[a->size], accfn[a->size]); +} + +static bool trans_VMULL_P_3d(DisasContext *s, arg_3diff *a) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + + gen_helper_gvec_3 *fn_gvec; + + if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { + return false; + } + + /* UNDEF accesses to D16-D31 if they don't exist. 
*/ + if (!dc_isar_feature(aa32_simd_r32, s) && + ((a->vd | a->vn | a->vm) & 0x10)) { + return false; + } + + if (a->vd & 1) { + return false; + } + + switch (a->size) { + case 0: + fn_gvec = gen_helper_neon_pmull_h; + break; + case 2: + if (!dc_isar_feature(aa32_pmull, s)) { + return false; + } + fn_gvec = gen_helper_gvec_pmull_q; + break; + default: + return false; + } + + if (!vfp_access_check(s)) { + return true; + } + + tcg_gen_gvec_3_ool(tcg_ctx, neon_reg_offset(a->vd, 0), + neon_reg_offset(a->vn, 0), + neon_reg_offset(a->vm, 0), + 16, 16, 0, fn_gvec); + return true; +} + +static void gen_neon_dup_low16(TCGContext *tcg_ctx, TCGv_i32 var) +{ + TCGv_i32 tmp = tcg_temp_new_i32(tcg_ctx); + tcg_gen_ext16u_i32(tcg_ctx, var, var); + tcg_gen_shli_i32(tcg_ctx, tmp, var, 16); + tcg_gen_or_i32(tcg_ctx, var, var, tmp); + tcg_temp_free_i32(tcg_ctx, tmp); +} + +static void gen_neon_dup_high16(TCGContext *tcg_ctx, TCGv_i32 var) +{ + TCGv_i32 tmp = tcg_temp_new_i32(tcg_ctx); + tcg_gen_andi_i32(tcg_ctx, var, var, 0xffff0000); + tcg_gen_shri_i32(tcg_ctx, tmp, var, 16); + tcg_gen_or_i32(tcg_ctx, var, var, tmp); + tcg_temp_free_i32(tcg_ctx, tmp); +} + +static inline TCGv_i32 neon_get_scalar(TCGContext *tcg_ctx, int size, int reg) +{ + TCGv_i32 tmp; + if (size == 1) { + tmp = neon_load_reg(tcg_ctx, reg & 7, reg >> 4); + if (reg & 8) { + gen_neon_dup_high16(tcg_ctx, tmp); + } else { + gen_neon_dup_low16(tcg_ctx, tmp); + } + } else { + tmp = neon_load_reg(tcg_ctx, reg & 15, reg >> 4); + } + return tmp; +} + +static bool do_2scalar(DisasContext *s, arg_2scalar *a, + NeonGenTwoOpFn *opfn, NeonGenTwoOpFn *accfn) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + + /* + * Two registers and a scalar: perform an operation between + * the input elements and the scalar, and then possibly + * perform an accumulation operation of that result into the + * destination. + */ + TCGv_i32 scalar; + int pass; + + if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { + return false; + } + + /* UNDEF accesses to D16-D31 if they don't exist. */ + if (!dc_isar_feature(aa32_simd_r32, s) && + ((a->vd | a->vn | a->vm) & 0x10)) { + return false; + } + + if (!opfn) { + /* Bad size (including size == 3, which is a different insn group) */ + return false; + } + + if (a->q && ((a->vd | a->vn) & 1)) { + return false; + } + + if (!vfp_access_check(s)) { + return true; + } + + scalar = neon_get_scalar(tcg_ctx, a->size, a->vm); + + for (pass = 0; pass < (a->q ? 
4 : 2); pass++) { + TCGv_i32 tmp = neon_load_reg(tcg_ctx, a->vn, pass); + opfn(tcg_ctx, tmp, tmp, scalar); + if (accfn) { + TCGv_i32 rd = neon_load_reg(tcg_ctx, a->vd, pass); + accfn(tcg_ctx, tmp, rd, tmp); + tcg_temp_free_i32(tcg_ctx, rd); + } + neon_store_reg(tcg_ctx, a->vd, pass, tmp); + } + tcg_temp_free_i32(tcg_ctx, scalar); + return true; +} + +static bool trans_VMUL_2sc(DisasContext *s, arg_2scalar *a) +{ + static NeonGenTwoOpFn * const opfn[] = { + NULL, + gen_helper_neon_mul_u16, + tcg_gen_mul_i32, + NULL, + }; + + return do_2scalar(s, a, opfn[a->size], NULL); +} + +static bool trans_VMLA_2sc(DisasContext *s, arg_2scalar *a) +{ + static NeonGenTwoOpFn * const opfn[] = { + NULL, + gen_helper_neon_mul_u16, + tcg_gen_mul_i32, + NULL, + }; + static NeonGenTwoOpFn * const accfn[] = { + NULL, + gen_helper_neon_add_u16, + tcg_gen_add_i32, + NULL, + }; + + return do_2scalar(s, a, opfn[a->size], accfn[a->size]); +} + +static bool trans_VMLS_2sc(DisasContext *s, arg_2scalar *a) +{ + static NeonGenTwoOpFn * const opfn[] = { + NULL, + gen_helper_neon_mul_u16, + tcg_gen_mul_i32, + NULL, + }; + static NeonGenTwoOpFn * const accfn[] = { + NULL, + gen_helper_neon_sub_u16, + tcg_gen_sub_i32, + NULL, + }; + + return do_2scalar(s, a, opfn[a->size], accfn[a->size]); +} + +/* + * Rather than have a float-specific version of do_2scalar just for + * three insns, we wrap a NeonGenTwoSingleOpFn to turn it into + * a NeonGenTwoOpFn. + */ +#define WRAP_FP_FN(WRAPNAME, FUNC) \ + static void WRAPNAME(TCGContext *tcg_ctx, TCGv_i32 rd, \ + TCGv_i32 rn, TCGv_i32 rm) \ + { \ + TCGv_ptr fpstatus = get_fpstatus_ptr(tcg_ctx, 1); \ + FUNC(tcg_ctx, rd, rn, rm, fpstatus); \ + tcg_temp_free_ptr(tcg_ctx, fpstatus); \ + } + +WRAP_FP_FN(gen_VMUL_F_mul, gen_helper_vfp_muls) +WRAP_FP_FN(gen_VMUL_F_add, gen_helper_vfp_adds) +WRAP_FP_FN(gen_VMUL_F_sub, gen_helper_vfp_subs) + +static bool trans_VMUL_F_2sc(DisasContext *s, arg_2scalar *a) +{ + static NeonGenTwoOpFn * const opfn[] = { + NULL, + NULL, /* TODO: fp16 support */ + gen_VMUL_F_mul, + NULL, + }; + + return do_2scalar(s, a, opfn[a->size], NULL); +} + +static bool trans_VMLA_F_2sc(DisasContext *s, arg_2scalar *a) +{ + static NeonGenTwoOpFn * const opfn[] = { + NULL, + NULL, /* TODO: fp16 support */ + gen_VMUL_F_mul, + NULL, + }; + static NeonGenTwoOpFn * const accfn[] = { + NULL, + NULL, /* TODO: fp16 support */ + gen_VMUL_F_add, + NULL, + }; + + return do_2scalar(s, a, opfn[a->size], accfn[a->size]); +} + +static bool trans_VMLS_F_2sc(DisasContext *s, arg_2scalar *a) +{ + static NeonGenTwoOpFn * const opfn[] = { + NULL, + NULL, /* TODO: fp16 support */ + gen_VMUL_F_mul, + NULL, + }; + static NeonGenTwoOpFn * const accfn[] = { + NULL, + NULL, /* TODO: fp16 support */ + gen_VMUL_F_sub, + NULL, + }; + + return do_2scalar(s, a, opfn[a->size], accfn[a->size]); +} + +WRAP_ENV_FN(gen_VQDMULH_16, gen_helper_neon_qdmulh_s16) +WRAP_ENV_FN(gen_VQDMULH_32, gen_helper_neon_qdmulh_s32) +WRAP_ENV_FN(gen_VQRDMULH_16, gen_helper_neon_qrdmulh_s16) +WRAP_ENV_FN(gen_VQRDMULH_32, gen_helper_neon_qrdmulh_s32) + +static bool trans_VQDMULH_2sc(DisasContext *s, arg_2scalar *a) +{ + static NeonGenTwoOpFn * const opfn[] = { + NULL, + gen_VQDMULH_16, + gen_VQDMULH_32, + NULL, + }; + + return do_2scalar(s, a, opfn[a->size], NULL); +} + +static bool trans_VQRDMULH_2sc(DisasContext *s, arg_2scalar *a) +{ + static NeonGenTwoOpFn * const opfn[] = { + NULL, + gen_VQRDMULH_16, + gen_VQRDMULH_32, + NULL, + }; + + return do_2scalar(s, a, opfn[a->size], NULL); +} + +static bool do_vqrdmlah_2sc(DisasContext 
*s, arg_2scalar *a, + NeonGenThreeOpEnvFn *opfn) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + + /* + * VQRDMLAH/VQRDMLSH: this is like do_2scalar, but the opfn + * performs a kind of fused op-then-accumulate using a helper + * function that takes all of rd, rn and the scalar at once. + */ + TCGv_i32 scalar; + int pass; + + if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { + return false; + } + + if (!dc_isar_feature(aa32_rdm, s)) { + return false; + } + + /* UNDEF accesses to D16-D31 if they don't exist. */ + if (!dc_isar_feature(aa32_simd_r32, s) && + ((a->vd | a->vn | a->vm) & 0x10)) { + return false; + } + + if (!opfn) { + /* Bad size (including size == 3, which is a different insn group) */ + return false; + } + + if (a->q && ((a->vd | a->vn) & 1)) { + return false; + } + + if (!vfp_access_check(s)) { + return true; + } + + scalar = neon_get_scalar(tcg_ctx, a->size, a->vm); + + for (pass = 0; pass < (a->q ? 4 : 2); pass++) { + TCGv_i32 rn = neon_load_reg(tcg_ctx, a->vn, pass); + TCGv_i32 rd = neon_load_reg(tcg_ctx, a->vd, pass); + opfn(tcg_ctx, rd, tcg_ctx->cpu_env, rn, scalar, rd); + tcg_temp_free_i32(tcg_ctx, rn); + neon_store_reg(tcg_ctx, a->vd, pass, rd); + } + tcg_temp_free_i32(tcg_ctx, scalar); + + return true; +} + +static bool trans_VQRDMLAH_2sc(DisasContext *s, arg_2scalar *a) +{ + static NeonGenThreeOpEnvFn *opfn[] = { + NULL, + gen_helper_neon_qrdmlah_s16, + gen_helper_neon_qrdmlah_s32, + NULL, + }; + return do_vqrdmlah_2sc(s, a, opfn[a->size]); +} + +static bool trans_VQRDMLSH_2sc(DisasContext *s, arg_2scalar *a) +{ + static NeonGenThreeOpEnvFn *opfn[] = { + NULL, + gen_helper_neon_qrdmlsh_s16, + gen_helper_neon_qrdmlsh_s32, + NULL, + }; + return do_vqrdmlah_2sc(s, a, opfn[a->size]); +} + +static bool do_2scalar_long(DisasContext *s, arg_2scalar *a, + NeonGenTwoOpWidenFn *opfn, + NeonGenTwo64OpFn *accfn) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + + /* + * Two registers and a scalar, long operations: perform an + * operation on the input elements and the scalar which produces + * a double-width result, and then possibly perform an accumulation + * operation of that result into the destination. + */ + TCGv_i32 scalar, rn; + TCGv_i64 rn0_64, rn1_64; + + if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { + return false; + } + + /* UNDEF accesses to D16-D31 if they don't exist. 
*/ + if (!dc_isar_feature(aa32_simd_r32, s) && + ((a->vd | a->vn | a->vm) & 0x10)) { + return false; + } + + if (!opfn) { + /* Bad size (including size == 3, which is a different insn group) */ + return false; + } + + if (a->vd & 1) { + return false; + } + + if (!vfp_access_check(s)) { + return true; + } + + scalar = neon_get_scalar(tcg_ctx, a->size, a->vm); + + /* Load all inputs before writing any outputs, in case of overlap */ + rn = neon_load_reg(tcg_ctx, a->vn, 0); + rn0_64 = tcg_temp_new_i64(tcg_ctx); + opfn(tcg_ctx, rn0_64, rn, scalar); + tcg_temp_free_i32(tcg_ctx, rn); + + rn = neon_load_reg(tcg_ctx, a->vn, 1); + rn1_64 = tcg_temp_new_i64(tcg_ctx); + opfn(tcg_ctx, rn1_64, rn, scalar); + tcg_temp_free_i32(tcg_ctx, rn); + tcg_temp_free_i32(tcg_ctx, scalar); + + if (accfn) { + TCGv_i64 t64 = tcg_temp_new_i64(tcg_ctx); + neon_load_reg64(tcg_ctx, t64, a->vd); + accfn(tcg_ctx, t64, t64, rn0_64); + neon_store_reg64(tcg_ctx, t64, a->vd); + neon_load_reg64(tcg_ctx, t64, a->vd + 1); + accfn(tcg_ctx, t64, t64, rn1_64); + neon_store_reg64(tcg_ctx, t64, a->vd + 1); + tcg_temp_free_i64(tcg_ctx, t64); + } else { + neon_store_reg64(tcg_ctx, rn0_64, a->vd); + neon_store_reg64(tcg_ctx, rn1_64, a->vd + 1); + } + tcg_temp_free_i64(tcg_ctx, rn0_64); + tcg_temp_free_i64(tcg_ctx, rn1_64); + return true; +} + +static bool trans_VMULL_S_2sc(DisasContext *s, arg_2scalar *a) +{ + static NeonGenTwoOpWidenFn * const opfn[] = { + NULL, + gen_helper_neon_mull_s16, + gen_mull_s32, + NULL, + }; + + return do_2scalar_long(s, a, opfn[a->size], NULL); +} + +static bool trans_VMULL_U_2sc(DisasContext *s, arg_2scalar *a) +{ + static NeonGenTwoOpWidenFn * const opfn[] = { + NULL, + gen_helper_neon_mull_u16, + gen_mull_u32, + NULL, + }; + + return do_2scalar_long(s, a, opfn[a->size], NULL); +} + +#define DO_VMLAL_2SC(INSN, MULL, ACC) \ + static bool trans_##INSN##_2sc(DisasContext *s, arg_2scalar *a) \ + { \ + static NeonGenTwoOpWidenFn * const opfn[] = { \ + NULL, \ + gen_helper_neon_##MULL##16, \ + gen_##MULL##32, \ + NULL, \ + }; \ + static NeonGenTwo64OpFn * const accfn[] = { \ + NULL, \ + gen_helper_neon_##ACC##l_u32, \ + tcg_gen_##ACC##_i64, \ + NULL, \ + }; \ + return do_2scalar_long(s, a, opfn[a->size], accfn[a->size]); \ + } + +DO_VMLAL_2SC(VMLAL_S, mull_s, add) +DO_VMLAL_2SC(VMLAL_U, mull_u, add) +DO_VMLAL_2SC(VMLSL_S, mull_s, sub) +DO_VMLAL_2SC(VMLSL_U, mull_u, sub) + +static bool trans_VQDMULL_2sc(DisasContext *s, arg_2scalar *a) +{ + static NeonGenTwoOpWidenFn * const opfn[] = { + NULL, + gen_VQDMULL_16, + gen_VQDMULL_32, + NULL, + }; + + return do_2scalar_long(s, a, opfn[a->size], NULL); +} + +static bool trans_VQDMLAL_2sc(DisasContext *s, arg_2scalar *a) +{ + static NeonGenTwoOpWidenFn * const opfn[] = { + NULL, + gen_VQDMULL_16, + gen_VQDMULL_32, + NULL, + }; + static NeonGenTwo64OpFn * const accfn[] = { + NULL, + gen_VQDMLAL_acc_16, + gen_VQDMLAL_acc_32, + NULL, + }; + + return do_2scalar_long(s, a, opfn[a->size], accfn[a->size]); +} + +static bool trans_VQDMLSL_2sc(DisasContext *s, arg_2scalar *a) +{ + static NeonGenTwoOpWidenFn * const opfn[] = { + NULL, + gen_VQDMULL_16, + gen_VQDMULL_32, + NULL, + }; + static NeonGenTwo64OpFn * const accfn[] = { + NULL, + gen_VQDMLSL_acc_16, + gen_VQDMLSL_acc_32, + NULL, + }; + + return do_2scalar_long(s, a, opfn[a->size], accfn[a->size]); +} + +static bool trans_VEXT(DisasContext *s, arg_VEXT *a) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + + if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { + return false; + } + + /* UNDEF accesses to D16-D31 if they don't exist. 
*/ + if (!dc_isar_feature(aa32_simd_r32, s) && + ((a->vd | a->vn | a->vm) & 0x10)) { + return false; + } + + if ((a->vn | a->vm | a->vd) & a->q) { + return false; + } + + if (a->imm > 7 && !a->q) { + return false; + } + + if (!vfp_access_check(s)) { + return true; + } + + if (!a->q) { + /* Extract 64 bits from */ + TCGv_i64 left, right, dest; + + left = tcg_temp_new_i64(tcg_ctx); + right = tcg_temp_new_i64(tcg_ctx); + dest = tcg_temp_new_i64(tcg_ctx); + + neon_load_reg64(tcg_ctx, right, a->vn); + neon_load_reg64(tcg_ctx, left, a->vm); + tcg_gen_extract2_i64(tcg_ctx, dest, right, left, a->imm * 8); + neon_store_reg64(tcg_ctx, dest, a->vd); + + tcg_temp_free_i64(tcg_ctx, left); + tcg_temp_free_i64(tcg_ctx, right); + tcg_temp_free_i64(tcg_ctx, dest); + } else { + /* Extract 128 bits from */ + TCGv_i64 left, middle, right, destleft, destright; + + left = tcg_temp_new_i64(tcg_ctx); + middle = tcg_temp_new_i64(tcg_ctx); + right = tcg_temp_new_i64(tcg_ctx); + destleft = tcg_temp_new_i64(tcg_ctx); + destright = tcg_temp_new_i64(tcg_ctx); + + if (a->imm < 8) { + neon_load_reg64(tcg_ctx, right, a->vn); + neon_load_reg64(tcg_ctx, middle, a->vn + 1); + tcg_gen_extract2_i64(tcg_ctx, destright, right, middle, a->imm * 8); + neon_load_reg64(tcg_ctx, left, a->vm); + tcg_gen_extract2_i64(tcg_ctx, destleft, middle, left, a->imm * 8); + } else { + neon_load_reg64(tcg_ctx, right, a->vn + 1); + neon_load_reg64(tcg_ctx, middle, a->vm); + tcg_gen_extract2_i64(tcg_ctx, destright, right, middle, (a->imm - 8) * 8); + neon_load_reg64(tcg_ctx, left, a->vm + 1); + tcg_gen_extract2_i64(tcg_ctx, destleft, middle, left, (a->imm - 8) * 8); + } + + neon_store_reg64(tcg_ctx, destright, a->vd); + neon_store_reg64(tcg_ctx, destleft, a->vd + 1); + + tcg_temp_free_i64(tcg_ctx, destright); + tcg_temp_free_i64(tcg_ctx, destleft); + tcg_temp_free_i64(tcg_ctx, right); + tcg_temp_free_i64(tcg_ctx, middle); + tcg_temp_free_i64(tcg_ctx, left); + } + return true; +} + +static bool trans_VTBL(DisasContext *s, arg_VTBL *a) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + + int n; + TCGv_i32 tmp, tmp2, tmp3, tmp4; + TCGv_ptr ptr1; + + if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { + return false; + } + + /* UNDEF accesses to D16-D31 if they don't exist. */ + if (!dc_isar_feature(aa32_simd_r32, s) && + ((a->vd | a->vn | a->vm) & 0x10)) { + return false; + } + + if (!vfp_access_check(s)) { + return true; + } + + n = a->len + 1; + if ((a->vn + n) > 32) { + /* + * This is UNPREDICTABLE; we choose to UNDEF to avoid the + * helper function running off the end of the register file. 
+ */ + return false; + } + n <<= 3; + if (a->op) { + tmp = neon_load_reg(tcg_ctx, a->vd, 0); + } else { + tmp = tcg_temp_new_i32(tcg_ctx); + tcg_gen_movi_i32(tcg_ctx, tmp, 0); + } + tmp2 = neon_load_reg(tcg_ctx, a->vm, 0); + ptr1 = vfp_reg_ptr(tcg_ctx, true, a->vn); + tmp4 = tcg_const_i32(tcg_ctx, n); + gen_helper_neon_tbl(tcg_ctx, tmp2, tmp2, tmp, ptr1, tmp4); + tcg_temp_free_i32(tcg_ctx, tmp); + if (a->op) { + tmp = neon_load_reg(tcg_ctx, a->vd, 1); + } else { + tmp = tcg_temp_new_i32(tcg_ctx); + tcg_gen_movi_i32(tcg_ctx, tmp, 0); + } + tmp3 = neon_load_reg(tcg_ctx, a->vm, 1); + gen_helper_neon_tbl(tcg_ctx, tmp3, tmp3, tmp, ptr1, tmp4); + tcg_temp_free_i32(tcg_ctx, tmp4); + tcg_temp_free_ptr(tcg_ctx, ptr1); + neon_store_reg(tcg_ctx, a->vd, 0, tmp2); + neon_store_reg(tcg_ctx, a->vd, 1, tmp3); + tcg_temp_free_i32(tcg_ctx, tmp); + return true; +} + +static bool trans_VDUP_scalar(DisasContext *s, arg_VDUP_scalar *a) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + + if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { + return false; + } + + /* UNDEF accesses to D16-D31 if they don't exist. */ + if (!dc_isar_feature(aa32_simd_r32, s) && + ((a->vd | a->vm) & 0x10)) { + return false; + } + + if (a->vd & a->q) { + return false; + } + + if (!vfp_access_check(s)) { + return true; + } + + tcg_gen_gvec_dup_mem(tcg_ctx, a->size, neon_reg_offset(a->vd, 0), + neon_element_offset(a->vm, a->index, a->size), + a->q ? 16 : 8, a->q ? 16 : 8); + return true; +} + +static bool trans_VREV64(DisasContext *s, arg_VREV64 *a) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + + int pass, half; + + if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { + return false; + } + + /* UNDEF accesses to D16-D31 if they don't exist. */ + if (!dc_isar_feature(aa32_simd_r32, s) && + ((a->vd | a->vm) & 0x10)) { + return false; + } + + if ((a->vd | a->vm) & a->q) { + return false; + } + + if (a->size == 3) { + return false; + } + + if (!vfp_access_check(s)) { + return true; + } + + for (pass = 0; pass < (a->q ? 2 : 1); pass++) { + TCGv_i32 tmp[2]; + + for (half = 0; half < 2; half++) { + tmp[half] = neon_load_reg(tcg_ctx, a->vm, pass * 2 + half); + switch (a->size) { + case 0: + tcg_gen_bswap32_i32(tcg_ctx, tmp[half], tmp[half]); + break; + case 1: + gen_swap_half(tcg_ctx, tmp[half], tmp[half]); + break; + case 2: + break; + default: + g_assert_not_reached(); + } + } + neon_store_reg(tcg_ctx, a->vd, pass * 2, tmp[1]); + neon_store_reg(tcg_ctx, a->vd, pass * 2 + 1, tmp[0]); + } + return true; +} + +static bool do_2misc_pairwise(DisasContext *s, arg_2misc *a, + NeonGenWidenFn *widenfn, + NeonGenTwo64OpFn *opfn, + NeonGenTwo64OpFn *accfn) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + + /* + * Pairwise long operations: widen both halves of the pair, + * combine the pairs with the opfn, and then possibly accumulate + * into the destination with the accfn. + */ + int pass; + + if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { + return false; + } + + /* UNDEF accesses to D16-D31 if they don't exist. 
*/ + if (!dc_isar_feature(aa32_simd_r32, s) && + ((a->vd | a->vm) & 0x10)) { + return false; + } + + if ((a->vd | a->vm) & a->q) { + return false; + } + + if (!widenfn) { + return false; + } + + if (!vfp_access_check(s)) { + return true; + } + + for (pass = 0; pass < a->q + 1; pass++) { + TCGv_i32 tmp; + TCGv_i64 rm0_64, rm1_64, rd_64; + + rm0_64 = tcg_temp_new_i64(tcg_ctx); + rm1_64 = tcg_temp_new_i64(tcg_ctx); + rd_64 = tcg_temp_new_i64(tcg_ctx); + tmp = neon_load_reg(tcg_ctx, a->vm, pass * 2); + widenfn(tcg_ctx, rm0_64, tmp); + tcg_temp_free_i32(tcg_ctx, tmp); + tmp = neon_load_reg(tcg_ctx, a->vm, pass * 2 + 1); + widenfn(tcg_ctx, rm1_64, tmp); + tcg_temp_free_i32(tcg_ctx, tmp); + opfn(tcg_ctx, rd_64, rm0_64, rm1_64); + tcg_temp_free_i64(tcg_ctx, rm0_64); + tcg_temp_free_i64(tcg_ctx, rm1_64); + + if (accfn) { + TCGv_i64 tmp64 = tcg_temp_new_i64(tcg_ctx); + neon_load_reg64(tcg_ctx, tmp64, a->vd + pass); + accfn(tcg_ctx, rd_64, tmp64, rd_64); + tcg_temp_free_i64(tcg_ctx, tmp64); + } + neon_store_reg64(tcg_ctx, rd_64, a->vd + pass); + tcg_temp_free_i64(tcg_ctx, rd_64); + } + return true; +} + +static bool trans_VPADDL_S(DisasContext *s, arg_2misc *a) +{ + static NeonGenWidenFn * const widenfn[] = { + gen_helper_neon_widen_s8, + gen_helper_neon_widen_s16, + tcg_gen_ext_i32_i64, + NULL, + }; + static NeonGenTwo64OpFn * const opfn[] = { + gen_helper_neon_paddl_u16, + gen_helper_neon_paddl_u32, + tcg_gen_add_i64, + NULL, + }; + + return do_2misc_pairwise(s, a, widenfn[a->size], opfn[a->size], NULL); +} + +static bool trans_VPADDL_U(DisasContext *s, arg_2misc *a) +{ + static NeonGenWidenFn * const widenfn[] = { + gen_helper_neon_widen_u8, + gen_helper_neon_widen_u16, + tcg_gen_extu_i32_i64, + NULL, + }; + static NeonGenTwo64OpFn * const opfn[] = { + gen_helper_neon_paddl_u16, + gen_helper_neon_paddl_u32, + tcg_gen_add_i64, + NULL, + }; + + return do_2misc_pairwise(s, a, widenfn[a->size], opfn[a->size], NULL); +} + +static bool trans_VPADAL_S(DisasContext *s, arg_2misc *a) +{ + static NeonGenWidenFn * const widenfn[] = { + gen_helper_neon_widen_s8, + gen_helper_neon_widen_s16, + tcg_gen_ext_i32_i64, + NULL, + }; + static NeonGenTwo64OpFn * const opfn[] = { + gen_helper_neon_paddl_u16, + gen_helper_neon_paddl_u32, + tcg_gen_add_i64, + NULL, + }; + static NeonGenTwo64OpFn * const accfn[] = { + gen_helper_neon_addl_u16, + gen_helper_neon_addl_u32, + tcg_gen_add_i64, + NULL, + }; + + return do_2misc_pairwise(s, a, widenfn[a->size], opfn[a->size], + accfn[a->size]); +} + +static bool trans_VPADAL_U(DisasContext *s, arg_2misc *a) +{ + static NeonGenWidenFn * const widenfn[] = { + gen_helper_neon_widen_u8, + gen_helper_neon_widen_u16, + tcg_gen_extu_i32_i64, + NULL, + }; + static NeonGenTwo64OpFn * const opfn[] = { + gen_helper_neon_paddl_u16, + gen_helper_neon_paddl_u32, + tcg_gen_add_i64, + NULL, + }; + static NeonGenTwo64OpFn * const accfn[] = { + gen_helper_neon_addl_u16, + gen_helper_neon_addl_u32, + tcg_gen_add_i64, + NULL, + }; + + return do_2misc_pairwise(s, a, widenfn[a->size], opfn[a->size], + accfn[a->size]); +} + +typedef void ZipFn(TCGContext *, TCGv_ptr, TCGv_ptr); + +static bool do_zip_uzp(DisasContext *s, arg_2misc *a, + ZipFn *fn) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + + TCGv_ptr pd, pm; + + if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { + return false; + } + + /* UNDEF accesses to D16-D31 if they don't exist. 
*/ + if (!dc_isar_feature(aa32_simd_r32, s) && + ((a->vd | a->vm) & 0x10)) { + return false; + } + + if ((a->vd | a->vm) & a->q) { + return false; + } + + if (!fn) { + /* Bad size or size/q combination */ + return false; + } + + if (!vfp_access_check(s)) { + return true; + } + + pd = vfp_reg_ptr(tcg_ctx, true, a->vd); + pm = vfp_reg_ptr(tcg_ctx, true, a->vm); + fn(tcg_ctx, pd, pm); + tcg_temp_free_ptr(tcg_ctx, pd); + tcg_temp_free_ptr(tcg_ctx, pm); + return true; +} + +static bool trans_VUZP(DisasContext *s, arg_2misc *a) +{ + static ZipFn * const fn[2][4] = { + { + gen_helper_neon_unzip8, + gen_helper_neon_unzip16, + NULL, + NULL, + }, { + gen_helper_neon_qunzip8, + gen_helper_neon_qunzip16, + gen_helper_neon_qunzip32, + NULL, + } + }; + return do_zip_uzp(s, a, fn[a->q][a->size]); +} + +static bool trans_VZIP(DisasContext *s, arg_2misc *a) +{ + static ZipFn * const fn[2][4] = { + { + gen_helper_neon_zip8, + gen_helper_neon_zip16, + NULL, + NULL, + }, { + gen_helper_neon_qzip8, + gen_helper_neon_qzip16, + gen_helper_neon_qzip32, + NULL, + } + }; + return do_zip_uzp(s, a, fn[a->q][a->size]); +} + +static bool do_vmovn(DisasContext *s, arg_2misc *a, + NeonGenNarrowEnvFn *narrowfn) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + + TCGv_i64 rm; + TCGv_i32 rd0, rd1; + + if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { + return false; + } + + /* UNDEF accesses to D16-D31 if they don't exist. */ + if (!dc_isar_feature(aa32_simd_r32, s) && + ((a->vd | a->vm) & 0x10)) { + return false; + } + + if (a->vm & 1) { + return false; + } + + if (!narrowfn) { + return false; + } + + if (!vfp_access_check(s)) { + return true; + } + + rm = tcg_temp_new_i64(tcg_ctx); + rd0 = tcg_temp_new_i32(tcg_ctx); + rd1 = tcg_temp_new_i32(tcg_ctx); + + neon_load_reg64(tcg_ctx, rm, a->vm); + narrowfn(tcg_ctx, rd0, tcg_ctx->cpu_env, rm); + neon_load_reg64(tcg_ctx, rm, a->vm + 1); + narrowfn(tcg_ctx, rd1, tcg_ctx->cpu_env, rm); + neon_store_reg(tcg_ctx, a->vd, 0, rd0); + neon_store_reg(tcg_ctx, a->vd, 1, rd1); + tcg_temp_free_i64(tcg_ctx, rm); + return true; +} + +#define DO_VMOVN(INSN, FUNC) \ + static bool trans_##INSN(DisasContext *s, arg_2misc *a) \ + { \ + static NeonGenNarrowEnvFn * const narrowfn[] = { \ + FUNC##8, \ + FUNC##16, \ + FUNC##32, \ + NULL, \ + }; \ + return do_vmovn(s, a, narrowfn[a->size]); \ + } + +DO_VMOVN(VMOVN, gen_neon_narrow_u) +DO_VMOVN(VQMOVUN, gen_helper_neon_unarrow_sat) +DO_VMOVN(VQMOVN_S, gen_helper_neon_narrow_sat_s) +DO_VMOVN(VQMOVN_U, gen_helper_neon_narrow_sat_u) + +static bool trans_VSHLL(DisasContext *s, arg_2misc *a) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + + TCGv_i32 rm0, rm1; + TCGv_i64 rd; + static NeonGenWidenFn * const widenfns[] = { + gen_helper_neon_widen_u8, + gen_helper_neon_widen_u16, + tcg_gen_extu_i32_i64, + NULL, + }; + NeonGenWidenFn *widenfn = widenfns[a->size]; + + if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { + return false; + } + + /* UNDEF accesses to D16-D31 if they don't exist. 
*/ + if (!dc_isar_feature(aa32_simd_r32, s) && + ((a->vd | a->vm) & 0x10)) { + return false; + } + + if (a->vd & 1) { + return false; + } + + if (!widenfn) { + return false; + } + + if (!vfp_access_check(s)) { + return true; + } + + rd = tcg_temp_new_i64(tcg_ctx); + + rm0 = neon_load_reg(tcg_ctx, a->vm, 0); + rm1 = neon_load_reg(tcg_ctx, a->vm, 1); + + widenfn(tcg_ctx, rd, rm0); + tcg_gen_shli_i64(tcg_ctx, rd, rd, 8 << a->size); + neon_store_reg64(tcg_ctx, rd, a->vd); + widenfn(tcg_ctx, rd, rm1); + tcg_gen_shli_i64(tcg_ctx, rd, rd, 8 << a->size); + neon_store_reg64(tcg_ctx, rd, a->vd + 1); + + tcg_temp_free_i64(tcg_ctx, rd); + tcg_temp_free_i32(tcg_ctx, rm0); + tcg_temp_free_i32(tcg_ctx, rm1); + return true; +} + +static bool trans_VCVT_F16_F32(DisasContext *s, arg_2misc *a) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + + TCGv_ptr fpst; + TCGv_i32 ahp, tmp, tmp2, tmp3; + + if (!arm_dc_feature(s, ARM_FEATURE_NEON) || + !dc_isar_feature(aa32_fp16_spconv, s)) { + return false; + } + + /* UNDEF accesses to D16-D31 if they don't exist. */ + if (!dc_isar_feature(aa32_simd_r32, s) && + ((a->vd | a->vm) & 0x10)) { + return false; + } + + if ((a->vm & 1) || (a->size != 1)) { + return false; + } + + if (!vfp_access_check(s)) { + return true; + } + + fpst = get_fpstatus_ptr(tcg_ctx, true); + ahp = get_ahp_flag(tcg_ctx); + tmp = neon_load_reg(tcg_ctx, a->vm, 0); + gen_helper_vfp_fcvt_f32_to_f16(tcg_ctx, tmp, tmp, fpst, ahp); + tmp2 = neon_load_reg(tcg_ctx, a->vm, 1); + gen_helper_vfp_fcvt_f32_to_f16(tcg_ctx, tmp2, tmp2, fpst, ahp); + tcg_gen_shli_i32(tcg_ctx, tmp2, tmp2, 16); + tcg_gen_or_i32(tcg_ctx, tmp2, tmp2, tmp); + tcg_temp_free_i32(tcg_ctx, tmp); + tmp = neon_load_reg(tcg_ctx, a->vm, 2); + gen_helper_vfp_fcvt_f32_to_f16(tcg_ctx, tmp, tmp, fpst, ahp); + tmp3 = neon_load_reg(tcg_ctx, a->vm, 3); + neon_store_reg(tcg_ctx, a->vd, 0, tmp2); + gen_helper_vfp_fcvt_f32_to_f16(tcg_ctx, tmp3, tmp3, fpst, ahp); + tcg_gen_shli_i32(tcg_ctx, tmp3, tmp3, 16); + tcg_gen_or_i32(tcg_ctx, tmp3, tmp3, tmp); + neon_store_reg(tcg_ctx, a->vd, 1, tmp3); + tcg_temp_free_i32(tcg_ctx, tmp); + tcg_temp_free_i32(tcg_ctx, ahp); + tcg_temp_free_ptr(tcg_ctx, fpst); + + return true; +} + +static bool trans_VCVT_F32_F16(DisasContext *s, arg_2misc *a) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + + TCGv_ptr fpst; + TCGv_i32 ahp, tmp, tmp2, tmp3; + + if (!arm_dc_feature(s, ARM_FEATURE_NEON) || + !dc_isar_feature(aa32_fp16_spconv, s)) { + return false; + } + + /* UNDEF accesses to D16-D31 if they don't exist. 
*/ + if (!dc_isar_feature(aa32_simd_r32, s) && + ((a->vd | a->vm) & 0x10)) { + return false; + } + + if ((a->vd & 1) || (a->size != 1)) { + return false; + } + + if (!vfp_access_check(s)) { + return true; + } + + fpst = get_fpstatus_ptr(tcg_ctx, true); + ahp = get_ahp_flag(tcg_ctx); + tmp3 = tcg_temp_new_i32(tcg_ctx); + tmp = neon_load_reg(tcg_ctx, a->vm, 0); + tmp2 = neon_load_reg(tcg_ctx, a->vm, 1); + tcg_gen_ext16u_i32(tcg_ctx, tmp3, tmp); + gen_helper_vfp_fcvt_f16_to_f32(tcg_ctx, tmp3, tmp3, fpst, ahp); + neon_store_reg(tcg_ctx, a->vd, 0, tmp3); + tcg_gen_shri_i32(tcg_ctx, tmp, tmp, 16); + gen_helper_vfp_fcvt_f16_to_f32(tcg_ctx, tmp, tmp, fpst, ahp); + neon_store_reg(tcg_ctx, a->vd, 1, tmp); + tmp3 = tcg_temp_new_i32(tcg_ctx); + tcg_gen_ext16u_i32(tcg_ctx, tmp3, tmp2); + gen_helper_vfp_fcvt_f16_to_f32(tcg_ctx, tmp3, tmp3, fpst, ahp); + neon_store_reg(tcg_ctx, a->vd, 2, tmp3); + tcg_gen_shri_i32(tcg_ctx, tmp2, tmp2, 16); + gen_helper_vfp_fcvt_f16_to_f32(tcg_ctx, tmp2, tmp2, fpst, ahp); + neon_store_reg(tcg_ctx, a->vd, 3, tmp2); + tcg_temp_free_i32(tcg_ctx, ahp); + tcg_temp_free_ptr(tcg_ctx, fpst); + + return true; +} + +static bool do_2misc_vec(DisasContext *s, arg_2misc *a, GVecGen2Fn *fn) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + + int vec_size = a->q ? 16 : 8; + int rd_ofs = neon_reg_offset(a->vd, 0); + int rm_ofs = neon_reg_offset(a->vm, 0); + + if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { + return false; + } + + /* UNDEF accesses to D16-D31 if they don't exist. */ + if (!dc_isar_feature(aa32_simd_r32, s) && + ((a->vd | a->vm) & 0x10)) { + return false; + } + + if (a->size == 3) { + return false; + } + + if ((a->vd | a->vm) & a->q) { + return false; + } + + if (!vfp_access_check(s)) { + return true; + } + + fn(tcg_ctx, a->size, rd_ofs, rm_ofs, vec_size, vec_size); + + return true; +} + +#define DO_2MISC_VEC(INSN, FN) \ + static bool trans_##INSN(DisasContext *s, arg_2misc *a) \ + { \ + return do_2misc_vec(s, a, FN); \ + } + +DO_2MISC_VEC(VNEG, tcg_gen_gvec_neg) +DO_2MISC_VEC(VABS, tcg_gen_gvec_abs) +DO_2MISC_VEC(VCEQ0, gen_gvec_ceq0) +DO_2MISC_VEC(VCGT0, gen_gvec_cgt0) +DO_2MISC_VEC(VCLE0, gen_gvec_cle0) +DO_2MISC_VEC(VCGE0, gen_gvec_cge0) +DO_2MISC_VEC(VCLT0, gen_gvec_clt0) + +static bool trans_VMVN(DisasContext *s, arg_2misc *a) +{ + if (a->size != 0) { + return false; + } + return do_2misc_vec(s, a, tcg_gen_gvec_not); +} + +#define WRAP_2M_3_OOL_FN(WRAPNAME, FUNC, DATA) \ + static void WRAPNAME(TCGContext *tcg_ctx, \ + unsigned vece, uint32_t rd_ofs, \ + uint32_t rm_ofs, uint32_t oprsz, \ + uint32_t maxsz) \ + { \ + tcg_gen_gvec_3_ool(tcg_ctx, rd_ofs, rd_ofs, rm_ofs, oprsz, maxsz, \ + DATA, FUNC); \ + } + +#define WRAP_2M_2_OOL_FN(WRAPNAME, FUNC, DATA) \ + static void WRAPNAME(TCGContext *tcg_ctx, \ + unsigned vece, uint32_t rd_ofs, \ + uint32_t rm_ofs, uint32_t oprsz, \ + uint32_t maxsz) \ + { \ + tcg_gen_gvec_2_ool(tcg_ctx, rd_ofs, rm_ofs, oprsz, maxsz, DATA, FUNC); \ + } + +WRAP_2M_3_OOL_FN(gen_AESE, gen_helper_crypto_aese, 0) +WRAP_2M_3_OOL_FN(gen_AESD, gen_helper_crypto_aese, 1) +WRAP_2M_2_OOL_FN(gen_AESMC, gen_helper_crypto_aesmc, 0) +WRAP_2M_2_OOL_FN(gen_AESIMC, gen_helper_crypto_aesmc, 1) +WRAP_2M_2_OOL_FN(gen_SHA1H, gen_helper_crypto_sha1h, 0) +WRAP_2M_2_OOL_FN(gen_SHA1SU1, gen_helper_crypto_sha1su1, 0) +WRAP_2M_2_OOL_FN(gen_SHA256SU0, gen_helper_crypto_sha256su0, 0) + +#define DO_2M_CRYPTO(INSN, FEATURE, SIZE) \ + static bool trans_##INSN(DisasContext *s, arg_2misc *a) \ + { \ + if (!dc_isar_feature(FEATURE, s) || a->size != SIZE) { \ + return false; \ + } \ + return 
do_2misc_vec(s, a, gen_##INSN); \ + } + +DO_2M_CRYPTO(AESE, aa32_aes, 0) +DO_2M_CRYPTO(AESD, aa32_aes, 0) +DO_2M_CRYPTO(AESMC, aa32_aes, 0) +DO_2M_CRYPTO(AESIMC, aa32_aes, 0) +DO_2M_CRYPTO(SHA1H, aa32_sha1, 2) +DO_2M_CRYPTO(SHA1SU1, aa32_sha1, 2) +DO_2M_CRYPTO(SHA256SU0, aa32_sha2, 2) + +static bool do_2misc(DisasContext *s, arg_2misc *a, NeonGenOneOpFn *fn) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + + int pass; + + /* Handle a 2-reg-misc operation by iterating 32 bits at a time */ + if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { + return false; + } + + /* UNDEF accesses to D16-D31 if they don't exist. */ + if (!dc_isar_feature(aa32_simd_r32, s) && + ((a->vd | a->vm) & 0x10)) { + return false; + } + + if (!fn) { + return false; + } + + if ((a->vd | a->vm) & a->q) { + return false; + } + + if (!vfp_access_check(s)) { + return true; + } + + for (pass = 0; pass < (a->q ? 4 : 2); pass++) { + TCGv_i32 tmp = neon_load_reg(tcg_ctx, a->vm, pass); + fn(tcg_ctx, tmp, tmp); + neon_store_reg(tcg_ctx, a->vd, pass, tmp); + } + + return true; +} + +static bool trans_VREV32(DisasContext *s, arg_2misc *a) +{ + static NeonGenOneOpFn * const fn[] = { + tcg_gen_bswap32_i32, + gen_swap_half, + NULL, + NULL, + }; + return do_2misc(s, a, fn[a->size]); +} + +static bool trans_VREV16(DisasContext *s, arg_2misc *a) +{ + if (a->size != 0) { + return false; + } + return do_2misc(s, a, gen_rev16); +} + +static bool trans_VCLS(DisasContext *s, arg_2misc *a) +{ + static NeonGenOneOpFn * const fn[] = { + gen_helper_neon_cls_s8, + gen_helper_neon_cls_s16, + gen_helper_neon_cls_s32, + NULL, + }; + return do_2misc(s, a, fn[a->size]); +} + +static void do_VCLZ_32(TCGContext *tcg_ctx, TCGv_i32 rd, TCGv_i32 rm) +{ + tcg_gen_clzi_i32(tcg_ctx, rd, rm, 32); +} + +static bool trans_VCLZ(DisasContext *s, arg_2misc *a) +{ + static NeonGenOneOpFn * const fn[] = { + gen_helper_neon_clz_u8, + gen_helper_neon_clz_u16, + do_VCLZ_32, + NULL, + }; + return do_2misc(s, a, fn[a->size]); +} + +static bool trans_VCNT(DisasContext *s, arg_2misc *a) +{ + if (a->size != 0) { + return false; + } + return do_2misc(s, a, gen_helper_neon_cnt_u8); +} + +static bool trans_VABS_F(DisasContext *s, arg_2misc *a) +{ + if (a->size != 2) { + return false; + } + /* TODO: FP16 : size == 1 */ + return do_2misc(s, a, gen_helper_vfp_abss); +} + +static bool trans_VNEG_F(DisasContext *s, arg_2misc *a) +{ + if (a->size != 2) { + return false; + } + /* TODO: FP16 : size == 1 */ + return do_2misc(s, a, gen_helper_vfp_negs); +} + +static bool trans_VRECPE(DisasContext *s, arg_2misc *a) +{ + if (a->size != 2) { + return false; + } + return do_2misc(s, a, gen_helper_recpe_u32); +} + +static bool trans_VRSQRTE(DisasContext *s, arg_2misc *a) +{ + if (a->size != 2) { + return false; + } + return do_2misc(s, a, gen_helper_rsqrte_u32); +} + +#define WRAP_1OP_ENV_FN(WRAPNAME, FUNC) \ + static void WRAPNAME(TCGContext *tcg_ctx, TCGv_i32 d, TCGv_i32 m) \ + { \ + FUNC(tcg_ctx, d, tcg_ctx->cpu_env, m); \ + } + +WRAP_1OP_ENV_FN(gen_VQABS_s8, gen_helper_neon_qabs_s8) +WRAP_1OP_ENV_FN(gen_VQABS_s16, gen_helper_neon_qabs_s16) +WRAP_1OP_ENV_FN(gen_VQABS_s32, gen_helper_neon_qabs_s32) +WRAP_1OP_ENV_FN(gen_VQNEG_s8, gen_helper_neon_qneg_s8) +WRAP_1OP_ENV_FN(gen_VQNEG_s16, gen_helper_neon_qneg_s16) +WRAP_1OP_ENV_FN(gen_VQNEG_s32, gen_helper_neon_qneg_s32) + +static bool trans_VQABS(DisasContext *s, arg_2misc *a) +{ + static NeonGenOneOpFn * const fn[] = { + gen_VQABS_s8, + gen_VQABS_s16, + gen_VQABS_s32, + NULL, + }; + return do_2misc(s, a, fn[a->size]); +} + +static bool 
trans_VQNEG(DisasContext *s, arg_2misc *a) +{ + static NeonGenOneOpFn * const fn[] = { + gen_VQNEG_s8, + gen_VQNEG_s16, + gen_VQNEG_s32, + NULL, + }; + return do_2misc(s, a, fn[a->size]); +} + +static bool do_2misc_fp(DisasContext *s, arg_2misc *a, + NeonGenOneSingleOpFn *fn) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + + int pass; + TCGv_ptr fpst; + + /* Handle a 2-reg-misc operation by iterating 32 bits at a time */ + if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { + return false; + } + + /* UNDEF accesses to D16-D31 if they don't exist. */ + if (!dc_isar_feature(aa32_simd_r32, s) && + ((a->vd | a->vm) & 0x10)) { + return false; + } + + if (a->size != 2) { + /* TODO: FP16 will be the size == 1 case */ + return false; + } + + if ((a->vd | a->vm) & a->q) { + return false; + } + + if (!vfp_access_check(s)) { + return true; + } + + fpst = get_fpstatus_ptr(tcg_ctx, 1); + for (pass = 0; pass < (a->q ? 4 : 2); pass++) { + TCGv_i32 tmp = neon_load_reg(tcg_ctx, a->vm, pass); + fn(tcg_ctx, tmp, tmp, fpst); + neon_store_reg(tcg_ctx, a->vd, pass, tmp); + } + tcg_temp_free_ptr(tcg_ctx, fpst); + + return true; +} + +#define DO_2MISC_FP(INSN, FUNC) \ + static bool trans_##INSN(DisasContext *s, arg_2misc *a) \ + { \ + return do_2misc_fp(s, a, FUNC); \ + } + +DO_2MISC_FP(VRECPE_F, gen_helper_recpe_f32) +DO_2MISC_FP(VRSQRTE_F, gen_helper_rsqrte_f32) +DO_2MISC_FP(VCVT_FS, gen_helper_vfp_sitos) +DO_2MISC_FP(VCVT_FU, gen_helper_vfp_uitos) +DO_2MISC_FP(VCVT_SF, gen_helper_vfp_tosizs) +DO_2MISC_FP(VCVT_UF, gen_helper_vfp_touizs) + +static bool trans_VRINTX(DisasContext *s, arg_2misc *a) +{ + if (!arm_dc_feature(s, ARM_FEATURE_V8)) { + return false; + } + return do_2misc_fp(s, a, gen_helper_rints_exact); +} + +#define WRAP_FP_CMP0_FWD(WRAPNAME, FUNC) \ + static void WRAPNAME(TCGContext *tcg_ctx, TCGv_i32 d, \ + TCGv_i32 m, TCGv_ptr fpst) \ + { \ + TCGv_i32 zero = tcg_const_i32(tcg_ctx, 0); \ + FUNC(tcg_ctx, d, m, zero, fpst); \ + tcg_temp_free_i32(tcg_ctx, zero); \ + } +#define WRAP_FP_CMP0_REV(WRAPNAME, FUNC) \ + static void WRAPNAME(TCGContext *tcg_ctx, TCGv_i32 d, \ + TCGv_i32 m, TCGv_ptr fpst) \ + { \ + TCGv_i32 zero = tcg_const_i32(tcg_ctx, 0); \ + FUNC(tcg_ctx, d, zero, m, fpst); \ + tcg_temp_free_i32(tcg_ctx, zero); \ + } + +#define DO_FP_CMP0(INSN, FUNC, REV) \ + WRAP_FP_CMP0_##REV(gen_##INSN, FUNC) \ + static bool trans_##INSN(DisasContext *s, arg_2misc *a) \ + { \ + return do_2misc_fp(s, a, gen_##INSN); \ + } + +DO_FP_CMP0(VCGT0_F, gen_helper_neon_cgt_f32, FWD) +DO_FP_CMP0(VCGE0_F, gen_helper_neon_cge_f32, FWD) +DO_FP_CMP0(VCEQ0_F, gen_helper_neon_ceq_f32, FWD) +DO_FP_CMP0(VCLE0_F, gen_helper_neon_cge_f32, REV) +DO_FP_CMP0(VCLT0_F, gen_helper_neon_cgt_f32, REV) + +static bool do_vrint(DisasContext *s, arg_2misc *a, int rmode) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + + /* + * Handle a VRINT* operation by iterating 32 bits at a time, + * with a specified rounding mode in operation. + */ + int pass; + TCGv_ptr fpst; + TCGv_i32 tcg_rmode; + + if (!arm_dc_feature(s, ARM_FEATURE_NEON) || + !arm_dc_feature(s, ARM_FEATURE_V8)) { + return false; + } + + /* UNDEF accesses to D16-D31 if they don't exist. 
*/ + if (!dc_isar_feature(aa32_simd_r32, s) && + ((a->vd | a->vm) & 0x10)) { + return false; + } + + if (a->size != 2) { + /* TODO: FP16 will be the size == 1 case */ + return false; + } + + if ((a->vd | a->vm) & a->q) { + return false; + } + + if (!vfp_access_check(s)) { + return true; + } + + fpst = get_fpstatus_ptr(tcg_ctx, 1); + tcg_rmode = tcg_const_i32(tcg_ctx, arm_rmode_to_sf(rmode)); + gen_helper_set_neon_rmode(tcg_ctx, tcg_rmode, tcg_rmode, tcg_ctx->cpu_env); + for (pass = 0; pass < (a->q ? 4 : 2); pass++) { + TCGv_i32 tmp = neon_load_reg(tcg_ctx, a->vm, pass); + gen_helper_rints(tcg_ctx, tmp, tmp, fpst); + neon_store_reg(tcg_ctx, a->vd, pass, tmp); + } + gen_helper_set_neon_rmode(tcg_ctx, tcg_rmode, tcg_rmode, tcg_ctx->cpu_env); + tcg_temp_free_i32(tcg_ctx, tcg_rmode); + tcg_temp_free_ptr(tcg_ctx, fpst); + + return true; +} + +#define DO_VRINT(INSN, RMODE) \ + static bool trans_##INSN(DisasContext *s, arg_2misc *a) \ + { \ + return do_vrint(s, a, RMODE); \ + } + +DO_VRINT(VRINTN, FPROUNDING_TIEEVEN) +DO_VRINT(VRINTA, FPROUNDING_TIEAWAY) +DO_VRINT(VRINTZ, FPROUNDING_ZERO) +DO_VRINT(VRINTM, FPROUNDING_NEGINF) +DO_VRINT(VRINTP, FPROUNDING_POSINF) + +static bool do_vcvt(DisasContext *s, arg_2misc *a, int rmode, bool is_signed) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + + /* + * Handle a VCVT* operation by iterating 32 bits at a time, + * with a specified rounding mode in operation. + */ + int pass; + TCGv_ptr fpst; + TCGv_i32 tcg_rmode, tcg_shift; + + if (!arm_dc_feature(s, ARM_FEATURE_NEON) || + !arm_dc_feature(s, ARM_FEATURE_V8)) { + return false; + } + + /* UNDEF accesses to D16-D31 if they don't exist. */ + if (!dc_isar_feature(aa32_simd_r32, s) && + ((a->vd | a->vm) & 0x10)) { + return false; + } + + if (a->size != 2) { + /* TODO: FP16 will be the size == 1 case */ + return false; + } + + if ((a->vd | a->vm) & a->q) { + return false; + } + + if (!vfp_access_check(s)) { + return true; + } + + fpst = get_fpstatus_ptr(tcg_ctx, 1); + tcg_shift = tcg_const_i32(tcg_ctx, 0); + tcg_rmode = tcg_const_i32(tcg_ctx, arm_rmode_to_sf(rmode)); + gen_helper_set_neon_rmode(tcg_ctx, tcg_rmode, tcg_rmode, tcg_ctx->cpu_env); + for (pass = 0; pass < (a->q ? 4 : 2); pass++) { + TCGv_i32 tmp = neon_load_reg(tcg_ctx, a->vm, pass); + if (is_signed) { + gen_helper_vfp_tosls(tcg_ctx, tmp, tmp, tcg_shift, fpst); + } else { + gen_helper_vfp_touls(tcg_ctx, tmp, tmp, tcg_shift, fpst); + } + neon_store_reg(tcg_ctx, a->vd, pass, tmp); + } + gen_helper_set_neon_rmode(tcg_ctx, tcg_rmode, tcg_rmode, tcg_ctx->cpu_env); + tcg_temp_free_i32(tcg_ctx, tcg_rmode); + tcg_temp_free_i32(tcg_ctx, tcg_shift); + tcg_temp_free_ptr(tcg_ctx, fpst); + + return true; +} + +#define DO_VCVT(INSN, RMODE, SIGNED) \ + static bool trans_##INSN(DisasContext *s, arg_2misc *a) \ + { \ + return do_vcvt(s, a, RMODE, SIGNED); \ + } + +DO_VCVT(VCVTAU, FPROUNDING_TIEAWAY, false) +DO_VCVT(VCVTAS, FPROUNDING_TIEAWAY, true) +DO_VCVT(VCVTNU, FPROUNDING_TIEEVEN, false) +DO_VCVT(VCVTNS, FPROUNDING_TIEEVEN, true) +DO_VCVT(VCVTPU, FPROUNDING_POSINF, false) +DO_VCVT(VCVTPS, FPROUNDING_POSINF, true) +DO_VCVT(VCVTMU, FPROUNDING_NEGINF, false) +DO_VCVT(VCVTMS, FPROUNDING_NEGINF, true) + +static bool trans_VSWP(DisasContext *s, arg_2misc *a) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + + TCGv_i64 rm, rd; + int pass; + + if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { + return false; + } + + /* UNDEF accesses to D16-D31 if they don't exist. 
*/ + if (!dc_isar_feature(aa32_simd_r32, s) && + ((a->vd | a->vm) & 0x10)) { + return false; + } + + if (a->size != 0) { + return false; + } + + if ((a->vd | a->vm) & a->q) { + return false; + } + + if (!vfp_access_check(s)) { + return true; + } + + rm = tcg_temp_new_i64(tcg_ctx); + rd = tcg_temp_new_i64(tcg_ctx); + for (pass = 0; pass < (a->q ? 2 : 1); pass++) { + neon_load_reg64(tcg_ctx, rm, a->vm + pass); + neon_load_reg64(tcg_ctx, rd, a->vd + pass); + neon_store_reg64(tcg_ctx, rm, a->vd + pass); + neon_store_reg64(tcg_ctx, rd, a->vm + pass); + } + tcg_temp_free_i64(tcg_ctx, rm); + tcg_temp_free_i64(tcg_ctx, rd); + + return true; +} +static void gen_neon_trn_u8(TCGContext *tcg_ctx, TCGv_i32 t0, TCGv_i32 t1) +{ + TCGv_i32 rd, tmp; + + rd = tcg_temp_new_i32(tcg_ctx); + tmp = tcg_temp_new_i32(tcg_ctx); + + tcg_gen_shli_i32(tcg_ctx, rd, t0, 8); + tcg_gen_andi_i32(tcg_ctx, rd, rd, 0xff00ff00); + tcg_gen_andi_i32(tcg_ctx, tmp, t1, 0x00ff00ff); + tcg_gen_or_i32(tcg_ctx, rd, rd, tmp); + + tcg_gen_shri_i32(tcg_ctx, t1, t1, 8); + tcg_gen_andi_i32(tcg_ctx, t1, t1, 0x00ff00ff); + tcg_gen_andi_i32(tcg_ctx, tmp, t0, 0xff00ff00); + tcg_gen_or_i32(tcg_ctx, t1, t1, tmp); + tcg_gen_mov_i32(tcg_ctx, t0, rd); + + tcg_temp_free_i32(tcg_ctx, tmp); + tcg_temp_free_i32(tcg_ctx, rd); +} + +static void gen_neon_trn_u16(TCGContext *tcg_ctx, TCGv_i32 t0, TCGv_i32 t1) +{ + TCGv_i32 rd, tmp; + + rd = tcg_temp_new_i32(tcg_ctx); + tmp = tcg_temp_new_i32(tcg_ctx); + + tcg_gen_shli_i32(tcg_ctx, rd, t0, 16); + tcg_gen_andi_i32(tcg_ctx, tmp, t1, 0xffff); + tcg_gen_or_i32(tcg_ctx, rd, rd, tmp); + tcg_gen_shri_i32(tcg_ctx, t1, t1, 16); + tcg_gen_andi_i32(tcg_ctx, tmp, t0, 0xffff0000); + tcg_gen_or_i32(tcg_ctx, t1, t1, tmp); + tcg_gen_mov_i32(tcg_ctx, t0, rd); + + tcg_temp_free_i32(tcg_ctx, tmp); + tcg_temp_free_i32(tcg_ctx, rd); +} + +static bool trans_VTRN(DisasContext *s, arg_2misc *a) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + + TCGv_i32 tmp, tmp2; + int pass; + + if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { + return false; + } + + /* UNDEF accesses to D16-D31 if they don't exist. */ + if (!dc_isar_feature(aa32_simd_r32, s) && + ((a->vd | a->vm) & 0x10)) { + return false; + } + + if ((a->vd | a->vm) & a->q) { + return false; + } + + if (a->size == 3) { + return false; + } + + if (!vfp_access_check(s)) { + return true; + } + + if (a->size == 2) { + for (pass = 0; pass < (a->q ? 4 : 2); pass += 2) { + tmp = neon_load_reg(tcg_ctx, a->vm, pass); + tmp2 = neon_load_reg(tcg_ctx, a->vd, pass + 1); + neon_store_reg(tcg_ctx, a->vm, pass, tmp2); + neon_store_reg(tcg_ctx, a->vd, pass + 1, tmp); + } + } else { + for (pass = 0; pass < (a->q ? 4 : 2); pass++) { + tmp = neon_load_reg(tcg_ctx, a->vm, pass); + tmp2 = neon_load_reg(tcg_ctx, a->vd, pass); + if (a->size == 0) { + gen_neon_trn_u8(tcg_ctx, tmp, tmp2); + } else { + gen_neon_trn_u16(tcg_ctx, tmp, tmp2); + } + neon_store_reg(tcg_ctx, a->vm, pass, tmp2); + neon_store_reg(tcg_ctx, a->vd, pass, tmp); + } + } + return true; +} diff --git a/qemu/target/arm/translate-sve.c b/qemu/target/arm/translate-sve.c index ff2f8ff323..fefbc3a202 100644 --- a/qemu/target/arm/translate-sve.c +++ b/qemu/target/arm/translate-sve.c @@ -178,7 +178,7 @@ static void do_dupi_z(DisasContext *s, int rd, uint64_t word) { TCGContext *tcg_ctx = s->uc->tcg_ctx; unsigned vsz = vec_full_reg_size(s); - tcg_gen_gvec_dup64i(tcg_ctx, vec_full_reg_offset(s, rd), vsz, vsz, word); + tcg_gen_gvec_dup_imm(tcg_ctx, MO_64, vec_full_reg_offset(s, rd), vsz, vsz, word); } /* Invoke a vector expander on two Pregs. 
*/ @@ -1481,7 +1481,7 @@ static bool do_predset(DisasContext *s, int esz, int rd, int pat, bool setflag) unsigned oprsz = size_for_gvec(setsz / 8); if (oprsz * 8 == setsz) { - tcg_gen_gvec_dup64i(tcg_ctx, ofs, oprsz, maxsz, word); + tcg_gen_gvec_dup_imm(tcg_ctx, MO_64, ofs, oprsz, maxsz, word); goto done; } } @@ -2088,7 +2088,11 @@ static bool trans_DUP_x(DisasContext *s, arg_DUP_x *a) unsigned nofs = vec_reg_offset(s, a->rn, index, esz); tcg_gen_gvec_dup_mem(tcg_ctx, esz, dofs, nofs, vsz, vsz); } else { - tcg_gen_gvec_dup64i(tcg_ctx, dofs, vsz, vsz, 0); + /* + * While dup_mem handles 128-bit elements, dup_imm does not. + * Thankfully element size doesn't matter for splatting zero. + */ + tcg_gen_gvec_dup_imm(tcg_ctx, MO_64, dofs, vsz, vsz, 0); } } return true; @@ -3341,7 +3345,7 @@ static bool trans_FDUP(DisasContext *s, arg_FDUP *a) imm = vfp_expand_imm(a->esz, a->imm); imm = dup_const(a->esz, imm); - tcg_gen_gvec_dup64i(tcg_ctx, dofs, vsz, vsz, imm); + tcg_gen_gvec_dup_imm(tcg_ctx, a->esz, dofs, vsz, vsz, imm); } return true; } @@ -3356,7 +3360,7 @@ static bool trans_DUP_i(DisasContext *s, arg_DUP_i *a) unsigned vsz = vec_full_reg_size(s); int dofs = vec_full_reg_offset(s, a->rd); - tcg_gen_gvec_dup64i(tcg_ctx, dofs, vsz, vsz, dup_const(a->esz, a->imm)); + tcg_gen_gvec_dup_imm(tcg_ctx, a->esz, dofs, vsz, vsz, a->imm); } return true; } @@ -4050,41 +4054,33 @@ static bool trans_FCADD(DisasContext *s, arg_FCADD *a) typedef void gen_helper_sve_fmla(TCGContext *, TCGv_env, TCGv_ptr, TCGv_i32); -static bool do_fmla(DisasContext *s, arg_rprrr_esz *a, gen_helper_sve_fmla *fn) +static bool do_fmla(DisasContext *s, arg_rprrr_esz *a, + gen_helper_gvec_5_ptr *fn) { TCGContext *tcg_ctx = s->uc->tcg_ctx; - if (fn == NULL) { + if (a->esz == 0) { return false; } - if (!sve_access_check(s)) { - return true; + if (sve_access_check(s)) { + unsigned vsz = vec_full_reg_size(s); + TCGv_ptr status = get_fpstatus_ptr(tcg_ctx, a->esz == MO_16); + tcg_gen_gvec_5_ptr(tcg_ctx, + vec_full_reg_offset(s, a->rd), + vec_full_reg_offset(s, a->rn), + vec_full_reg_offset(s, a->rm), + vec_full_reg_offset(s, a->ra), + pred_full_reg_offset(s, a->pg), + status, vsz, vsz, 0, fn); + tcg_temp_free_ptr(tcg_ctx, status); } - unsigned vsz = vec_full_reg_size(s); - unsigned desc; - TCGv_i32 t_desc; - TCGv_ptr pg = tcg_temp_new_ptr(tcg_ctx); - - /* We would need 7 operands to pass these arguments "properly". - * So we encode all the register numbers into the descriptor. 
- */ - desc = deposit32(a->rd, 5, 5, a->rn); - desc = deposit32(desc, 10, 5, a->rm); - desc = deposit32(desc, 15, 5, a->ra); - desc = simd_desc(vsz, vsz, desc); - - t_desc = tcg_const_i32(tcg_ctx, desc); - tcg_gen_addi_ptr(tcg_ctx, pg, tcg_ctx->cpu_env, pred_full_reg_offset(s, a->pg)); - fn(tcg_ctx, tcg_ctx->cpu_env, pg, t_desc); - tcg_temp_free_i32(tcg_ctx, t_desc); - tcg_temp_free_ptr(tcg_ctx, pg); return true; } #define DO_FMLA(NAME, name) \ static bool trans_##NAME(DisasContext *s, arg_rprrr_esz *a) \ { \ - static gen_helper_sve_fmla * const fns[4] = { \ + static gen_helper_gvec_5_ptr * const fns[4] = { \ NULL, gen_helper_sve_##name##_h, \ gen_helper_sve_##name##_s, gen_helper_sve_##name##_d \ }; \ @@ -4101,7 +4097,8 @@ DO_FMLA(FNMLS_zpzzz, fnmls_zpzzz) static bool trans_FCMLA_zpzzz(DisasContext *s, arg_FCMLA_zpzzz *a) { TCGContext *tcg_ctx = s->uc->tcg_ctx; - static gen_helper_sve_fmla * const fns[3] = { + static gen_helper_gvec_5_ptr * const fns[4] = { + NULL, gen_helper_sve_fcmla_zpzzz_h, gen_helper_sve_fcmla_zpzzz_s, gen_helper_sve_fcmla_zpzzz_d, @@ -4112,25 +4109,15 @@ static bool trans_FCMLA_zpzzz(DisasContext *s, arg_FCMLA_zpzzz *a) } if (sve_access_check(s)) { unsigned vsz = vec_full_reg_size(s); - unsigned desc; - TCGv_i32 t_desc; - TCGv_ptr pg = tcg_temp_new_ptr(tcg_ctx); - - /* We would need 7 operands to pass these arguments "properly". - * So we encode all the register numbers into the descriptor. - */ - desc = deposit32(a->rd, 5, 5, a->rn); - desc = deposit32(desc, 10, 5, a->rm); - desc = deposit32(desc, 15, 5, a->ra); - desc = deposit32(desc, 20, 2, a->rot); - desc = sextract32(desc, 0, 22); - desc = simd_desc(vsz, vsz, desc); - - t_desc = tcg_const_i32(tcg_ctx, desc); - tcg_gen_addi_ptr(tcg_ctx, pg, tcg_ctx->cpu_env, pred_full_reg_offset(s, a->pg)); - fns[a->esz - 1](tcg_ctx, tcg_ctx->cpu_env, pg, t_desc); - tcg_temp_free_i32(tcg_ctx, t_desc); - tcg_temp_free_ptr(tcg_ctx, pg); + TCGv_ptr status = get_fpstatus_ptr(tcg_ctx, a->esz == MO_16); + tcg_gen_gvec_5_ptr(tcg_ctx, + vec_full_reg_offset(s, a->rd), + vec_full_reg_offset(s, a->rn), + vec_full_reg_offset(s, a->rm), + vec_full_reg_offset(s, a->ra), + pred_full_reg_offset(s, a->pg), + status, vsz, vsz, a->rot, fns[a->esz]); + tcg_temp_free_ptr(tcg_ctx, status); } return true; } @@ -4468,15 +4455,17 @@ static void do_ldr(DisasContext *s, uint32_t vofs, int len, int rn, int imm) int len_remain = len % 8; int nparts = len / 8 + ctpop8(len_remain); int midx = get_mem_index(s); - TCGv_i64 addr, t0, t1; + TCGv_i64 dirty_addr, clean_addr, t0, t1; - addr = tcg_temp_new_i64(tcg_ctx); - t0 = tcg_temp_new_i64(tcg_ctx); + dirty_addr = tcg_temp_new_i64(tcg_ctx); + tcg_gen_addi_i64(tcg_ctx, dirty_addr, cpu_reg_sp(s, rn), imm); + clean_addr = gen_mte_checkN(s, dirty_addr, false, rn != 31, len, MO_8); + tcg_temp_free_i64(tcg_ctx, dirty_addr); - /* Note that unpredicated load/store of vector/predicate registers + /* + * Note that unpredicated load/store of vector/predicate registers * are defined as a stream of bytes, which equates to little-endian - * operations on larger quantities. There is no nice way to force - * a little-endian load for aarch64_be-linux-user out of line. + * operations on larger quantities. * * Attempt to keep code expansion to a minimum by limiting the * amount of unrolling done. 
@@ -4484,56 +4473,58 @@ static void do_ldr(DisasContext *s, uint32_t vofs, int len, int rn, int imm) if (nparts <= 4) { int i; + t0 = tcg_temp_new_i64(tcg_ctx); for (i = 0; i < len_align; i += 8) { - tcg_gen_addi_i64(tcg_ctx, addr, cpu_reg_sp(s, rn), imm + i); - tcg_gen_qemu_ld_i64(tcg_ctx, t0, addr, midx, MO_LEQ); + tcg_gen_qemu_ld_i64(tcg_ctx, t0, clean_addr, midx, MO_LEQ); tcg_gen_st_i64(tcg_ctx, t0, tcg_ctx->cpu_env, vofs + i); + tcg_gen_addi_i64(tcg_ctx, clean_addr, cpu_reg_sp(s, rn), 8); } + tcg_temp_free_i64(tcg_ctx, t0); } else { TCGLabel *loop = gen_new_label(tcg_ctx); TCGv_ptr tp, i = tcg_const_local_ptr(tcg_ctx, 0); - gen_set_label(tcg_ctx, loop); + /* Copy the clean address into a local temp, live across the loop. */ + t0 = clean_addr; + clean_addr = new_tmp_a64_local(s); + tcg_gen_mov_i64(tcg_ctx, clean_addr, t0); - /* Minimize the number of local temps that must be re-read from - * the stack each iteration. Instead, re-compute values other - * than the loop counter. - */ - tp = tcg_temp_new_ptr(tcg_ctx); - tcg_gen_addi_ptr(tcg_ctx, tp, i, imm); - tcg_gen_extu_ptr_i64(tcg_ctx, addr, tp); - tcg_gen_add_i64(tcg_ctx, addr, addr, cpu_reg_sp(s, rn)); + gen_set_label(tcg_ctx, loop); - tcg_gen_qemu_ld_i64(tcg_ctx, t0, addr, midx, MO_LEQ); + t0 = tcg_temp_new_i64(tcg_ctx); + tcg_gen_qemu_ld_i64(tcg_ctx, t0, clean_addr, midx, MO_LEQ); + tcg_gen_addi_i64(tcg_ctx, clean_addr, clean_addr, 8); + tp = tcg_temp_new_ptr(tcg_ctx); tcg_gen_add_ptr(tcg_ctx, tp, tcg_ctx->cpu_env, i); tcg_gen_addi_ptr(tcg_ctx, i, i, 8); tcg_gen_st_i64(tcg_ctx, t0, tp, vofs); tcg_temp_free_ptr(tcg_ctx, tp); + tcg_temp_free_i64(tcg_ctx, t0); tcg_gen_brcondi_ptr(tcg_ctx, TCG_COND_LTU, i, len_align, loop); tcg_temp_free_ptr(tcg_ctx, i); } - /* Predicate register loads can be any multiple of 2. + /* + * Predicate register loads can be any multiple of 2. * Note that we still store the entire 64-bit unit into cpu_env. */ if (len_remain) { - tcg_gen_addi_i64(tcg_ctx, addr, cpu_reg_sp(s, rn), imm + len_align); - + t0 = tcg_temp_new_i64(tcg_ctx); switch (len_remain) { case 2: case 4: case 8: - tcg_gen_qemu_ld_i64(tcg_ctx, t0, addr, midx, MO_LE | ctz32(len_remain)); + tcg_gen_qemu_ld_i64(tcg_ctx, t0, clean_addr, midx, + MO_LE | ctz32(len_remain)); break; case 6: t1 = tcg_temp_new_i64(tcg_ctx); - tcg_gen_qemu_ld_i64(tcg_ctx, t0, addr, midx, MO_LEUL); - tcg_gen_addi_i64(tcg_ctx, addr, addr, 4); - tcg_gen_qemu_ld_i64(tcg_ctx, t1, addr, midx, MO_LEUW); - tcg_gen_deposit_i64(tcg_ctx, t0, t0, t1, 32, 32); + tcg_gen_qemu_ld_i64(tcg_ctx, t0, clean_addr, midx, MO_LEUL); + tcg_gen_addi_i64(tcg_ctx, clean_addr, clean_addr, 4); + tcg_gen_qemu_ld_i64(tcg_ctx, t1, clean_addr, midx, MO_LEUW); tcg_gen_deposit_i64(tcg_ctx, t0, t0, t1, 32, 32); tcg_temp_free_i64(tcg_ctx, t1); break; @@ -4541,9 +4532,8 @@ static void do_ldr(DisasContext *s, uint32_t vofs, int len, int rn, int imm) g_assert_not_reached(); } tcg_gen_st_i64(tcg_ctx, t0, tcg_ctx->cpu_env, vofs + len_align); + tcg_temp_free_i64(tcg_ctx, t0); } - tcg_temp_free_i64(tcg_ctx, addr); - tcg_temp_free_i64(tcg_ctx, t0); } /* Similarly for stores. 
*/ @@ -4554,10 +4544,12 @@ static void do_str(DisasContext *s, uint32_t vofs, int len, int rn, int imm) int len_remain = len % 8; int nparts = len / 8 + ctpop8(len_remain); int midx = get_mem_index(s); - TCGv_i64 addr, t0; + TCGv_i64 dirty_addr, clean_addr, t0; - addr = tcg_temp_new_i64(tcg_ctx); - t0 = tcg_temp_new_i64(tcg_ctx); + dirty_addr = tcg_temp_new_i64(tcg_ctx); + tcg_gen_addi_i64(tcg_ctx, dirty_addr, cpu_reg_sp(s, rn), imm); + clean_addr = gen_mte_checkN(s, dirty_addr, false, rn != 31, len, MO_8); + tcg_temp_free_i64(tcg_ctx, dirty_addr); /* Note that unpredicated load/store of vector/predicate registers * are defined as a stream of bytes, which equates to little-endian @@ -4570,33 +4562,34 @@ static void do_str(DisasContext *s, uint32_t vofs, int len, int rn, int imm) if (nparts <= 4) { int i; + t0 = tcg_temp_new_i64(tcg_ctx); for (i = 0; i < len_align; i += 8) { tcg_gen_ld_i64(tcg_ctx, t0, tcg_ctx->cpu_env, vofs + i); - tcg_gen_addi_i64(tcg_ctx, addr, cpu_reg_sp(s, rn), imm + i); - tcg_gen_qemu_st_i64(tcg_ctx, t0, addr, midx, MO_LEQ); + tcg_gen_qemu_st_i64(tcg_ctx, t0, clean_addr, midx, MO_LEQ); + tcg_gen_addi_i64(tcg_ctx, clean_addr, cpu_reg_sp(s, rn), 8); } } else { TCGLabel *loop = gen_new_label(tcg_ctx); - TCGv_ptr t2, i = tcg_const_local_ptr(tcg_ctx, 0); - - gen_set_label(tcg_ctx, loop); + TCGv_ptr tp, i = tcg_const_local_ptr(tcg_ctx, 0); - t2 = tcg_temp_new_ptr(tcg_ctx); - tcg_gen_add_ptr(tcg_ctx, t2, tcg_ctx->cpu_env, i); - tcg_gen_ld_i64(tcg_ctx, t0, t2, vofs); - /* Minimize the number of local temps that must be re-read from - * the stack each iteration. Instead, re-compute values other - * than the loop counter. - */ - tcg_gen_addi_ptr(tcg_ctx, t2, i, imm); - tcg_gen_extu_ptr_i64(tcg_ctx, addr, t2); - tcg_gen_add_i64(tcg_ctx, addr, addr, cpu_reg_sp(s, rn)); - tcg_temp_free_ptr(tcg_ctx, t2); + /* Copy the clean address into a local temp, live across the loop. */ + t0 = clean_addr; + clean_addr = new_tmp_a64_local(s); + tcg_gen_mov_i64(tcg_ctx, clean_addr, t0); - tcg_gen_qemu_st_i64(tcg_ctx, t0, addr, midx, MO_LEQ); + gen_set_label(tcg_ctx, loop); + t0 = tcg_temp_new_i64(tcg_ctx); + tp = tcg_temp_new_ptr(tcg_ctx); + tcg_gen_add_ptr(tcg_ctx, tp, tcg_ctx->cpu_env, i); + tcg_gen_ld_i64(tcg_ctx, t0, tp, vofs); tcg_gen_addi_ptr(tcg_ctx, i, i, 8); + tcg_temp_free_ptr(tcg_ctx, tp); + + tcg_gen_qemu_st_i64(tcg_ctx, t0, clean_addr, midx, MO_LEQ); + tcg_gen_addi_i64(tcg_ctx, clean_addr, clean_addr, 8); + tcg_temp_free_i64(tcg_ctx, t0); tcg_gen_brcondi_ptr(tcg_ctx, TCG_COND_LTU, i, len_align, loop); tcg_temp_free_ptr(tcg_ctx, i); @@ -4604,29 +4597,29 @@ static void do_str(DisasContext *s, uint32_t vofs, int len, int rn, int imm) /* Predicate register stores can be any multiple of 2. 
*/ if (len_remain) { + t0 = tcg_temp_new_i64(tcg_ctx); tcg_gen_ld_i64(tcg_ctx, t0, tcg_ctx->cpu_env, vofs + len_align); - tcg_gen_addi_i64(tcg_ctx, addr, cpu_reg_sp(s, rn), imm + len_align); switch (len_remain) { case 2: case 4: case 8: - tcg_gen_qemu_st_i64(tcg_ctx, t0, addr, midx, MO_LE | ctz32(len_remain)); + tcg_gen_qemu_st_i64(tcg_ctx, t0, clean_addr, midx, + MO_LE | ctz32(len_remain)); break; case 6: - tcg_gen_qemu_st_i64(tcg_ctx, t0, addr, midx, MO_LEUL); - tcg_gen_addi_i64(tcg_ctx, addr, addr, 4); + tcg_gen_qemu_st_i64(tcg_ctx, t0, clean_addr, midx, MO_LEUL); + tcg_gen_addi_i64(tcg_ctx, clean_addr, clean_addr, 4); tcg_gen_shri_i64(tcg_ctx, t0, t0, 32); - tcg_gen_qemu_st_i64(tcg_ctx, t0, addr, midx, MO_LEUW); + tcg_gen_qemu_st_i64(tcg_ctx, t0, clean_addr, midx, MO_LEUW); break; default: g_assert_not_reached(); } + tcg_temp_free_i64(tcg_ctx, t0); } - tcg_temp_free_i64(tcg_ctx, addr); - tcg_temp_free_i64(tcg_ctx, t0); } static bool trans_LDR_zri(DisasContext *s, arg_rri *a) @@ -4691,27 +4684,36 @@ static const uint8_t dtype_esz[16] = { 3, 2, 1, 3 }; -static TCGMemOpIdx sve_memopidx(DisasContext *s, int dtype) -{ - return make_memop_idx(s->be_data | dtype_mop[dtype], get_mem_index(s)); -} - static void do_mem_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr, - int dtype, gen_helper_gvec_mem *fn) + int dtype, uint32_t mte_n, bool is_write, + gen_helper_gvec_mem *fn) { TCGContext *tcg_ctx = s->uc->tcg_ctx; unsigned vsz = vec_full_reg_size(s); TCGv_ptr t_pg; TCGv_i32 t_desc; - int desc; + int desc = 0; - /* For e.g. LD4, there are not enough arguments to pass all 4 + /* + * For e.g. LD4, there are not enough arguments to pass all 4 * registers as pointers, so encode the regno into the data field. * For consistency, do this even for LD1. */ - desc = sve_memopidx(s, dtype); - desc |= zt << MEMOPIDX_SHIFT; - desc = simd_desc(vsz, vsz, desc); + if (s->mte_active[0]) { + int msz = dtype_msz(dtype); + + FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s), desc); + FIELD_DP32(desc, MTEDESC, TBI, s->tbid, desc); + FIELD_DP32(desc, MTEDESC, TCMA, s->tcma, desc); + FIELD_DP32(desc, MTEDESC, WRITE, is_write, desc); + FIELD_DP32(desc, MTEDESC, ESIZE, 1 << msz, desc); + FIELD_DP32(desc, MTEDESC, TSIZE, mte_n << msz, desc); + desc <<= SVE_MTEDESC_SHIFT; + } else { + addr = clean_data_tbi(s, addr); + } + + desc = simd_desc(vsz, vsz, zt | desc); t_desc = tcg_const_i32(tcg_ctx, desc); t_pg = tcg_temp_new_ptr(tcg_ctx); @@ -4725,64 +4727,132 @@ static void do_mem_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr, static void do_ld_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr, int dtype, int nreg) { - static gen_helper_gvec_mem * const fns[2][16][4] = { - /* Little-endian */ - { { gen_helper_sve_ld1bb_r, gen_helper_sve_ld2bb_r, - gen_helper_sve_ld3bb_r, gen_helper_sve_ld4bb_r }, - { gen_helper_sve_ld1bhu_r, NULL, NULL, NULL }, - { gen_helper_sve_ld1bsu_r, NULL, NULL, NULL }, - { gen_helper_sve_ld1bdu_r, NULL, NULL, NULL }, - - { gen_helper_sve_ld1sds_le_r, NULL, NULL, NULL }, - { gen_helper_sve_ld1hh_le_r, gen_helper_sve_ld2hh_le_r, - gen_helper_sve_ld3hh_le_r, gen_helper_sve_ld4hh_le_r }, - { gen_helper_sve_ld1hsu_le_r, NULL, NULL, NULL }, - { gen_helper_sve_ld1hdu_le_r, NULL, NULL, NULL }, - - { gen_helper_sve_ld1hds_le_r, NULL, NULL, NULL }, - { gen_helper_sve_ld1hss_le_r, NULL, NULL, NULL }, - { gen_helper_sve_ld1ss_le_r, gen_helper_sve_ld2ss_le_r, - gen_helper_sve_ld3ss_le_r, gen_helper_sve_ld4ss_le_r }, - { gen_helper_sve_ld1sdu_le_r, NULL, NULL, NULL }, - - { gen_helper_sve_ld1bds_r, NULL, NULL, 
NULL }, - { gen_helper_sve_ld1bss_r, NULL, NULL, NULL }, - { gen_helper_sve_ld1bhs_r, NULL, NULL, NULL }, - { gen_helper_sve_ld1dd_le_r, gen_helper_sve_ld2dd_le_r, - gen_helper_sve_ld3dd_le_r, gen_helper_sve_ld4dd_le_r } }, - - /* Big-endian */ - { { gen_helper_sve_ld1bb_r, gen_helper_sve_ld2bb_r, - gen_helper_sve_ld3bb_r, gen_helper_sve_ld4bb_r }, - { gen_helper_sve_ld1bhu_r, NULL, NULL, NULL }, - { gen_helper_sve_ld1bsu_r, NULL, NULL, NULL }, - { gen_helper_sve_ld1bdu_r, NULL, NULL, NULL }, - - { gen_helper_sve_ld1sds_be_r, NULL, NULL, NULL }, - { gen_helper_sve_ld1hh_be_r, gen_helper_sve_ld2hh_be_r, - gen_helper_sve_ld3hh_be_r, gen_helper_sve_ld4hh_be_r }, - { gen_helper_sve_ld1hsu_be_r, NULL, NULL, NULL }, - { gen_helper_sve_ld1hdu_be_r, NULL, NULL, NULL }, - - { gen_helper_sve_ld1hds_be_r, NULL, NULL, NULL }, - { gen_helper_sve_ld1hss_be_r, NULL, NULL, NULL }, - { gen_helper_sve_ld1ss_be_r, gen_helper_sve_ld2ss_be_r, - gen_helper_sve_ld3ss_be_r, gen_helper_sve_ld4ss_be_r }, - { gen_helper_sve_ld1sdu_be_r, NULL, NULL, NULL }, - - { gen_helper_sve_ld1bds_r, NULL, NULL, NULL }, - { gen_helper_sve_ld1bss_r, NULL, NULL, NULL }, - { gen_helper_sve_ld1bhs_r, NULL, NULL, NULL }, - { gen_helper_sve_ld1dd_be_r, gen_helper_sve_ld2dd_be_r, - gen_helper_sve_ld3dd_be_r, gen_helper_sve_ld4dd_be_r } } + static gen_helper_gvec_mem * const fns[2][2][16][4] = { + { /* mte inactive, little-endian */ + { { gen_helper_sve_ld1bb_r, gen_helper_sve_ld2bb_r, + gen_helper_sve_ld3bb_r, gen_helper_sve_ld4bb_r }, + { gen_helper_sve_ld1bhu_r, NULL, NULL, NULL }, + { gen_helper_sve_ld1bsu_r, NULL, NULL, NULL }, + { gen_helper_sve_ld1bdu_r, NULL, NULL, NULL }, + + { gen_helper_sve_ld1sds_le_r, NULL, NULL, NULL }, + { gen_helper_sve_ld1hh_le_r, gen_helper_sve_ld2hh_le_r, + gen_helper_sve_ld3hh_le_r, gen_helper_sve_ld4hh_le_r }, + { gen_helper_sve_ld1hsu_le_r, NULL, NULL, NULL }, + { gen_helper_sve_ld1hdu_le_r, NULL, NULL, NULL }, + + { gen_helper_sve_ld1hds_le_r, NULL, NULL, NULL }, + { gen_helper_sve_ld1hss_le_r, NULL, NULL, NULL }, + { gen_helper_sve_ld1ss_le_r, gen_helper_sve_ld2ss_le_r, + gen_helper_sve_ld3ss_le_r, gen_helper_sve_ld4ss_le_r }, + { gen_helper_sve_ld1sdu_le_r, NULL, NULL, NULL }, + + { gen_helper_sve_ld1bds_r, NULL, NULL, NULL }, + { gen_helper_sve_ld1bss_r, NULL, NULL, NULL }, + { gen_helper_sve_ld1bhs_r, NULL, NULL, NULL }, + { gen_helper_sve_ld1dd_le_r, gen_helper_sve_ld2dd_le_r, + gen_helper_sve_ld3dd_le_r, gen_helper_sve_ld4dd_le_r } }, + + /* mte inactive, big-endian */ + { { gen_helper_sve_ld1bb_r, gen_helper_sve_ld2bb_r, + gen_helper_sve_ld3bb_r, gen_helper_sve_ld4bb_r }, + { gen_helper_sve_ld1bhu_r, NULL, NULL, NULL }, + { gen_helper_sve_ld1bsu_r, NULL, NULL, NULL }, + { gen_helper_sve_ld1bdu_r, NULL, NULL, NULL }, + + { gen_helper_sve_ld1sds_be_r, NULL, NULL, NULL }, + { gen_helper_sve_ld1hh_be_r, gen_helper_sve_ld2hh_be_r, + gen_helper_sve_ld3hh_be_r, gen_helper_sve_ld4hh_be_r }, + { gen_helper_sve_ld1hsu_be_r, NULL, NULL, NULL }, + { gen_helper_sve_ld1hdu_be_r, NULL, NULL, NULL }, + + { gen_helper_sve_ld1hds_be_r, NULL, NULL, NULL }, + { gen_helper_sve_ld1hss_be_r, NULL, NULL, NULL }, + { gen_helper_sve_ld1ss_be_r, gen_helper_sve_ld2ss_be_r, + gen_helper_sve_ld3ss_be_r, gen_helper_sve_ld4ss_be_r }, + { gen_helper_sve_ld1sdu_be_r, NULL, NULL, NULL }, + + { gen_helper_sve_ld1bds_r, NULL, NULL, NULL }, + { gen_helper_sve_ld1bss_r, NULL, NULL, NULL }, + { gen_helper_sve_ld1bhs_r, NULL, NULL, NULL }, + { gen_helper_sve_ld1dd_be_r, gen_helper_sve_ld2dd_be_r, + gen_helper_sve_ld3dd_be_r, 
gen_helper_sve_ld4dd_be_r } } }, + + { /* mte active, little-endian */ + { { gen_helper_sve_ld1bb_r_mte, + gen_helper_sve_ld2bb_r_mte, + gen_helper_sve_ld3bb_r_mte, + gen_helper_sve_ld4bb_r_mte }, + { gen_helper_sve_ld1bhu_r_mte, NULL, NULL, NULL }, + { gen_helper_sve_ld1bsu_r_mte, NULL, NULL, NULL }, + { gen_helper_sve_ld1bdu_r_mte, NULL, NULL, NULL }, + + { gen_helper_sve_ld1sds_le_r_mte, NULL, NULL, NULL }, + { gen_helper_sve_ld1hh_le_r_mte, + gen_helper_sve_ld2hh_le_r_mte, + gen_helper_sve_ld3hh_le_r_mte, + gen_helper_sve_ld4hh_le_r_mte }, + { gen_helper_sve_ld1hsu_le_r_mte, NULL, NULL, NULL }, + { gen_helper_sve_ld1hdu_le_r_mte, NULL, NULL, NULL }, + + { gen_helper_sve_ld1hds_le_r_mte, NULL, NULL, NULL }, + { gen_helper_sve_ld1hss_le_r_mte, NULL, NULL, NULL }, + { gen_helper_sve_ld1ss_le_r_mte, + gen_helper_sve_ld2ss_le_r_mte, + gen_helper_sve_ld3ss_le_r_mte, + gen_helper_sve_ld4ss_le_r_mte }, + { gen_helper_sve_ld1sdu_le_r_mte, NULL, NULL, NULL }, + + { gen_helper_sve_ld1bds_r_mte, NULL, NULL, NULL }, + { gen_helper_sve_ld1bss_r_mte, NULL, NULL, NULL }, + { gen_helper_sve_ld1bhs_r_mte, NULL, NULL, NULL }, + { gen_helper_sve_ld1dd_le_r_mte, + gen_helper_sve_ld2dd_le_r_mte, + gen_helper_sve_ld3dd_le_r_mte, + gen_helper_sve_ld4dd_le_r_mte } }, + + /* mte active, big-endian */ + { { gen_helper_sve_ld1bb_r_mte, + gen_helper_sve_ld2bb_r_mte, + gen_helper_sve_ld3bb_r_mte, + gen_helper_sve_ld4bb_r_mte }, + { gen_helper_sve_ld1bhu_r_mte, NULL, NULL, NULL }, + { gen_helper_sve_ld1bsu_r_mte, NULL, NULL, NULL }, + { gen_helper_sve_ld1bdu_r_mte, NULL, NULL, NULL }, + + { gen_helper_sve_ld1sds_be_r_mte, NULL, NULL, NULL }, + { gen_helper_sve_ld1hh_be_r_mte, + gen_helper_sve_ld2hh_be_r_mte, + gen_helper_sve_ld3hh_be_r_mte, + gen_helper_sve_ld4hh_be_r_mte }, + { gen_helper_sve_ld1hsu_be_r_mte, NULL, NULL, NULL }, + { gen_helper_sve_ld1hdu_be_r_mte, NULL, NULL, NULL }, + + { gen_helper_sve_ld1hds_be_r_mte, NULL, NULL, NULL }, + { gen_helper_sve_ld1hss_be_r_mte, NULL, NULL, NULL }, + { gen_helper_sve_ld1ss_be_r_mte, + gen_helper_sve_ld2ss_be_r_mte, + gen_helper_sve_ld3ss_be_r_mte, + gen_helper_sve_ld4ss_be_r_mte }, + { gen_helper_sve_ld1sdu_be_r_mte, NULL, NULL, NULL }, + + { gen_helper_sve_ld1bds_r_mte, NULL, NULL, NULL }, + { gen_helper_sve_ld1bss_r_mte, NULL, NULL, NULL }, + { gen_helper_sve_ld1bhs_r_mte, NULL, NULL, NULL }, + { gen_helper_sve_ld1dd_be_r_mte, + gen_helper_sve_ld2dd_be_r_mte, + gen_helper_sve_ld3dd_be_r_mte, + gen_helper_sve_ld4dd_be_r_mte } } }, }; - gen_helper_gvec_mem *fn = fns[s->be_data == MO_BE][dtype][nreg]; + gen_helper_gvec_mem *fn + = fns[s->mte_active[0]][s->be_data == MO_BE][dtype][nreg]; - /* While there are holes in the table, they are not + /* + * While there are holes in the table, they are not * accessible via the instruction encoding. 
*/ assert(fn != NULL); - do_mem_zpa(s, zt, pg, addr, dtype, fn); + do_mem_zpa(s, zt, pg, addr, dtype, nreg, false, fn); } static bool trans_LD_zprr(DisasContext *s, arg_rprr_load *a) @@ -4819,56 +4889,98 @@ static bool trans_LD_zpri(DisasContext *s, arg_rpri_load *a) static bool trans_LDFF1_zprr(DisasContext *s, arg_rprr_load *a) { TCGContext *tcg_ctx = s->uc->tcg_ctx; - static gen_helper_gvec_mem * const fns[2][16] = { - /* Little-endian */ - { gen_helper_sve_ldff1bb_r, - gen_helper_sve_ldff1bhu_r, - gen_helper_sve_ldff1bsu_r, - gen_helper_sve_ldff1bdu_r, - - gen_helper_sve_ldff1sds_le_r, - gen_helper_sve_ldff1hh_le_r, - gen_helper_sve_ldff1hsu_le_r, - gen_helper_sve_ldff1hdu_le_r, - - gen_helper_sve_ldff1hds_le_r, - gen_helper_sve_ldff1hss_le_r, - gen_helper_sve_ldff1ss_le_r, - gen_helper_sve_ldff1sdu_le_r, - - gen_helper_sve_ldff1bds_r, - gen_helper_sve_ldff1bss_r, - gen_helper_sve_ldff1bhs_r, - gen_helper_sve_ldff1dd_le_r }, - - /* Big-endian */ - { gen_helper_sve_ldff1bb_r, - gen_helper_sve_ldff1bhu_r, - gen_helper_sve_ldff1bsu_r, - gen_helper_sve_ldff1bdu_r, - - gen_helper_sve_ldff1sds_be_r, - gen_helper_sve_ldff1hh_be_r, - gen_helper_sve_ldff1hsu_be_r, - gen_helper_sve_ldff1hdu_be_r, - - gen_helper_sve_ldff1hds_be_r, - gen_helper_sve_ldff1hss_be_r, - gen_helper_sve_ldff1ss_be_r, - gen_helper_sve_ldff1sdu_be_r, - - gen_helper_sve_ldff1bds_r, - gen_helper_sve_ldff1bss_r, - gen_helper_sve_ldff1bhs_r, - gen_helper_sve_ldff1dd_be_r }, + static gen_helper_gvec_mem * const fns[2][2][16] = { + { /* mte inactive, little-endian */ + { gen_helper_sve_ldff1bb_r, + gen_helper_sve_ldff1bhu_r, + gen_helper_sve_ldff1bsu_r, + gen_helper_sve_ldff1bdu_r, + + gen_helper_sve_ldff1sds_le_r, + gen_helper_sve_ldff1hh_le_r, + gen_helper_sve_ldff1hsu_le_r, + gen_helper_sve_ldff1hdu_le_r, + + gen_helper_sve_ldff1hds_le_r, + gen_helper_sve_ldff1hss_le_r, + gen_helper_sve_ldff1ss_le_r, + gen_helper_sve_ldff1sdu_le_r, + + gen_helper_sve_ldff1bds_r, + gen_helper_sve_ldff1bss_r, + gen_helper_sve_ldff1bhs_r, + gen_helper_sve_ldff1dd_le_r }, + + /* mte inactive, big-endian */ + { gen_helper_sve_ldff1bb_r, + gen_helper_sve_ldff1bhu_r, + gen_helper_sve_ldff1bsu_r, + gen_helper_sve_ldff1bdu_r, + + gen_helper_sve_ldff1sds_be_r, + gen_helper_sve_ldff1hh_be_r, + gen_helper_sve_ldff1hsu_be_r, + gen_helper_sve_ldff1hdu_be_r, + + gen_helper_sve_ldff1hds_be_r, + gen_helper_sve_ldff1hss_be_r, + gen_helper_sve_ldff1ss_be_r, + gen_helper_sve_ldff1sdu_be_r, + + gen_helper_sve_ldff1bds_r, + gen_helper_sve_ldff1bss_r, + gen_helper_sve_ldff1bhs_r, + gen_helper_sve_ldff1dd_be_r } }, + + { /* mte active, little-endian */ + { gen_helper_sve_ldff1bb_r_mte, + gen_helper_sve_ldff1bhu_r_mte, + gen_helper_sve_ldff1bsu_r_mte, + gen_helper_sve_ldff1bdu_r_mte, + + gen_helper_sve_ldff1sds_le_r_mte, + gen_helper_sve_ldff1hh_le_r_mte, + gen_helper_sve_ldff1hsu_le_r_mte, + gen_helper_sve_ldff1hdu_le_r_mte, + + gen_helper_sve_ldff1hds_le_r_mte, + gen_helper_sve_ldff1hss_le_r_mte, + gen_helper_sve_ldff1ss_le_r_mte, + gen_helper_sve_ldff1sdu_le_r_mte, + + gen_helper_sve_ldff1bds_r_mte, + gen_helper_sve_ldff1bss_r_mte, + gen_helper_sve_ldff1bhs_r_mte, + gen_helper_sve_ldff1dd_le_r_mte }, + + /* mte active, big-endian */ + { gen_helper_sve_ldff1bb_r_mte, + gen_helper_sve_ldff1bhu_r_mte, + gen_helper_sve_ldff1bsu_r_mte, + gen_helper_sve_ldff1bdu_r_mte, + + gen_helper_sve_ldff1sds_be_r_mte, + gen_helper_sve_ldff1hh_be_r_mte, + gen_helper_sve_ldff1hsu_be_r_mte, + gen_helper_sve_ldff1hdu_be_r_mte, + + gen_helper_sve_ldff1hds_be_r_mte, + 
gen_helper_sve_ldff1hss_be_r_mte, + gen_helper_sve_ldff1ss_be_r_mte, + gen_helper_sve_ldff1sdu_be_r_mte, + + gen_helper_sve_ldff1bds_r_mte, + gen_helper_sve_ldff1bss_r_mte, + gen_helper_sve_ldff1bhs_r_mte, + gen_helper_sve_ldff1dd_be_r_mte } }, }; if (sve_access_check(s)) { TCGv_i64 addr = new_tmp_a64(s); tcg_gen_shli_i64(tcg_ctx, addr, cpu_reg(s, a->rm), dtype_msz(a->dtype)); tcg_gen_add_i64(tcg_ctx, addr, addr, cpu_reg_sp(s, a->rn)); - do_mem_zpa(s, a->rd, a->pg, addr, a->dtype, - fns[s->be_data == MO_BE][a->dtype]); + do_mem_zpa(s, a->rd, a->pg, addr, a->dtype, 1, false, + fns[s->mte_active[0]][s->be_data == MO_BE][a->dtype]); } return true; } @@ -4876,48 +4988,90 @@ static bool trans_LDFF1_zprr(DisasContext *s, arg_rprr_load *a) static bool trans_LDNF1_zpri(DisasContext *s, arg_rpri_load *a) { TCGContext *tcg_ctx = s->uc->tcg_ctx; - static gen_helper_gvec_mem * const fns[2][16] = { - /* Little-endian */ - { gen_helper_sve_ldnf1bb_r, - gen_helper_sve_ldnf1bhu_r, - gen_helper_sve_ldnf1bsu_r, - gen_helper_sve_ldnf1bdu_r, - - gen_helper_sve_ldnf1sds_le_r, - gen_helper_sve_ldnf1hh_le_r, - gen_helper_sve_ldnf1hsu_le_r, - gen_helper_sve_ldnf1hdu_le_r, - - gen_helper_sve_ldnf1hds_le_r, - gen_helper_sve_ldnf1hss_le_r, - gen_helper_sve_ldnf1ss_le_r, - gen_helper_sve_ldnf1sdu_le_r, - - gen_helper_sve_ldnf1bds_r, - gen_helper_sve_ldnf1bss_r, - gen_helper_sve_ldnf1bhs_r, - gen_helper_sve_ldnf1dd_le_r }, - - /* Big-endian */ - { gen_helper_sve_ldnf1bb_r, - gen_helper_sve_ldnf1bhu_r, - gen_helper_sve_ldnf1bsu_r, - gen_helper_sve_ldnf1bdu_r, - - gen_helper_sve_ldnf1sds_be_r, - gen_helper_sve_ldnf1hh_be_r, - gen_helper_sve_ldnf1hsu_be_r, - gen_helper_sve_ldnf1hdu_be_r, - - gen_helper_sve_ldnf1hds_be_r, - gen_helper_sve_ldnf1hss_be_r, - gen_helper_sve_ldnf1ss_be_r, - gen_helper_sve_ldnf1sdu_be_r, - - gen_helper_sve_ldnf1bds_r, - gen_helper_sve_ldnf1bss_r, - gen_helper_sve_ldnf1bhs_r, - gen_helper_sve_ldnf1dd_be_r }, + static gen_helper_gvec_mem * const fns[2][2][16] = { + { /* mte inactive, little-endian */ + { gen_helper_sve_ldnf1bb_r, + gen_helper_sve_ldnf1bhu_r, + gen_helper_sve_ldnf1bsu_r, + gen_helper_sve_ldnf1bdu_r, + + gen_helper_sve_ldnf1sds_le_r, + gen_helper_sve_ldnf1hh_le_r, + gen_helper_sve_ldnf1hsu_le_r, + gen_helper_sve_ldnf1hdu_le_r, + + gen_helper_sve_ldnf1hds_le_r, + gen_helper_sve_ldnf1hss_le_r, + gen_helper_sve_ldnf1ss_le_r, + gen_helper_sve_ldnf1sdu_le_r, + + gen_helper_sve_ldnf1bds_r, + gen_helper_sve_ldnf1bss_r, + gen_helper_sve_ldnf1bhs_r, + gen_helper_sve_ldnf1dd_le_r }, + + /* mte inactive, big-endian */ + { gen_helper_sve_ldnf1bb_r, + gen_helper_sve_ldnf1bhu_r, + gen_helper_sve_ldnf1bsu_r, + gen_helper_sve_ldnf1bdu_r, + + gen_helper_sve_ldnf1sds_be_r, + gen_helper_sve_ldnf1hh_be_r, + gen_helper_sve_ldnf1hsu_be_r, + gen_helper_sve_ldnf1hdu_be_r, + + gen_helper_sve_ldnf1hds_be_r, + gen_helper_sve_ldnf1hss_be_r, + gen_helper_sve_ldnf1ss_be_r, + gen_helper_sve_ldnf1sdu_be_r, + + gen_helper_sve_ldnf1bds_r, + gen_helper_sve_ldnf1bss_r, + gen_helper_sve_ldnf1bhs_r, + gen_helper_sve_ldnf1dd_be_r } }, + + { /* mte inactive, little-endian */ + { gen_helper_sve_ldnf1bb_r_mte, + gen_helper_sve_ldnf1bhu_r_mte, + gen_helper_sve_ldnf1bsu_r_mte, + gen_helper_sve_ldnf1bdu_r_mte, + + gen_helper_sve_ldnf1sds_le_r_mte, + gen_helper_sve_ldnf1hh_le_r_mte, + gen_helper_sve_ldnf1hsu_le_r_mte, + gen_helper_sve_ldnf1hdu_le_r_mte, + + gen_helper_sve_ldnf1hds_le_r_mte, + gen_helper_sve_ldnf1hss_le_r_mte, + gen_helper_sve_ldnf1ss_le_r_mte, + gen_helper_sve_ldnf1sdu_le_r_mte, + + 
gen_helper_sve_ldnf1bds_r_mte, + gen_helper_sve_ldnf1bss_r_mte, + gen_helper_sve_ldnf1bhs_r_mte, + gen_helper_sve_ldnf1dd_le_r_mte }, + + /* mte inactive, big-endian */ + { gen_helper_sve_ldnf1bb_r_mte, + gen_helper_sve_ldnf1bhu_r_mte, + gen_helper_sve_ldnf1bsu_r_mte, + gen_helper_sve_ldnf1bdu_r_mte, + + gen_helper_sve_ldnf1sds_be_r_mte, + gen_helper_sve_ldnf1hh_be_r_mte, + gen_helper_sve_ldnf1hsu_be_r_mte, + gen_helper_sve_ldnf1hdu_be_r_mte, + + gen_helper_sve_ldnf1hds_be_r_mte, + gen_helper_sve_ldnf1hss_be_r_mte, + gen_helper_sve_ldnf1ss_be_r_mte, + gen_helper_sve_ldnf1sdu_be_r_mte, + + gen_helper_sve_ldnf1bds_r_mte, + gen_helper_sve_ldnf1bss_r_mte, + gen_helper_sve_ldnf1bhs_r_mte, + gen_helper_sve_ldnf1dd_be_r_mte } }, }; if (sve_access_check(s)) { @@ -4927,8 +5081,8 @@ static bool trans_LDNF1_zpri(DisasContext *s, arg_rpri_load *a) TCGv_i64 addr = new_tmp_a64(s); tcg_gen_addi_i64(tcg_ctx, addr, cpu_reg_sp(s, a->rn), off); - do_mem_zpa(s, a->rd, a->pg, addr, a->dtype, - fns[s->be_data == MO_BE][a->dtype]); + do_mem_zpa(s, a->rd, a->pg, addr, a->dtype, 1, false, + fns[s->mte_active[0]][s->be_data == MO_BE][a->dtype]); } return true; } @@ -4948,9 +5102,7 @@ static void do_ldrq(DisasContext *s, int zt, int pg, TCGv_i64 addr, int msz) int desc, poff; /* Load the first quadword using the normal predicated load helpers. */ - desc = sve_memopidx(s, msz_dtype(s, msz)); - desc |= zt << MEMOPIDX_SHIFT; - desc = simd_desc(16, 16, desc); + desc = simd_desc(16, 16, zt); t_desc = tcg_const_i32(tcg_ctx, desc); poff = pred_full_reg_offset(s, pg); @@ -5026,8 +5178,14 @@ static bool trans_LD1R_zpri(DisasContext *s, arg_rpri_load *a) unsigned psz = pred_full_reg_size(s); unsigned esz = dtype_esz[a->dtype]; unsigned msz = dtype_msz(a->dtype); - TCGLabel *over = gen_new_label(tcg_ctx); - TCGv_i64 temp; + TCGLabel *over; + TCGv_i64 temp, clean_addr; + + if (!sve_access_check(s)) { + return true; + } + + over = gen_new_label(tcg_ctx); /* If the guarding predicate has no bits set, no load occurs. */ if (psz <= 8) { @@ -5050,7 +5208,9 @@ static bool trans_LD1R_zpri(DisasContext *s, arg_rpri_load *a) /* Load the data. */ temp = tcg_temp_new_i64(tcg_ctx); tcg_gen_addi_i64(tcg_ctx, temp, cpu_reg_sp(s, a->rn), a->imm << msz); - tcg_gen_qemu_ld_i64(tcg_ctx, temp, temp, get_mem_index(s), + clean_addr = gen_mte_check1(s, temp, false, true, msz); + + tcg_gen_qemu_ld_i64(tcg_ctx, temp, clean_addr, get_mem_index(s), s->be_data | dtype_mop[a->dtype]); /* Broadcast to *all* elements. 
*/ @@ -5067,73 +5227,125 @@ static bool trans_LD1R_zpri(DisasContext *s, arg_rpri_load *a) static void do_st_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr, int msz, int esz, int nreg) { - static gen_helper_gvec_mem * const fn_single[2][4][4] = { - { { gen_helper_sve_st1bb_r, - gen_helper_sve_st1bh_r, - gen_helper_sve_st1bs_r, - gen_helper_sve_st1bd_r }, - { NULL, - gen_helper_sve_st1hh_le_r, - gen_helper_sve_st1hs_le_r, - gen_helper_sve_st1hd_le_r }, - { NULL, NULL, - gen_helper_sve_st1ss_le_r, - gen_helper_sve_st1sd_le_r }, - { NULL, NULL, NULL, - gen_helper_sve_st1dd_le_r } }, - { { gen_helper_sve_st1bb_r, - gen_helper_sve_st1bh_r, - gen_helper_sve_st1bs_r, - gen_helper_sve_st1bd_r }, - { NULL, - gen_helper_sve_st1hh_be_r, - gen_helper_sve_st1hs_be_r, - gen_helper_sve_st1hd_be_r }, - { NULL, NULL, - gen_helper_sve_st1ss_be_r, - gen_helper_sve_st1sd_be_r }, - { NULL, NULL, NULL, - gen_helper_sve_st1dd_be_r } }, + static gen_helper_gvec_mem * const fn_single[2][2][4][4] = { + { { { gen_helper_sve_st1bb_r, + gen_helper_sve_st1bh_r, + gen_helper_sve_st1bs_r, + gen_helper_sve_st1bd_r }, + { NULL, + gen_helper_sve_st1hh_le_r, + gen_helper_sve_st1hs_le_r, + gen_helper_sve_st1hd_le_r }, + { NULL, NULL, + gen_helper_sve_st1ss_le_r, + gen_helper_sve_st1sd_le_r }, + { NULL, NULL, NULL, + gen_helper_sve_st1dd_le_r } }, + { { gen_helper_sve_st1bb_r, + gen_helper_sve_st1bh_r, + gen_helper_sve_st1bs_r, + gen_helper_sve_st1bd_r }, + { NULL, + gen_helper_sve_st1hh_be_r, + gen_helper_sve_st1hs_be_r, + gen_helper_sve_st1hd_be_r }, + { NULL, NULL, + gen_helper_sve_st1ss_be_r, + gen_helper_sve_st1sd_be_r }, + { NULL, NULL, NULL, + gen_helper_sve_st1dd_be_r } } }, + + { { { gen_helper_sve_st1bb_r_mte, + gen_helper_sve_st1bh_r_mte, + gen_helper_sve_st1bs_r_mte, + gen_helper_sve_st1bd_r_mte }, + { NULL, + gen_helper_sve_st1hh_le_r_mte, + gen_helper_sve_st1hs_le_r_mte, + gen_helper_sve_st1hd_le_r_mte }, + { NULL, NULL, + gen_helper_sve_st1ss_le_r_mte, + gen_helper_sve_st1sd_le_r_mte }, + { NULL, NULL, NULL, + gen_helper_sve_st1dd_le_r_mte } }, + { { gen_helper_sve_st1bb_r_mte, + gen_helper_sve_st1bh_r_mte, + gen_helper_sve_st1bs_r_mte, + gen_helper_sve_st1bd_r_mte }, + { NULL, + gen_helper_sve_st1hh_be_r_mte, + gen_helper_sve_st1hs_be_r_mte, + gen_helper_sve_st1hd_be_r_mte }, + { NULL, NULL, + gen_helper_sve_st1ss_be_r_mte, + gen_helper_sve_st1sd_be_r_mte }, + { NULL, NULL, NULL, + gen_helper_sve_st1dd_be_r_mte } } }, }; - static gen_helper_gvec_mem * const fn_multiple[2][3][4] = { - { { gen_helper_sve_st2bb_r, - gen_helper_sve_st2hh_le_r, - gen_helper_sve_st2ss_le_r, - gen_helper_sve_st2dd_le_r }, - { gen_helper_sve_st3bb_r, - gen_helper_sve_st3hh_le_r, - gen_helper_sve_st3ss_le_r, - gen_helper_sve_st3dd_le_r }, - { gen_helper_sve_st4bb_r, - gen_helper_sve_st4hh_le_r, - gen_helper_sve_st4ss_le_r, - gen_helper_sve_st4dd_le_r } }, - { { gen_helper_sve_st2bb_r, - gen_helper_sve_st2hh_be_r, - gen_helper_sve_st2ss_be_r, - gen_helper_sve_st2dd_be_r }, - { gen_helper_sve_st3bb_r, - gen_helper_sve_st3hh_be_r, - gen_helper_sve_st3ss_be_r, - gen_helper_sve_st3dd_be_r }, - { gen_helper_sve_st4bb_r, - gen_helper_sve_st4hh_be_r, - gen_helper_sve_st4ss_be_r, - gen_helper_sve_st4dd_be_r } }, + static gen_helper_gvec_mem * const fn_multiple[2][2][3][4] = { + { { { gen_helper_sve_st2bb_r, + gen_helper_sve_st2hh_le_r, + gen_helper_sve_st2ss_le_r, + gen_helper_sve_st2dd_le_r }, + { gen_helper_sve_st3bb_r, + gen_helper_sve_st3hh_le_r, + gen_helper_sve_st3ss_le_r, + gen_helper_sve_st3dd_le_r }, + { gen_helper_sve_st4bb_r, + 
gen_helper_sve_st4hh_le_r, + gen_helper_sve_st4ss_le_r, + gen_helper_sve_st4dd_le_r } }, + { { gen_helper_sve_st2bb_r, + gen_helper_sve_st2hh_be_r, + gen_helper_sve_st2ss_be_r, + gen_helper_sve_st2dd_be_r }, + { gen_helper_sve_st3bb_r, + gen_helper_sve_st3hh_be_r, + gen_helper_sve_st3ss_be_r, + gen_helper_sve_st3dd_be_r }, + { gen_helper_sve_st4bb_r, + gen_helper_sve_st4hh_be_r, + gen_helper_sve_st4ss_be_r, + gen_helper_sve_st4dd_be_r } } }, + { { { gen_helper_sve_st2bb_r_mte, + gen_helper_sve_st2hh_le_r_mte, + gen_helper_sve_st2ss_le_r_mte, + gen_helper_sve_st2dd_le_r_mte }, + { gen_helper_sve_st3bb_r_mte, + gen_helper_sve_st3hh_le_r_mte, + gen_helper_sve_st3ss_le_r_mte, + gen_helper_sve_st3dd_le_r_mte }, + { gen_helper_sve_st4bb_r_mte, + gen_helper_sve_st4hh_le_r_mte, + gen_helper_sve_st4ss_le_r_mte, + gen_helper_sve_st4dd_le_r_mte } }, + { { gen_helper_sve_st2bb_r_mte, + gen_helper_sve_st2hh_be_r_mte, + gen_helper_sve_st2ss_be_r_mte, + gen_helper_sve_st2dd_be_r_mte }, + { gen_helper_sve_st3bb_r_mte, + gen_helper_sve_st3hh_be_r_mte, + gen_helper_sve_st3ss_be_r_mte, + gen_helper_sve_st3dd_be_r_mte }, + { gen_helper_sve_st4bb_r_mte, + gen_helper_sve_st4hh_be_r_mte, + gen_helper_sve_st4ss_be_r_mte, + gen_helper_sve_st4dd_be_r_mte } } }, }; gen_helper_gvec_mem *fn; int be = s->be_data == MO_BE; if (nreg == 0) { /* ST1 */ - fn = fn_single[be][msz][esz]; + fn = fn_single[s->mte_active[0]][be][msz][esz]; + nreg = 1; } else { /* ST2, ST3, ST4 -- msz == esz, enforced by encoding */ assert(msz == esz); - fn = fn_multiple[be][nreg - 1][msz]; + fn = fn_multiple[s->mte_active[0]][be][nreg - 1][msz]; } assert(fn != NULL); - do_mem_zpa(s, zt, pg, addr, msz_dtype(s, msz), fn); + do_mem_zpa(s, zt, pg, addr, msz_dtype(s, msz), nreg, true, fn); } static bool trans_ST_zprr(DisasContext *s, arg_rprr_store *a) @@ -5174,7 +5386,7 @@ static bool trans_ST_zpri(DisasContext *s, arg_rpri_store *a) */ static void do_mem_zpz(DisasContext *s, int zt, int pg, int zm, - int scale, TCGv_i64 scalar, int msz, + int scale, TCGv_i64 scalar, int msz, bool is_write, gen_helper_gvec_mem_scatter *fn) { TCGContext *tcg_ctx = s->uc->tcg_ctx; @@ -5183,11 +5395,17 @@ static void do_mem_zpz(DisasContext *s, int zt, int pg, int zm, TCGv_ptr t_pg = tcg_temp_new_ptr(tcg_ctx); TCGv_ptr t_zt = tcg_temp_new_ptr(tcg_ctx); TCGv_i32 t_desc; - int desc; + int desc = 0; - desc = sve_memopidx(s, msz_dtype(s, msz)); - desc |= scale << MEMOPIDX_SHIFT; - desc = simd_desc(vsz, vsz, desc); + if (s->mte_active[0]) { + FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s), desc); + FIELD_DP32(desc, MTEDESC, TBI, s->tbid, desc); + FIELD_DP32(desc, MTEDESC, TCMA, s->tcma, desc); + FIELD_DP32(desc, MTEDESC, WRITE, is_write, desc); + FIELD_DP32(desc, MTEDESC, ESIZE, 1 << msz, desc); + desc <<= SVE_MTEDESC_SHIFT; + } + desc = simd_desc(vsz, vsz, desc | scale); t_desc = tcg_const_i32(tcg_ctx, desc); tcg_gen_addi_ptr(tcg_ctx, t_pg, tcg_ctx->cpu_env, pred_full_reg_offset(s, pg)); @@ -5201,176 +5419,339 @@ static void do_mem_zpz(DisasContext *s, int zt, int pg, int zm, tcg_temp_free_i32(tcg_ctx, t_desc); } -/* Indexed by [be][ff][xs][u][msz]. 
*/ -static gen_helper_gvec_mem_scatter * const gather_load_fn32[2][2][2][2][3] = { - /* Little-endian */ - { { { { gen_helper_sve_ldbss_zsu, - gen_helper_sve_ldhss_le_zsu, - NULL, }, - { gen_helper_sve_ldbsu_zsu, - gen_helper_sve_ldhsu_le_zsu, - gen_helper_sve_ldss_le_zsu, } }, - { { gen_helper_sve_ldbss_zss, - gen_helper_sve_ldhss_le_zss, - NULL, }, - { gen_helper_sve_ldbsu_zss, - gen_helper_sve_ldhsu_le_zss, - gen_helper_sve_ldss_le_zss, } } }, - - /* First-fault */ - { { { gen_helper_sve_ldffbss_zsu, - gen_helper_sve_ldffhss_le_zsu, - NULL, }, - { gen_helper_sve_ldffbsu_zsu, - gen_helper_sve_ldffhsu_le_zsu, - gen_helper_sve_ldffss_le_zsu, } }, - { { gen_helper_sve_ldffbss_zss, - gen_helper_sve_ldffhss_le_zss, - NULL, }, - { gen_helper_sve_ldffbsu_zss, - gen_helper_sve_ldffhsu_le_zss, - gen_helper_sve_ldffss_le_zss, } } } }, - - /* Big-endian */ - { { { { gen_helper_sve_ldbss_zsu, - gen_helper_sve_ldhss_be_zsu, - NULL, }, - { gen_helper_sve_ldbsu_zsu, - gen_helper_sve_ldhsu_be_zsu, - gen_helper_sve_ldss_be_zsu, } }, - { { gen_helper_sve_ldbss_zss, - gen_helper_sve_ldhss_be_zss, - NULL, }, - { gen_helper_sve_ldbsu_zss, - gen_helper_sve_ldhsu_be_zss, - gen_helper_sve_ldss_be_zss, } } }, - - /* First-fault */ - { { { gen_helper_sve_ldffbss_zsu, - gen_helper_sve_ldffhss_be_zsu, - NULL, }, - { gen_helper_sve_ldffbsu_zsu, - gen_helper_sve_ldffhsu_be_zsu, - gen_helper_sve_ldffss_be_zsu, } }, - { { gen_helper_sve_ldffbss_zss, - gen_helper_sve_ldffhss_be_zss, - NULL, }, - { gen_helper_sve_ldffbsu_zss, - gen_helper_sve_ldffhsu_be_zss, - gen_helper_sve_ldffss_be_zss, } } } }, +/* Indexed by [mte][be][ff][xs][u][msz]. */ +static gen_helper_gvec_mem_scatter * const +gather_load_fn32[2][2][2][2][2][3] = { + { /* MTE Inactive */ + { /* Little-endian */ + { { { gen_helper_sve_ldbss_zsu, + gen_helper_sve_ldhss_le_zsu, + NULL, }, + { gen_helper_sve_ldbsu_zsu, + gen_helper_sve_ldhsu_le_zsu, + gen_helper_sve_ldss_le_zsu, } }, + { { gen_helper_sve_ldbss_zss, + gen_helper_sve_ldhss_le_zss, + NULL, }, + { gen_helper_sve_ldbsu_zss, + gen_helper_sve_ldhsu_le_zss, + gen_helper_sve_ldss_le_zss, } } }, + + /* First-fault */ + { { { gen_helper_sve_ldffbss_zsu, + gen_helper_sve_ldffhss_le_zsu, + NULL, }, + { gen_helper_sve_ldffbsu_zsu, + gen_helper_sve_ldffhsu_le_zsu, + gen_helper_sve_ldffss_le_zsu, } }, + { { gen_helper_sve_ldffbss_zss, + gen_helper_sve_ldffhss_le_zss, + NULL, }, + { gen_helper_sve_ldffbsu_zss, + gen_helper_sve_ldffhsu_le_zss, + gen_helper_sve_ldffss_le_zss, } } } }, + + { /* Big-endian */ + { { { gen_helper_sve_ldbss_zsu, + gen_helper_sve_ldhss_be_zsu, + NULL, }, + { gen_helper_sve_ldbsu_zsu, + gen_helper_sve_ldhsu_be_zsu, + gen_helper_sve_ldss_be_zsu, } }, + { { gen_helper_sve_ldbss_zss, + gen_helper_sve_ldhss_be_zss, + NULL, }, + { gen_helper_sve_ldbsu_zss, + gen_helper_sve_ldhsu_be_zss, + gen_helper_sve_ldss_be_zss, } } }, + + /* First-fault */ + { { { gen_helper_sve_ldffbss_zsu, + gen_helper_sve_ldffhss_be_zsu, + NULL, }, + { gen_helper_sve_ldffbsu_zsu, + gen_helper_sve_ldffhsu_be_zsu, + gen_helper_sve_ldffss_be_zsu, } }, + { { gen_helper_sve_ldffbss_zss, + gen_helper_sve_ldffhss_be_zss, + NULL, }, + { gen_helper_sve_ldffbsu_zss, + gen_helper_sve_ldffhsu_be_zss, + gen_helper_sve_ldffss_be_zss, } } } } }, + { /* MTE Active */ + { /* Little-endian */ + { { { gen_helper_sve_ldbss_zsu_mte, + gen_helper_sve_ldhss_le_zsu_mte, + NULL, }, + { gen_helper_sve_ldbsu_zsu_mte, + gen_helper_sve_ldhsu_le_zsu_mte, + gen_helper_sve_ldss_le_zsu_mte, } }, + { { gen_helper_sve_ldbss_zss_mte, + 
gen_helper_sve_ldhss_le_zss_mte, + NULL, }, + { gen_helper_sve_ldbsu_zss_mte, + gen_helper_sve_ldhsu_le_zss_mte, + gen_helper_sve_ldss_le_zss_mte, } } }, + + /* First-fault */ + { { { gen_helper_sve_ldffbss_zsu_mte, + gen_helper_sve_ldffhss_le_zsu_mte, + NULL, }, + { gen_helper_sve_ldffbsu_zsu_mte, + gen_helper_sve_ldffhsu_le_zsu_mte, + gen_helper_sve_ldffss_le_zsu_mte, } }, + { { gen_helper_sve_ldffbss_zss_mte, + gen_helper_sve_ldffhss_le_zss_mte, + NULL, }, + { gen_helper_sve_ldffbsu_zss_mte, + gen_helper_sve_ldffhsu_le_zss_mte, + gen_helper_sve_ldffss_le_zss_mte, } } } }, + + { /* Big-endian */ + { { { gen_helper_sve_ldbss_zsu_mte, + gen_helper_sve_ldhss_be_zsu_mte, + NULL, }, + { gen_helper_sve_ldbsu_zsu_mte, + gen_helper_sve_ldhsu_be_zsu_mte, + gen_helper_sve_ldss_be_zsu_mte, } }, + { { gen_helper_sve_ldbss_zss_mte, + gen_helper_sve_ldhss_be_zss_mte, + NULL, }, + { gen_helper_sve_ldbsu_zss_mte, + gen_helper_sve_ldhsu_be_zss_mte, + gen_helper_sve_ldss_be_zss_mte, } } }, + + /* First-fault */ + { { { gen_helper_sve_ldffbss_zsu_mte, + gen_helper_sve_ldffhss_be_zsu_mte, + NULL, }, + { gen_helper_sve_ldffbsu_zsu_mte, + gen_helper_sve_ldffhsu_be_zsu_mte, + gen_helper_sve_ldffss_be_zsu_mte, } }, + { { gen_helper_sve_ldffbss_zss_mte, + gen_helper_sve_ldffhss_be_zss_mte, + NULL, }, + { gen_helper_sve_ldffbsu_zss_mte, + gen_helper_sve_ldffhsu_be_zss_mte, + gen_helper_sve_ldffss_be_zss_mte, } } } } }, }; /* Note that we overload xs=2 to indicate 64-bit offset. */ -static gen_helper_gvec_mem_scatter * const gather_load_fn64[2][2][3][2][4] = { - /* Little-endian */ - { { { { gen_helper_sve_ldbds_zsu, - gen_helper_sve_ldhds_le_zsu, - gen_helper_sve_ldsds_le_zsu, - NULL, }, - { gen_helper_sve_ldbdu_zsu, - gen_helper_sve_ldhdu_le_zsu, - gen_helper_sve_ldsdu_le_zsu, - gen_helper_sve_lddd_le_zsu, } }, - { { gen_helper_sve_ldbds_zss, - gen_helper_sve_ldhds_le_zss, - gen_helper_sve_ldsds_le_zss, - NULL, }, - { gen_helper_sve_ldbdu_zss, - gen_helper_sve_ldhdu_le_zss, - gen_helper_sve_ldsdu_le_zss, - gen_helper_sve_lddd_le_zss, } }, - { { gen_helper_sve_ldbds_zd, - gen_helper_sve_ldhds_le_zd, - gen_helper_sve_ldsds_le_zd, - NULL, }, - { gen_helper_sve_ldbdu_zd, - gen_helper_sve_ldhdu_le_zd, - gen_helper_sve_ldsdu_le_zd, - gen_helper_sve_lddd_le_zd, } } }, - - /* First-fault */ - { { { gen_helper_sve_ldffbds_zsu, - gen_helper_sve_ldffhds_le_zsu, - gen_helper_sve_ldffsds_le_zsu, - NULL, }, - { gen_helper_sve_ldffbdu_zsu, - gen_helper_sve_ldffhdu_le_zsu, - gen_helper_sve_ldffsdu_le_zsu, - gen_helper_sve_ldffdd_le_zsu, } }, - { { gen_helper_sve_ldffbds_zss, - gen_helper_sve_ldffhds_le_zss, - gen_helper_sve_ldffsds_le_zss, - NULL, }, - { gen_helper_sve_ldffbdu_zss, - gen_helper_sve_ldffhdu_le_zss, - gen_helper_sve_ldffsdu_le_zss, - gen_helper_sve_ldffdd_le_zss, } }, - { { gen_helper_sve_ldffbds_zd, - gen_helper_sve_ldffhds_le_zd, - gen_helper_sve_ldffsds_le_zd, - NULL, }, - { gen_helper_sve_ldffbdu_zd, - gen_helper_sve_ldffhdu_le_zd, - gen_helper_sve_ldffsdu_le_zd, - gen_helper_sve_ldffdd_le_zd, } } } }, - - /* Big-endian */ - { { { { gen_helper_sve_ldbds_zsu, - gen_helper_sve_ldhds_be_zsu, - gen_helper_sve_ldsds_be_zsu, - NULL, }, - { gen_helper_sve_ldbdu_zsu, - gen_helper_sve_ldhdu_be_zsu, - gen_helper_sve_ldsdu_be_zsu, - gen_helper_sve_lddd_be_zsu, } }, - { { gen_helper_sve_ldbds_zss, - gen_helper_sve_ldhds_be_zss, - gen_helper_sve_ldsds_be_zss, - NULL, }, - { gen_helper_sve_ldbdu_zss, - gen_helper_sve_ldhdu_be_zss, - gen_helper_sve_ldsdu_be_zss, - gen_helper_sve_lddd_be_zss, } }, - { { 
gen_helper_sve_ldbds_zd, - gen_helper_sve_ldhds_be_zd, - gen_helper_sve_ldsds_be_zd, - NULL, }, - { gen_helper_sve_ldbdu_zd, - gen_helper_sve_ldhdu_be_zd, - gen_helper_sve_ldsdu_be_zd, - gen_helper_sve_lddd_be_zd, } } }, - - /* First-fault */ - { { { gen_helper_sve_ldffbds_zsu, - gen_helper_sve_ldffhds_be_zsu, - gen_helper_sve_ldffsds_be_zsu, - NULL, }, - { gen_helper_sve_ldffbdu_zsu, - gen_helper_sve_ldffhdu_be_zsu, - gen_helper_sve_ldffsdu_be_zsu, - gen_helper_sve_ldffdd_be_zsu, } }, - { { gen_helper_sve_ldffbds_zss, - gen_helper_sve_ldffhds_be_zss, - gen_helper_sve_ldffsds_be_zss, - NULL, }, - { gen_helper_sve_ldffbdu_zss, - gen_helper_sve_ldffhdu_be_zss, - gen_helper_sve_ldffsdu_be_zss, - gen_helper_sve_ldffdd_be_zss, } }, - { { gen_helper_sve_ldffbds_zd, - gen_helper_sve_ldffhds_be_zd, - gen_helper_sve_ldffsds_be_zd, - NULL, }, - { gen_helper_sve_ldffbdu_zd, - gen_helper_sve_ldffhdu_be_zd, - gen_helper_sve_ldffsdu_be_zd, - gen_helper_sve_ldffdd_be_zd, } } } }, +static gen_helper_gvec_mem_scatter * const +gather_load_fn64[2][2][2][3][2][4] = { + { /* MTE Inactive */ + { /* Little-endian */ + { { { gen_helper_sve_ldbds_zsu, + gen_helper_sve_ldhds_le_zsu, + gen_helper_sve_ldsds_le_zsu, + NULL, }, + { gen_helper_sve_ldbdu_zsu, + gen_helper_sve_ldhdu_le_zsu, + gen_helper_sve_ldsdu_le_zsu, + gen_helper_sve_lddd_le_zsu, } }, + { { gen_helper_sve_ldbds_zss, + gen_helper_sve_ldhds_le_zss, + gen_helper_sve_ldsds_le_zss, + NULL, }, + { gen_helper_sve_ldbdu_zss, + gen_helper_sve_ldhdu_le_zss, + gen_helper_sve_ldsdu_le_zss, + gen_helper_sve_lddd_le_zss, } }, + { { gen_helper_sve_ldbds_zd, + gen_helper_sve_ldhds_le_zd, + gen_helper_sve_ldsds_le_zd, + NULL, }, + { gen_helper_sve_ldbdu_zd, + gen_helper_sve_ldhdu_le_zd, + gen_helper_sve_ldsdu_le_zd, + gen_helper_sve_lddd_le_zd, } } }, + + /* First-fault */ + { { { gen_helper_sve_ldffbds_zsu, + gen_helper_sve_ldffhds_le_zsu, + gen_helper_sve_ldffsds_le_zsu, + NULL, }, + { gen_helper_sve_ldffbdu_zsu, + gen_helper_sve_ldffhdu_le_zsu, + gen_helper_sve_ldffsdu_le_zsu, + gen_helper_sve_ldffdd_le_zsu, } }, + { { gen_helper_sve_ldffbds_zss, + gen_helper_sve_ldffhds_le_zss, + gen_helper_sve_ldffsds_le_zss, + NULL, }, + { gen_helper_sve_ldffbdu_zss, + gen_helper_sve_ldffhdu_le_zss, + gen_helper_sve_ldffsdu_le_zss, + gen_helper_sve_ldffdd_le_zss, } }, + { { gen_helper_sve_ldffbds_zd, + gen_helper_sve_ldffhds_le_zd, + gen_helper_sve_ldffsds_le_zd, + NULL, }, + { gen_helper_sve_ldffbdu_zd, + gen_helper_sve_ldffhdu_le_zd, + gen_helper_sve_ldffsdu_le_zd, + gen_helper_sve_ldffdd_le_zd, } } } }, + { /* Big-endian */ + { { { gen_helper_sve_ldbds_zsu, + gen_helper_sve_ldhds_be_zsu, + gen_helper_sve_ldsds_be_zsu, + NULL, }, + { gen_helper_sve_ldbdu_zsu, + gen_helper_sve_ldhdu_be_zsu, + gen_helper_sve_ldsdu_be_zsu, + gen_helper_sve_lddd_be_zsu, } }, + { { gen_helper_sve_ldbds_zss, + gen_helper_sve_ldhds_be_zss, + gen_helper_sve_ldsds_be_zss, + NULL, }, + { gen_helper_sve_ldbdu_zss, + gen_helper_sve_ldhdu_be_zss, + gen_helper_sve_ldsdu_be_zss, + gen_helper_sve_lddd_be_zss, } }, + { { gen_helper_sve_ldbds_zd, + gen_helper_sve_ldhds_be_zd, + gen_helper_sve_ldsds_be_zd, + NULL, }, + { gen_helper_sve_ldbdu_zd, + gen_helper_sve_ldhdu_be_zd, + gen_helper_sve_ldsdu_be_zd, + gen_helper_sve_lddd_be_zd, } } }, + + /* First-fault */ + { { { gen_helper_sve_ldffbds_zsu, + gen_helper_sve_ldffhds_be_zsu, + gen_helper_sve_ldffsds_be_zsu, + NULL, }, + { gen_helper_sve_ldffbdu_zsu, + gen_helper_sve_ldffhdu_be_zsu, + gen_helper_sve_ldffsdu_be_zsu, + gen_helper_sve_ldffdd_be_zsu, } }, + { { 
gen_helper_sve_ldffbds_zss, + gen_helper_sve_ldffhds_be_zss, + gen_helper_sve_ldffsds_be_zss, + NULL, }, + { gen_helper_sve_ldffbdu_zss, + gen_helper_sve_ldffhdu_be_zss, + gen_helper_sve_ldffsdu_be_zss, + gen_helper_sve_ldffdd_be_zss, } }, + { { gen_helper_sve_ldffbds_zd, + gen_helper_sve_ldffhds_be_zd, + gen_helper_sve_ldffsds_be_zd, + NULL, }, + { gen_helper_sve_ldffbdu_zd, + gen_helper_sve_ldffhdu_be_zd, + gen_helper_sve_ldffsdu_be_zd, + gen_helper_sve_ldffdd_be_zd, } } } } }, + { /* MTE Active */ + { /* Little-endian */ + { { { gen_helper_sve_ldbds_zsu_mte, + gen_helper_sve_ldhds_le_zsu_mte, + gen_helper_sve_ldsds_le_zsu_mte, + NULL, }, + { gen_helper_sve_ldbdu_zsu_mte, + gen_helper_sve_ldhdu_le_zsu_mte, + gen_helper_sve_ldsdu_le_zsu_mte, + gen_helper_sve_lddd_le_zsu_mte, } }, + { { gen_helper_sve_ldbds_zss_mte, + gen_helper_sve_ldhds_le_zss_mte, + gen_helper_sve_ldsds_le_zss_mte, + NULL, }, + { gen_helper_sve_ldbdu_zss_mte, + gen_helper_sve_ldhdu_le_zss_mte, + gen_helper_sve_ldsdu_le_zss_mte, + gen_helper_sve_lddd_le_zss_mte, } }, + { { gen_helper_sve_ldbds_zd_mte, + gen_helper_sve_ldhds_le_zd_mte, + gen_helper_sve_ldsds_le_zd_mte, + NULL, }, + { gen_helper_sve_ldbdu_zd_mte, + gen_helper_sve_ldhdu_le_zd_mte, + gen_helper_sve_ldsdu_le_zd_mte, + gen_helper_sve_lddd_le_zd_mte, } } }, + + /* First-fault */ + { { { gen_helper_sve_ldffbds_zsu_mte, + gen_helper_sve_ldffhds_le_zsu_mte, + gen_helper_sve_ldffsds_le_zsu_mte, + NULL, }, + { gen_helper_sve_ldffbdu_zsu_mte, + gen_helper_sve_ldffhdu_le_zsu_mte, + gen_helper_sve_ldffsdu_le_zsu_mte, + gen_helper_sve_ldffdd_le_zsu_mte, } }, + { { gen_helper_sve_ldffbds_zss_mte, + gen_helper_sve_ldffhds_le_zss_mte, + gen_helper_sve_ldffsds_le_zss_mte, + NULL, }, + { gen_helper_sve_ldffbdu_zss_mte, + gen_helper_sve_ldffhdu_le_zss_mte, + gen_helper_sve_ldffsdu_le_zss_mte, + gen_helper_sve_ldffdd_le_zss_mte, } }, + { { gen_helper_sve_ldffbds_zd_mte, + gen_helper_sve_ldffhds_le_zd_mte, + gen_helper_sve_ldffsds_le_zd_mte, + NULL, }, + { gen_helper_sve_ldffbdu_zd_mte, + gen_helper_sve_ldffhdu_le_zd_mte, + gen_helper_sve_ldffsdu_le_zd_mte, + gen_helper_sve_ldffdd_le_zd_mte, } } } }, + { /* Big-endian */ + { { { gen_helper_sve_ldbds_zsu_mte, + gen_helper_sve_ldhds_be_zsu_mte, + gen_helper_sve_ldsds_be_zsu_mte, + NULL, }, + { gen_helper_sve_ldbdu_zsu_mte, + gen_helper_sve_ldhdu_be_zsu_mte, + gen_helper_sve_ldsdu_be_zsu_mte, + gen_helper_sve_lddd_be_zsu_mte, } }, + { { gen_helper_sve_ldbds_zss_mte, + gen_helper_sve_ldhds_be_zss_mte, + gen_helper_sve_ldsds_be_zss_mte, + NULL, }, + { gen_helper_sve_ldbdu_zss_mte, + gen_helper_sve_ldhdu_be_zss_mte, + gen_helper_sve_ldsdu_be_zss_mte, + gen_helper_sve_lddd_be_zss_mte, } }, + { { gen_helper_sve_ldbds_zd_mte, + gen_helper_sve_ldhds_be_zd_mte, + gen_helper_sve_ldsds_be_zd_mte, + NULL, }, + { gen_helper_sve_ldbdu_zd_mte, + gen_helper_sve_ldhdu_be_zd_mte, + gen_helper_sve_ldsdu_be_zd_mte, + gen_helper_sve_lddd_be_zd_mte, } } }, + + /* First-fault */ + { { { gen_helper_sve_ldffbds_zsu_mte, + gen_helper_sve_ldffhds_be_zsu_mte, + gen_helper_sve_ldffsds_be_zsu_mte, + NULL, }, + { gen_helper_sve_ldffbdu_zsu_mte, + gen_helper_sve_ldffhdu_be_zsu_mte, + gen_helper_sve_ldffsdu_be_zsu_mte, + gen_helper_sve_ldffdd_be_zsu_mte, } }, + { { gen_helper_sve_ldffbds_zss_mte, + gen_helper_sve_ldffhds_be_zss_mte, + gen_helper_sve_ldffsds_be_zss_mte, + NULL, }, + { gen_helper_sve_ldffbdu_zss_mte, + gen_helper_sve_ldffhdu_be_zss_mte, + gen_helper_sve_ldffsdu_be_zss_mte, + gen_helper_sve_ldffdd_be_zss_mte, } }, + { { 
gen_helper_sve_ldffbds_zd_mte, + gen_helper_sve_ldffhds_be_zd_mte, + gen_helper_sve_ldffsds_be_zd_mte, + NULL, }, + { gen_helper_sve_ldffbdu_zd_mte, + gen_helper_sve_ldffhdu_be_zd_mte, + gen_helper_sve_ldffsdu_be_zd_mte, + gen_helper_sve_ldffdd_be_zd_mte, } } } } }, }; static bool trans_LD1_zprz(DisasContext *s, arg_LD1_zprz *a) { gen_helper_gvec_mem_scatter *fn = NULL; - int be = s->be_data == MO_BE; + bool be = s->be_data == MO_BE; + bool mte = s->mte_active[0]; if (!sve_access_check(s)) { return true; @@ -5378,16 +5759,16 @@ static bool trans_LD1_zprz(DisasContext *s, arg_LD1_zprz *a) switch (a->esz) { case MO_32: - fn = gather_load_fn32[be][a->ff][a->xs][a->u][a->msz]; + fn = gather_load_fn32[mte][be][a->ff][a->xs][a->u][a->msz]; break; case MO_64: - fn = gather_load_fn64[be][a->ff][a->xs][a->u][a->msz]; + fn = gather_load_fn64[mte][be][a->ff][a->xs][a->u][a->msz]; break; } assert(fn != NULL); do_mem_zpz(s, a->rd, a->pg, a->rm, a->scale * a->msz, - cpu_reg_sp(s, a->rn), a->msz, fn); + cpu_reg_sp(s, a->rn), a->msz, false, fn); return true; } @@ -5395,7 +5776,8 @@ static bool trans_LD1_zpiz(DisasContext *s, arg_LD1_zpiz *a) { TCGContext *tcg_ctx = s->uc->tcg_ctx; gen_helper_gvec_mem_scatter *fn = NULL; - int be = s->be_data == MO_BE; + bool be = s->be_data == MO_BE; + bool mte = s->mte_active[0]; TCGv_i64 imm; if (a->esz < a->msz || (a->esz == a->msz && !a->u)) { @@ -5407,10 +5789,10 @@ static bool trans_LD1_zpiz(DisasContext *s, arg_LD1_zpiz *a) switch (a->esz) { case MO_32: - fn = gather_load_fn32[be][a->ff][0][a->u][a->msz]; + fn = gather_load_fn32[mte][be][a->ff][0][a->u][a->msz]; break; case MO_64: - fn = gather_load_fn64[be][a->ff][2][a->u][a->msz]; + fn = gather_load_fn64[mte][be][a->ff][2][a->u][a->msz]; break; } assert(fn != NULL); @@ -5419,63 +5801,108 @@ static bool trans_LD1_zpiz(DisasContext *s, arg_LD1_zpiz *a) * by loading the immediate into the scalar parameter. */ imm = tcg_const_i64(tcg_ctx, a->imm << a->msz); - do_mem_zpz(s, a->rd, a->pg, a->rn, 0, imm, a->msz, fn); + do_mem_zpz(s, a->rd, a->pg, a->rn, 0, imm, a->msz, false, fn); tcg_temp_free_i64(tcg_ctx, imm); return true; } -/* Indexed by [be][xs][msz]. */ -static gen_helper_gvec_mem_scatter * const scatter_store_fn32[2][2][3] = { - /* Little-endian */ - { { gen_helper_sve_stbs_zsu, - gen_helper_sve_sths_le_zsu, - gen_helper_sve_stss_le_zsu, }, - { gen_helper_sve_stbs_zss, - gen_helper_sve_sths_le_zss, - gen_helper_sve_stss_le_zss, } }, - /* Big-endian */ - { { gen_helper_sve_stbs_zsu, - gen_helper_sve_sths_be_zsu, - gen_helper_sve_stss_be_zsu, }, - { gen_helper_sve_stbs_zss, - gen_helper_sve_sths_be_zss, - gen_helper_sve_stss_be_zss, } }, +/* Indexed by [mte][be][xs][msz]. 
*/ +static gen_helper_gvec_mem_scatter * const scatter_store_fn32[2][2][2][3] = { + { /* MTE Inactive */ + { /* Little-endian */ + { gen_helper_sve_stbs_zsu, + gen_helper_sve_sths_le_zsu, + gen_helper_sve_stss_le_zsu, }, + { gen_helper_sve_stbs_zss, + gen_helper_sve_sths_le_zss, + gen_helper_sve_stss_le_zss, } }, + { /* Big-endian */ + { gen_helper_sve_stbs_zsu, + gen_helper_sve_sths_be_zsu, + gen_helper_sve_stss_be_zsu, }, + { gen_helper_sve_stbs_zss, + gen_helper_sve_sths_be_zss, + gen_helper_sve_stss_be_zss, } } }, + { /* MTE Active */ + { /* Little-endian */ + { gen_helper_sve_stbs_zsu_mte, + gen_helper_sve_sths_le_zsu_mte, + gen_helper_sve_stss_le_zsu_mte, }, + { gen_helper_sve_stbs_zss_mte, + gen_helper_sve_sths_le_zss_mte, + gen_helper_sve_stss_le_zss_mte, } }, + { /* Big-endian */ + { gen_helper_sve_stbs_zsu_mte, + gen_helper_sve_sths_be_zsu_mte, + gen_helper_sve_stss_be_zsu_mte, }, + { gen_helper_sve_stbs_zss_mte, + gen_helper_sve_sths_be_zss_mte, + gen_helper_sve_stss_be_zss_mte, } } }, }; /* Note that we overload xs=2 to indicate 64-bit offset. */ -static gen_helper_gvec_mem_scatter * const scatter_store_fn64[2][3][4] = { - /* Little-endian */ - { { gen_helper_sve_stbd_zsu, - gen_helper_sve_sthd_le_zsu, - gen_helper_sve_stsd_le_zsu, - gen_helper_sve_stdd_le_zsu, }, - { gen_helper_sve_stbd_zss, - gen_helper_sve_sthd_le_zss, - gen_helper_sve_stsd_le_zss, - gen_helper_sve_stdd_le_zss, }, - { gen_helper_sve_stbd_zd, - gen_helper_sve_sthd_le_zd, - gen_helper_sve_stsd_le_zd, - gen_helper_sve_stdd_le_zd, } }, - /* Big-endian */ - { { gen_helper_sve_stbd_zsu, - gen_helper_sve_sthd_be_zsu, - gen_helper_sve_stsd_be_zsu, - gen_helper_sve_stdd_be_zsu, }, - { gen_helper_sve_stbd_zss, - gen_helper_sve_sthd_be_zss, - gen_helper_sve_stsd_be_zss, - gen_helper_sve_stdd_be_zss, }, - { gen_helper_sve_stbd_zd, - gen_helper_sve_sthd_be_zd, - gen_helper_sve_stsd_be_zd, - gen_helper_sve_stdd_be_zd, } }, +static gen_helper_gvec_mem_scatter * const scatter_store_fn64[2][2][3][4] = { + { /* MTE Inactive */ + { /* Little-endian */ + { gen_helper_sve_stbd_zsu, + gen_helper_sve_sthd_le_zsu, + gen_helper_sve_stsd_le_zsu, + gen_helper_sve_stdd_le_zsu, }, + { gen_helper_sve_stbd_zss, + gen_helper_sve_sthd_le_zss, + gen_helper_sve_stsd_le_zss, + gen_helper_sve_stdd_le_zss, }, + { gen_helper_sve_stbd_zd, + gen_helper_sve_sthd_le_zd, + gen_helper_sve_stsd_le_zd, + gen_helper_sve_stdd_le_zd, } }, + { /* Big-endian */ + { gen_helper_sve_stbd_zsu, + gen_helper_sve_sthd_be_zsu, + gen_helper_sve_stsd_be_zsu, + gen_helper_sve_stdd_be_zsu, }, + { gen_helper_sve_stbd_zss, + gen_helper_sve_sthd_be_zss, + gen_helper_sve_stsd_be_zss, + gen_helper_sve_stdd_be_zss, }, + { gen_helper_sve_stbd_zd, + gen_helper_sve_sthd_be_zd, + gen_helper_sve_stsd_be_zd, + gen_helper_sve_stdd_be_zd, } } }, + { /* MTE Inactive */ + { /* Little-endian */ + { gen_helper_sve_stbd_zsu_mte, + gen_helper_sve_sthd_le_zsu_mte, + gen_helper_sve_stsd_le_zsu_mte, + gen_helper_sve_stdd_le_zsu_mte, }, + { gen_helper_sve_stbd_zss_mte, + gen_helper_sve_sthd_le_zss_mte, + gen_helper_sve_stsd_le_zss_mte, + gen_helper_sve_stdd_le_zss_mte, }, + { gen_helper_sve_stbd_zd_mte, + gen_helper_sve_sthd_le_zd_mte, + gen_helper_sve_stsd_le_zd_mte, + gen_helper_sve_stdd_le_zd_mte, } }, + { /* Big-endian */ + { gen_helper_sve_stbd_zsu_mte, + gen_helper_sve_sthd_be_zsu_mte, + gen_helper_sve_stsd_be_zsu_mte, + gen_helper_sve_stdd_be_zsu_mte, }, + { gen_helper_sve_stbd_zss_mte, + gen_helper_sve_sthd_be_zss_mte, + gen_helper_sve_stsd_be_zss_mte, + gen_helper_sve_stdd_be_zss_mte, 
}, + { gen_helper_sve_stbd_zd_mte, + gen_helper_sve_sthd_be_zd_mte, + gen_helper_sve_stsd_be_zd_mte, + gen_helper_sve_stdd_be_zd_mte, } } }, }; static bool trans_ST1_zprz(DisasContext *s, arg_ST1_zprz *a) { gen_helper_gvec_mem_scatter *fn = NULL; - int be = s->be_data == MO_BE; + bool be = s->be_data == MO_BE; + bool mte = s->mte_active[0]; if (a->esz < a->msz || (a->msz == 0 && a->scale)) { return false; @@ -5485,16 +5912,16 @@ static bool trans_ST1_zprz(DisasContext *s, arg_ST1_zprz *a) } switch (a->esz) { case MO_32: - fn = scatter_store_fn32[be][a->xs][a->msz]; + fn = scatter_store_fn32[mte][be][a->xs][a->msz]; break; case MO_64: - fn = scatter_store_fn64[be][a->xs][a->msz]; + fn = scatter_store_fn64[mte][be][a->xs][a->msz]; break; default: g_assert_not_reached(); } do_mem_zpz(s, a->rd, a->pg, a->rm, a->scale * a->msz, - cpu_reg_sp(s, a->rn), a->msz, fn); + cpu_reg_sp(s, a->rn), a->msz, true, fn); return true; } @@ -5502,7 +5929,8 @@ static bool trans_ST1_zpiz(DisasContext *s, arg_ST1_zpiz *a) { TCGContext *tcg_ctx = s->uc->tcg_ctx; gen_helper_gvec_mem_scatter *fn = NULL; - int be = s->be_data == MO_BE; + bool be = s->be_data == MO_BE; + bool mte = s->mte_active[0]; TCGv_i64 imm; if (a->esz < a->msz) { @@ -5514,10 +5942,10 @@ static bool trans_ST1_zpiz(DisasContext *s, arg_ST1_zpiz *a) switch (a->esz) { case MO_32: - fn = scatter_store_fn32[be][0][a->msz]; + fn = scatter_store_fn32[mte][be][0][a->msz]; break; case MO_64: - fn = scatter_store_fn64[be][2][a->msz]; + fn = scatter_store_fn64[mte][be][2][a->msz]; break; } assert(fn != NULL); @@ -5526,7 +5954,7 @@ static bool trans_ST1_zpiz(DisasContext *s, arg_ST1_zpiz *a) * by loading the immediate into the scalar parameter. */ imm = tcg_const_i64(tcg_ctx, a->imm << a->msz); - do_mem_zpz(s, a->rd, a->pg, a->rn, 0, imm, a->msz, fn); + do_mem_zpz(s, a->rd, a->pg, a->rn, 0, imm, a->msz, true, fn); tcg_temp_free_i64(tcg_ctx, imm); return true; } diff --git a/qemu/target/arm/translate-vfp.inc.c b/qemu/target/arm/translate-vfp.inc.c index 4773efb9b2..e0fd1dfda9 100644 --- a/qemu/target/arm/translate-vfp.inc.c +++ b/qemu/target/arm/translate-vfp.inc.c @@ -122,15 +122,14 @@ static bool full_vfp_access_check(DisasContext *s, bool ignore_vfp_enabled) if (s->v7m_lspact) { /* * Lazy state saving affects external memory and also the NVIC, - * so we must mark it as an IO operation for icount. + * so we must mark it as an IO operation for icount (and cause + * this to be the last insn in the TB). */ if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) { + s->base.is_jmp = DISAS_UPDATE_EXIT; gen_io_start(tcg_ctx); } gen_helper_v7m_preserve_fp_state(tcg_ctx, tcg_ctx->cpu_env); - if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) { - gen_io_end(tcg_ctx); - } /* * If the preserve_fp_state helper doesn't throw an exception * then it will clear LSPACT; we don't need to repeat this for @@ -1911,12 +1910,6 @@ static bool do_vfm_dp(DisasContext *s, arg_VFMA_dp *a, bool neg_n, bool neg_d) return false; } - /* UNDEF accesses to D16-D31 if they don't exist. 
*/ - if (!dc_isar_feature(aa32_simd_r32, s) && - ((a->vd | a->vn | a->vm) & 0x10)) { - return false; - } - if (!vfp_access_check(s)) { return true; } @@ -2930,6 +2923,6 @@ static bool trans_VLLDM_VLSTM(DisasContext *s, arg_VLLDM_VLSTM *a) tcg_temp_free_i32(tcg_ctx, fptr); /* End the TB, because we have updated FP control bits */ - s->base.is_jmp = DISAS_UPDATE; + s->base.is_jmp = DISAS_UPDATE_EXIT; return true; } diff --git a/qemu/target/arm/translate.c b/qemu/target/arm/translate.c index 744d8ff709..489db79713 100644 --- a/qemu/target/arm/translate.c +++ b/qemu/target/arm/translate.c @@ -368,47 +368,10 @@ static void gen_revsh(TCGContext *tcg_ctx, TCGv_i32 dest, TCGv_i32 var) tcg_gen_ext16s_i32(tcg_ctx, dest, var); } -/* 32x32->64 multiply. Marks inputs as dead. */ -static TCGv_i64 gen_mulu_i64_i32(TCGContext *tcg_ctx, TCGv_i32 a, TCGv_i32 b) -{ - TCGv_i32 lo = tcg_temp_new_i32(tcg_ctx); - TCGv_i32 hi = tcg_temp_new_i32(tcg_ctx); - TCGv_i64 ret; - - tcg_gen_mulu2_i32(tcg_ctx, lo, hi, a, b); - tcg_temp_free_i32(tcg_ctx, a); - tcg_temp_free_i32(tcg_ctx, b); - - ret = tcg_temp_new_i64(tcg_ctx); - tcg_gen_concat_i32_i64(tcg_ctx, ret, lo, hi); - tcg_temp_free_i32(tcg_ctx, lo); - tcg_temp_free_i32(tcg_ctx, hi); - - return ret; -} - -static TCGv_i64 gen_muls_i64_i32(TCGContext *tcg_ctx, TCGv_i32 a, TCGv_i32 b) -{ - TCGv_i32 lo = tcg_temp_new_i32(tcg_ctx); - TCGv_i32 hi = tcg_temp_new_i32(tcg_ctx); - TCGv_i64 ret; - - tcg_gen_muls2_i32(tcg_ctx, lo, hi, a, b); - tcg_temp_free_i32(tcg_ctx, a); - tcg_temp_free_i32(tcg_ctx, b); - - ret = tcg_temp_new_i64(tcg_ctx); - tcg_gen_concat_i32_i64(tcg_ctx, ret, lo, hi); - tcg_temp_free_i32(tcg_ctx, lo); - tcg_temp_free_i32(tcg_ctx, hi); - - return ret; -} - /* Swap low and high halfwords. */ -static void gen_swap_half(TCGContext *tcg_ctx, TCGv_i32 var) +static void gen_swap_half(TCGContext *tcg_ctx, TCGv_i32 dest, TCGv_i32 var) { - tcg_gen_rotri_i32(tcg_ctx, var, var, 16); + tcg_gen_rotri_i32(tcg_ctx, dest, var, 16); } /* Dual 16-bit add. Result placed in t0 and t1 is marked as dead. @@ -1197,25 +1160,6 @@ neon_reg_offset (int reg, int n) return vfp_reg_offset(0, sreg); } -/* Return the offset of a 2**SIZE piece of a NEON register, at index ELE, - * where 0 is the least significant end of the register. - */ -static inline long -neon_element_offset(int reg, int element, MemOp size) -{ - int element_size = 1 << size; - int ofs = element * element_size; -#ifdef HOST_WORDS_BIGENDIAN - /* Calculate the offset assuming fully little-endian, - * then XOR to account for the order of the 8-byte units. 
- */ - if (element_size < 8) { - ofs ^= 8 - element_size; - } -#endif - return neon_reg_offset(reg, 0) + ofs; -} - static TCGv_i32 neon_load_reg(TCGContext *tcg_ctx, int reg, int pass) { TCGv_i32 tmp = tcg_temp_new_i32(tcg_ctx); @@ -1223,98 +1167,12 @@ static TCGv_i32 neon_load_reg(TCGContext *tcg_ctx, int reg, int pass) return tmp; } -static void neon_load_element(TCGContext *tcg_ctx, TCGv_i32 var, int reg, int ele, MemOp mop) -{ - long offset = neon_element_offset(reg, ele, mop & MO_SIZE); - - switch (mop) { - case MO_UB: - tcg_gen_ld8u_i32(tcg_ctx, var, tcg_ctx->cpu_env, offset); - break; - case MO_UW: - tcg_gen_ld16u_i32(tcg_ctx, var, tcg_ctx->cpu_env, offset); - break; - case MO_UL: - tcg_gen_ld_i32(tcg_ctx, var, tcg_ctx->cpu_env, offset); - break; - default: - g_assert_not_reached(); - break; - } -} - -static void neon_load_element64(TCGContext *tcg_ctx, TCGv_i64 var, int reg, int ele, MemOp mop) -{ - long offset = neon_element_offset(reg, ele, mop & MO_SIZE); - - switch (mop) { - case MO_UB: - tcg_gen_ld8u_i64(tcg_ctx, var, tcg_ctx->cpu_env, offset); - break; - case MO_UW: - tcg_gen_ld16u_i64(tcg_ctx, var, tcg_ctx->cpu_env, offset); - break; - case MO_UL: - tcg_gen_ld32u_i64(tcg_ctx, var, tcg_ctx->cpu_env, offset); - break; - case MO_Q: - tcg_gen_ld_i64(tcg_ctx, var, tcg_ctx->cpu_env, offset); - break; - default: - g_assert_not_reached(); - break; - } -} - static void neon_store_reg(TCGContext *tcg_ctx, int reg, int pass, TCGv_i32 var) { tcg_gen_st_i32(tcg_ctx, var, tcg_ctx->cpu_env, neon_reg_offset(reg, pass)); tcg_temp_free_i32(tcg_ctx, var); } -static void neon_store_element(TCGContext *tcg_ctx, int reg, int ele, MemOp size, TCGv_i32 var) -{ - long offset = neon_element_offset(reg, ele, size); - - switch (size) { - case MO_8: - tcg_gen_st8_i32(tcg_ctx, var, tcg_ctx->cpu_env, offset); - break; - case MO_16: - tcg_gen_st16_i32(tcg_ctx, var, tcg_ctx->cpu_env, offset); - break; - case MO_32: - tcg_gen_st_i32(tcg_ctx, var, tcg_ctx->cpu_env, offset); - break; - default: - g_assert_not_reached(); - break; - } -} - -static void neon_store_element64(TCGContext *tcg_ctx, int reg, int ele, MemOp size, TCGv_i64 var) -{ - long offset = neon_element_offset(reg, ele, size); - - switch (size) { - case MO_8: - tcg_gen_st8_i64(tcg_ctx, var, tcg_ctx->cpu_env, offset); - break; - case MO_16: - tcg_gen_st16_i64(tcg_ctx, var, tcg_ctx->cpu_env, offset); - break; - case MO_32: - tcg_gen_st32_i64(tcg_ctx, var, tcg_ctx->cpu_env, offset); - break; - case MO_64: - tcg_gen_st_i64(tcg_ctx, var, tcg_ctx->cpu_env, offset); - break; - default: - g_assert_not_reached(); - break; - } -} - static inline void neon_load_reg64(TCGContext *tcg_ctx, TCGv_i64 var, int reg) { tcg_gen_ld_i64(tcg_ctx, var, tcg_ctx->cpu_env, vfp_reg_offset(1, reg)); @@ -1344,8 +1202,9 @@ static TCGv_ptr vfp_reg_ptr(TCGContext *tcg_ctx, bool dp, int reg) #define ARM_CP_RW_BIT (1 << 20) -/* Include the VFP decoder */ +/* Include the VFP and Neon decoder */ #include "translate-vfp.inc.c" +#include "translate-neon.inc.c" static inline void iwmmxt_load_reg(TCGContext *tcg_ctx, TCGv_i64 var, int reg) { @@ -2660,8 +2519,6 @@ static int disas_dsp_insn(DisasContext *s, uint32_t insn) ((VFP_REG_SHR_POS(insn, bigbit - 1) & 0x1e) | (((insn) >> (smallbit)) & 1)) #else #define VFP_REG_SHR(x, n) (((n) > 0) ? 
(x) >> (n) : (x) << -(n)) -#define VFP_SREG(insn, bigbit, smallbit) \ - ((VFP_REG_SHR(insn, bigbit - 1) & 0x1e) | (((insn) >> (smallbit)) & 1)) #endif #define VFP_DREG(reg, insn, bigbit, smallbit) do { \ if (dc_isar_feature(aa32_simd_r32, s)) { \ @@ -2674,39 +2531,15 @@ static int disas_dsp_insn(DisasContext *s, uint32_t insn) }} while (0) #ifdef _MSC_VER -#define VFP_SREG_D(insn) VFP_SREG_POS(insn, 12, 22) #define VFP_DREG_D(reg, insn) VFP_DREG(reg, insn, 12, 22) -#define VFP_SREG_N(insn) VFP_SREG_POS(insn, 16, 7) #define VFP_DREG_N(reg, insn) VFP_DREG(reg, insn, 16, 7) -#define VFP_SREG_M(insn) VFP_SREG_NEG(insn, 0, 5) #define VFP_DREG_M(reg, insn) VFP_DREG(reg, insn, 0, 5) #else -#define VFP_SREG_D(insn) VFP_SREG(insn, 12, 22) #define VFP_DREG_D(reg, insn) VFP_DREG(reg, insn, 12, 22) -#define VFP_SREG_N(insn) VFP_SREG(insn, 16, 7) #define VFP_DREG_N(reg, insn) VFP_DREG(reg, insn, 16, 7) -#define VFP_SREG_M(insn) VFP_SREG(insn, 0, 5) #define VFP_DREG_M(reg, insn) VFP_DREG(reg, insn, 0, 5) #endif -static void gen_neon_dup_low16(TCGContext *tcg_ctx, TCGv_i32 var) -{ - TCGv_i32 tmp = tcg_temp_new_i32(tcg_ctx); - tcg_gen_ext16u_i32(tcg_ctx, var, var); - tcg_gen_shli_i32(tcg_ctx, tmp, var, 16); - tcg_gen_or_i32(tcg_ctx, var, var, tmp); - tcg_temp_free_i32(tcg_ctx, tmp); -} - -static void gen_neon_dup_high16(TCGContext *tcg_ctx, TCGv_i32 var) -{ - TCGv_i32 tmp = tcg_temp_new_i32(tcg_ctx); - tcg_gen_andi_i32(tcg_ctx, var, var, 0xffff0000); - tcg_gen_shri_i32(tcg_ctx, tmp, var, 16); - tcg_gen_or_i32(tcg_ctx, var, var, tmp); - tcg_temp_free_i32(tcg_ctx, tmp); -} - static inline bool use_goto_tb(DisasContext *s, target_ulong dest) { struct uc_struct *uc = s->uc; @@ -3015,7 +2848,7 @@ static void gen_msr_banked(DisasContext *s, int r, int sysm, int rn) tcg_temp_free_i32(tcg_ctx, tcg_tgtmode); tcg_temp_free_i32(tcg_ctx, tcg_regno); tcg_temp_free_i32(tcg_ctx, tcg_reg); - s->base.is_jmp = DISAS_UPDATE; + s->base.is_jmp = DISAS_UPDATE_EXIT; } static void gen_mrs_banked(DisasContext *s, int r, int sysm, int rn) @@ -3038,7 +2871,7 @@ static void gen_mrs_banked(DisasContext *s, int r, int sysm, int rn) tcg_temp_free_i32(tcg_ctx, tcg_tgtmode); tcg_temp_free_i32(tcg_ctx, tcg_regno); store_reg(s, rn, tcg_reg); - s->base.is_jmp = DISAS_UPDATE; + s->base.is_jmp = DISAS_UPDATE_EXIT; } /* Store value to PC as for an exception return (ie don't @@ -3077,1047 +2910,663 @@ static void gen_exception_return(DisasContext *s, TCGv_i32 pc) gen_rfe(s, pc, load_cpu_field(tcg_ctx, spsr)); } -#define CPU_V001 tcg_ctx->cpu_V0, tcg_ctx->cpu_V0, tcg_ctx->cpu_V1 +static void gen_gvec_fn3_qc(TCGContext *tcg_ctx, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t rm_ofs, uint32_t opr_sz, + uint32_t max_sz, gen_helper_gvec_3_ptr *fn) +{ + TCGv_ptr qc_ptr = tcg_temp_new_ptr(tcg_ctx); + + tcg_gen_addi_ptr(tcg_ctx, qc_ptr, tcg_ctx->cpu_env, offsetof(CPUARMState, vfp.qc)); + tcg_gen_gvec_3_ptr(tcg_ctx, rd_ofs, rn_ofs, rm_ofs, qc_ptr, + opr_sz, max_sz, 0, fn); + tcg_temp_free_ptr(tcg_ctx, qc_ptr); +} -static inline void gen_neon_add(TCGContext *tcg_ctx, int size, TCGv_i32 t0, TCGv_i32 t1) +void gen_gvec_sqrdmlah_qc(TCGContext *tcg_ctx, unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) { - switch (size) { - case 0: gen_helper_neon_add_u8(tcg_ctx, t0, t0, t1); break; - case 1: gen_helper_neon_add_u16(tcg_ctx, t0, t0, t1); break; - case 2: tcg_gen_add_i32(tcg_ctx, t0, t0, t1); break; - default: abort(); - } + static gen_helper_gvec_3_ptr * const fns[2] = { + gen_helper_gvec_qrdmlah_s16, 
gen_helper_gvec_qrdmlah_s32 + }; + tcg_debug_assert(vece >= 1 && vece <= 2); + gen_gvec_fn3_qc(tcg_ctx, rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, fns[vece - 1]); } -static inline void gen_neon_rsb(TCGContext *tcg_ctx, int size, TCGv_i32 t0, TCGv_i32 t1) +void gen_gvec_sqrdmlsh_qc(TCGContext *tcg_ctx, unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) { - switch (size) { - case 0: gen_helper_neon_sub_u8(tcg_ctx, t0, t1, t0); break; - case 1: gen_helper_neon_sub_u16(tcg_ctx, t0, t1, t0); break; - case 2: tcg_gen_sub_i32(tcg_ctx, t0, t1, t0); break; - default: return; - } + static gen_helper_gvec_3_ptr * const fns[2] = { + gen_helper_gvec_qrdmlsh_s16, gen_helper_gvec_qrdmlsh_s32 + }; + tcg_debug_assert(vece >= 1 && vece <= 2); + gen_gvec_fn3_qc(tcg_ctx, rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, fns[vece - 1]); +} + +#define GEN_CMP0(NAME, COND) \ + static void gen_##NAME##0_i32(TCGContext *tcg_ctx, TCGv_i32 d, TCGv_i32 a) \ + { \ + tcg_gen_setcondi_i32(tcg_ctx, COND, d, a, 0); \ + tcg_gen_neg_i32(tcg_ctx, d, d); \ + } \ + static void gen_##NAME##0_i64(TCGContext *tcg_ctx, TCGv_i64 d, TCGv_i64 a) \ + { \ + tcg_gen_setcondi_i64(tcg_ctx, COND, d, a, 0); \ + tcg_gen_neg_i64(tcg_ctx, d, d); \ + } \ + static void gen_##NAME##0_vec(TCGContext *tcg_ctx, unsigned vece, TCGv_vec d, TCGv_vec a) \ + { \ + TCGv_vec zero = tcg_const_zeros_vec_matching(tcg_ctx, d); \ + tcg_gen_cmp_vec(tcg_ctx, COND, vece, d, a, zero); \ + tcg_temp_free_vec(tcg_ctx, zero); \ + } \ + void gen_gvec_##NAME##0(TCGContext *tcg_ctx, unsigned vece, uint32_t d, uint32_t m, \ + uint32_t opr_sz, uint32_t max_sz) \ + { \ + const GVecGen2 op[4] = { \ + { .fno = gen_helper_gvec_##NAME##0_b, \ + .fniv = gen_##NAME##0_vec, \ + .opt_opc = vecop_list_cmp, \ + .vece = MO_8 }, \ + { .fno = gen_helper_gvec_##NAME##0_h, \ + .fniv = gen_##NAME##0_vec, \ + .opt_opc = vecop_list_cmp, \ + .vece = MO_16 }, \ + { .fni4 = gen_##NAME##0_i32, \ + .fniv = gen_##NAME##0_vec, \ + .opt_opc = vecop_list_cmp, \ + .vece = MO_32 }, \ + { .fni8 = gen_##NAME##0_i64, \ + .fniv = gen_##NAME##0_vec, \ + .opt_opc = vecop_list_cmp, \ + .prefer_i64 = TCG_TARGET_REG_BITS == 64, \ + .vece = MO_64 }, \ + }; \ + tcg_gen_gvec_2(tcg_ctx, d, m, opr_sz, max_sz, &op[vece]); \ + } + +static const TCGOpcode vecop_list_cmp[] = { + INDEX_op_cmp_vec, 0 +}; + +GEN_CMP0(ceq, TCG_COND_EQ) +GEN_CMP0(cle, TCG_COND_LE) +GEN_CMP0(cge, TCG_COND_GE) +GEN_CMP0(clt, TCG_COND_LT) +GEN_CMP0(cgt, TCG_COND_GT) + +#undef GEN_CMP0 + +static void gen_ssra8_i64(TCGContext *tcg_ctx, TCGv_i64 d, TCGv_i64 a, int64_t shift) +{ + tcg_gen_vec_sar8i_i64(tcg_ctx, a, a, shift); + tcg_gen_vec_add8_i64(tcg_ctx, d, d, a); } -/* 32-bit pairwise ops end up the same as the elementwise versions. 
*/ -#define gen_helper_neon_pmax_s32 tcg_gen_smax_i32 -#define gen_helper_neon_pmax_u32 tcg_gen_umax_i32 -#define gen_helper_neon_pmin_s32 tcg_gen_smin_i32 -#define gen_helper_neon_pmin_u32 tcg_gen_umin_i32 +static void gen_ssra16_i64(TCGContext *tcg_ctx, TCGv_i64 d, TCGv_i64 a, int64_t shift) +{ + tcg_gen_vec_sar16i_i64(tcg_ctx, a, a, shift); + tcg_gen_vec_add16_i64(tcg_ctx, d, d, a); +} -#define GEN_NEON_INTEGER_OP_ENV(name) do { \ - switch ((size << 1) | u) { \ - case 0: \ - gen_helper_neon_##name##_s8(tcg_ctx, tmp, tcg_ctx->cpu_env, tmp, tmp2); \ - break; \ - case 1: \ - gen_helper_neon_##name##_u8(tcg_ctx, tmp, tcg_ctx->cpu_env, tmp, tmp2); \ - break; \ - case 2: \ - gen_helper_neon_##name##_s16(tcg_ctx, tmp, tcg_ctx->cpu_env, tmp, tmp2); \ - break; \ - case 3: \ - gen_helper_neon_##name##_u16(tcg_ctx, tmp, tcg_ctx->cpu_env, tmp, tmp2); \ - break; \ - case 4: \ - gen_helper_neon_##name##_s32(tcg_ctx, tmp, tcg_ctx->cpu_env, tmp, tmp2); \ - break; \ - case 5: \ - gen_helper_neon_##name##_u32(tcg_ctx, tmp, tcg_ctx->cpu_env, tmp, tmp2); \ - break; \ - default: return 1; \ - }} while (0) +static void gen_ssra32_i32(TCGContext *tcg_ctx, TCGv_i32 d, TCGv_i32 a, int32_t shift) +{ + tcg_gen_sari_i32(tcg_ctx, a, a, shift); + tcg_gen_add_i32(tcg_ctx, d, d, a); +} -#define GEN_NEON_INTEGER_OP(name) do { \ - switch ((size << 1) | u) { \ - case 0: \ - gen_helper_neon_##name##_s8(tcg_ctx, tmp, tmp, tmp2); \ - break; \ - case 1: \ - gen_helper_neon_##name##_u8(tcg_ctx, tmp, tmp, tmp2); \ - break; \ - case 2: \ - gen_helper_neon_##name##_s16(tcg_ctx, tmp, tmp, tmp2); \ - break; \ - case 3: \ - gen_helper_neon_##name##_u16(tcg_ctx, tmp, tmp, tmp2); \ - break; \ - case 4: \ - gen_helper_neon_##name##_s32(tcg_ctx, tmp, tmp, tmp2); \ - break; \ - case 5: \ - gen_helper_neon_##name##_u32(tcg_ctx, tmp, tmp, tmp2); \ - break; \ - default: return 1; \ - }} while (0) +static void gen_ssra64_i64(TCGContext *tcg_ctx, TCGv_i64 d, TCGv_i64 a, int64_t shift) +{ + tcg_gen_sari_i64(tcg_ctx, a, a, shift); + tcg_gen_add_i64(tcg_ctx, d, d, a); +} -static TCGv_i32 neon_load_scratch(TCGContext *tcg_ctx, int scratch) +static void gen_ssra_vec(TCGContext *tcg_ctx, unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh) { - TCGv_i32 tmp = tcg_temp_new_i32(tcg_ctx); - tcg_gen_ld_i32(tcg_ctx, tmp, tcg_ctx->cpu_env, offsetof(CPUARMState, vfp.scratch[scratch])); - return tmp; + tcg_gen_sari_vec(tcg_ctx, vece, a, a, sh); + tcg_gen_add_vec(tcg_ctx, vece, d, d, a); } -static void neon_store_scratch(TCGContext *tcg_ctx, int scratch, TCGv_i32 var) +void gen_gvec_ssra(TCGContext *tcg_ctx, unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs, + int64_t shift, uint32_t opr_sz, uint32_t max_sz) { - tcg_gen_st_i32(tcg_ctx, var, tcg_ctx->cpu_env, offsetof(CPUARMState, vfp.scratch[scratch])); - tcg_temp_free_i32(tcg_ctx, var); + static const TCGOpcode vecop_list[] = { + INDEX_op_sari_vec, INDEX_op_add_vec, 0 + }; + static const GVecGen2i ops[4] = { + { .fni8 = gen_ssra8_i64, + .fniv = gen_ssra_vec, + .fno = gen_helper_gvec_ssra_b, + .load_dest = true, + .opt_opc = vecop_list, + .vece = MO_8 }, + { .fni8 = gen_ssra16_i64, + .fniv = gen_ssra_vec, + .fno = gen_helper_gvec_ssra_h, + .load_dest = true, + .opt_opc = vecop_list, + .vece = MO_16 }, + { .fni4 = gen_ssra32_i32, + .fniv = gen_ssra_vec, + .fno = gen_helper_gvec_ssra_s, + .load_dest = true, + .opt_opc = vecop_list, + .vece = MO_32 }, + { .fni8 = gen_ssra64_i64, + .fniv = gen_ssra_vec, + .fno = gen_helper_gvec_ssra_b, + .prefer_i64 = TCG_TARGET_REG_BITS == 64, + .opt_opc = vecop_list, + .load_dest 
= true, + .vece = MO_64 }, + }; + + /* tszimm encoding produces immediates in the range [1..esize]. */ + tcg_debug_assert(shift > 0); + tcg_debug_assert(shift <= (8 << vece)); + + /* + * Shifts larger than the element size are architecturally valid. + * Signed results in all sign bits. + */ + shift = MIN(shift, (8 << vece) - 1); + tcg_gen_gvec_2i(tcg_ctx, rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]); } -static inline TCGv_i32 neon_get_scalar(TCGContext *tcg_ctx, int size, int reg) +static void gen_usra8_i64(TCGContext *tcg_ctx, TCGv_i64 d, TCGv_i64 a, int64_t shift) { - TCGv_i32 tmp; - if (size == 1) { - tmp = neon_load_reg(tcg_ctx, reg & 7, reg >> 4); - if (reg & 8) { - gen_neon_dup_high16(tcg_ctx, tmp); - } else { - gen_neon_dup_low16(tcg_ctx, tmp); - } - } else { - tmp = neon_load_reg(tcg_ctx, reg & 15, reg >> 4); - } - return tmp; + tcg_gen_vec_shr8i_i64(tcg_ctx, a, a, shift); + tcg_gen_vec_add8_i64(tcg_ctx, d, d, a); } -static int gen_neon_unzip(TCGContext *tcg_ctx, int rd, int rm, int size, int q) +static void gen_usra16_i64(TCGContext *tcg_ctx, TCGv_i64 d, TCGv_i64 a, int64_t shift) { - TCGv_ptr pd, pm; - - if (!q && size == 2) { - return 1; - } - pd = vfp_reg_ptr(tcg_ctx, true, rd); - pm = vfp_reg_ptr(tcg_ctx, true, rm); - if (q) { - switch (size) { - case 0: - gen_helper_neon_qunzip8(tcg_ctx, pd, pm); - break; - case 1: - gen_helper_neon_qunzip16(tcg_ctx, pd, pm); - break; - case 2: - gen_helper_neon_qunzip32(tcg_ctx, pd, pm); - break; - default: - abort(); - } - } else { - switch (size) { - case 0: - gen_helper_neon_unzip8(tcg_ctx, pd, pm); - break; - case 1: - gen_helper_neon_unzip16(tcg_ctx, pd, pm); - break; - default: - abort(); - } - } - tcg_temp_free_ptr(tcg_ctx, pd); - tcg_temp_free_ptr(tcg_ctx, pm); - return 0; + tcg_gen_vec_shr16i_i64(tcg_ctx, a, a, shift); + tcg_gen_vec_add16_i64(tcg_ctx, d, d, a); } -static int gen_neon_zip(TCGContext *tcg_ctx, int rd, int rm, int size, int q) +static void gen_usra32_i32(TCGContext *tcg_ctx, TCGv_i32 d, TCGv_i32 a, int32_t shift) { - TCGv_ptr pd, pm; + tcg_gen_shri_i32(tcg_ctx, a, a, shift); + tcg_gen_add_i32(tcg_ctx, d, d, a); +} - if (!q && size == 2) { - return 1; - } - pd = vfp_reg_ptr(tcg_ctx, true, rd); - pm = vfp_reg_ptr(tcg_ctx, true, rm); - if (q) { - switch (size) { - case 0: - gen_helper_neon_qzip8(tcg_ctx, pd, pm); - break; - case 1: - gen_helper_neon_qzip16(tcg_ctx, pd, pm); - break; - case 2: - gen_helper_neon_qzip32(tcg_ctx, pd, pm); - break; - default: - abort(); - } +static void gen_usra64_i64(TCGContext *tcg_ctx, TCGv_i64 d, TCGv_i64 a, int64_t shift) +{ + tcg_gen_shri_i64(tcg_ctx, a, a, shift); + tcg_gen_add_i64(tcg_ctx, d, d, a); +} + +static void gen_usra_vec(TCGContext *tcg_ctx, unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh) +{ + tcg_gen_shri_vec(tcg_ctx, vece, a, a, sh); + tcg_gen_add_vec(tcg_ctx, vece, d, d, a); +} + +void gen_gvec_usra(TCGContext *tcg_ctx, unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs, + int64_t shift, uint32_t opr_sz, uint32_t max_sz) +{ + static const TCGOpcode vecop_list[] = { + INDEX_op_shri_vec, INDEX_op_add_vec, 0 + }; + static const GVecGen2i ops[4] = { + { .fni8 = gen_usra8_i64, + .fniv = gen_usra_vec, + .fno = gen_helper_gvec_usra_b, + .load_dest = true, + .opt_opc = vecop_list, + .vece = MO_8, }, + { .fni8 = gen_usra16_i64, + .fniv = gen_usra_vec, + .fno = gen_helper_gvec_usra_h, + .load_dest = true, + .opt_opc = vecop_list, + .vece = MO_16, }, + { .fni4 = gen_usra32_i32, + .fniv = gen_usra_vec, + .fno = gen_helper_gvec_usra_s, + .load_dest = true, + .opt_opc = vecop_list, 
+ .vece = MO_32, }, + { .fni8 = gen_usra64_i64, + .fniv = gen_usra_vec, + .fno = gen_helper_gvec_usra_d, + .prefer_i64 = TCG_TARGET_REG_BITS == 64, + .load_dest = true, + .opt_opc = vecop_list, + .vece = MO_64, }, + }; + + /* tszimm encoding produces immediates in the range [1..esize]. */ + tcg_debug_assert(shift > 0); + tcg_debug_assert(shift <= (8 << vece)); + + /* + * Shifts larger than the element size are architecturally valid. + * Unsigned results in all zeros as input to accumulate: nop. + */ + if (shift < (8 << vece)) { + tcg_gen_gvec_2i(tcg_ctx, rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]); } else { - switch (size) { - case 0: - gen_helper_neon_zip8(tcg_ctx, pd, pm); - break; - case 1: - gen_helper_neon_zip16(tcg_ctx, pd, pm); - break; - default: - abort(); - } + /* Nop, but we do need to clear the tail. */ + tcg_gen_gvec_mov(tcg_ctx, vece, rd_ofs, rd_ofs, opr_sz, max_sz); } - tcg_temp_free_ptr(tcg_ctx, pd); - tcg_temp_free_ptr(tcg_ctx, pm); - return 0; } -static void gen_neon_trn_u8(TCGContext *tcg_ctx, TCGv_i32 t0, TCGv_i32 t1) +/* + * Shift one less than the requested amount, and the low bit is + * the rounding bit. For the 8 and 16-bit operations, because we + * mask the low bit, we can perform a normal integer shift instead + * of a vector shift. + */ +static void gen_srshr8_i64(TCGContext *tcg_ctx, TCGv_i64 d, TCGv_i64 a, int64_t sh) { - TCGv_i32 rd, tmp; + TCGv_i64 t = tcg_temp_new_i64(tcg_ctx); - rd = tcg_temp_new_i32(tcg_ctx); - tmp = tcg_temp_new_i32(tcg_ctx); + tcg_gen_shri_i64(tcg_ctx, t, a, sh - 1); + tcg_gen_andi_i64(tcg_ctx, t, t, dup_const(MO_8, 1)); + tcg_gen_vec_sar8i_i64(tcg_ctx, d, a, sh); + tcg_gen_vec_add8_i64(tcg_ctx, d, d, t); + tcg_temp_free_i64(tcg_ctx, t); +} - tcg_gen_shli_i32(tcg_ctx, rd, t0, 8); - tcg_gen_andi_i32(tcg_ctx, rd, rd, 0xff00ff00); - tcg_gen_andi_i32(tcg_ctx, tmp, t1, 0x00ff00ff); - tcg_gen_or_i32(tcg_ctx, rd, rd, tmp); +static void gen_srshr16_i64(TCGContext *tcg_ctx, TCGv_i64 d, TCGv_i64 a, int64_t sh) +{ + TCGv_i64 t = tcg_temp_new_i64(tcg_ctx); - tcg_gen_shri_i32(tcg_ctx, t1, t1, 8); - tcg_gen_andi_i32(tcg_ctx, t1, t1, 0x00ff00ff); - tcg_gen_andi_i32(tcg_ctx, tmp, t0, 0xff00ff00); - tcg_gen_or_i32(tcg_ctx, t1, t1, tmp); - tcg_gen_mov_i32(tcg_ctx, t0, rd); + tcg_gen_shri_i64(tcg_ctx, t, a, sh - 1); + tcg_gen_andi_i64(tcg_ctx, t, t, dup_const(MO_16, 1)); + tcg_gen_vec_sar16i_i64(tcg_ctx, d, a, sh); + tcg_gen_vec_add16_i64(tcg_ctx, d, d, t); + tcg_temp_free_i64(tcg_ctx, t); +} - tcg_temp_free_i32(tcg_ctx, tmp); - tcg_temp_free_i32(tcg_ctx, rd); +static void gen_srshr32_i32(TCGContext *tcg_ctx, TCGv_i32 d, TCGv_i32 a, int32_t sh) +{ + TCGv_i32 t = tcg_temp_new_i32(tcg_ctx); + + tcg_gen_extract_i32(tcg_ctx, t, a, sh - 1, 1); + tcg_gen_sari_i32(tcg_ctx, d, a, sh); + tcg_gen_add_i32(tcg_ctx, d, d, t); + tcg_temp_free_i32(tcg_ctx, t); } -static void gen_neon_trn_u16(TCGContext *tcg_ctx, TCGv_i32 t0, TCGv_i32 t1) +static void gen_srshr64_i64(TCGContext *tcg_ctx, TCGv_i64 d, TCGv_i64 a, int64_t sh) { - TCGv_i32 rd, tmp; + TCGv_i64 t = tcg_temp_new_i64(tcg_ctx); - rd = tcg_temp_new_i32(tcg_ctx); - tmp = tcg_temp_new_i32(tcg_ctx); + tcg_gen_extract_i64(tcg_ctx, t, a, sh - 1, 1); + tcg_gen_sari_i64(tcg_ctx, d, a, sh); + tcg_gen_add_i64(tcg_ctx, d, d, t); + tcg_temp_free_i64(tcg_ctx, t); +} - tcg_gen_shli_i32(tcg_ctx, rd, t0, 16); - tcg_gen_andi_i32(tcg_ctx, tmp, t1, 0xffff); - tcg_gen_or_i32(tcg_ctx, rd, rd, tmp); - tcg_gen_shri_i32(tcg_ctx, t1, t1, 16); - tcg_gen_andi_i32(tcg_ctx, tmp, t0, 0xffff0000); - tcg_gen_or_i32(tcg_ctx, t1, t1, tmp); 
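/*
 * Illustrative scalar model of one lane of the SSRA/USRA expansions above
 * (sketch only, assuming plain C shifts; the model_* names are hypothetical):
 * the shifted element is accumulated into the destination.  Following the
 * comments above, a signed shift by the full element size is clamped to
 * esize - 1 (all sign bits), while an unsigned shift by the full element
 * size contributes zero and leaves the accumulator unchanged.
 */
#include <stdint.h>

static inline int8_t model_ssra8(int8_t d, int8_t a, int shift)    /* shift in [1..8] */
{
    if (shift > 7) {
        shift = 7;            /* mirrors shift = MIN(shift, (8 << vece) - 1) */
    }
    return (int8_t)(d + (a >> shift));
}

static inline uint8_t model_usra8(uint8_t d, uint8_t a, int shift) /* shift in [1..8] */
{
    if (shift >= 8) {
        return d;             /* all bits shifted out: accumulating zero is a nop */
    }
    return (uint8_t)(d + (a >> shift));
}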
- tcg_gen_mov_i32(tcg_ctx, t0, rd); +static void gen_srshr_vec(TCGContext *tcg_ctx, unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh) +{ + TCGv_vec t = tcg_temp_new_vec_matching(tcg_ctx, d); + TCGv_vec ones = tcg_temp_new_vec_matching(tcg_ctx, d); - tcg_temp_free_i32(tcg_ctx, tmp); - tcg_temp_free_i32(tcg_ctx, rd); -} - - -static struct { - int nregs; - int interleave; - int spacing; -} const neon_ls_element_type[11] = { - {1, 4, 1}, - {1, 4, 2}, - {4, 1, 1}, - {2, 2, 2}, - {1, 3, 1}, - {1, 3, 2}, - {3, 1, 1}, - {1, 1, 1}, - {1, 2, 1}, - {1, 2, 2}, - {2, 1, 1} -}; + tcg_gen_shri_vec(tcg_ctx, vece, t, a, sh - 1); + tcg_gen_dupi_vec(tcg_ctx, vece, ones, 1); + tcg_gen_and_vec(tcg_ctx, vece, t, t, ones); + tcg_gen_sari_vec(tcg_ctx, vece, d, a, sh); + tcg_gen_add_vec(tcg_ctx, vece, d, d, t); + + tcg_temp_free_vec(tcg_ctx, t); + tcg_temp_free_vec(tcg_ctx, ones); +} -/* Translate a NEON load/store element instruction. Return nonzero if the - instruction is invalid. */ -static int disas_neon_ls_insn(DisasContext *s, uint32_t insn) +void gen_gvec_srshr(TCGContext *tcg_ctx, unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs, + int64_t shift, uint32_t opr_sz, uint32_t max_sz) { - TCGContext *tcg_ctx = s->uc->tcg_ctx; - int rd, rn, rm; - int op; - int nregs; - int interleave; - int spacing; - int stride; - int size; - int reg; - int load; - int n; - int vec_size; - int mmu_idx; - MemOp endian; - TCGv_i32 addr; - TCGv_i32 tmp; - TCGv_i32 tmp2; - TCGv_i64 tmp64; + static const TCGOpcode vecop_list[] = { + INDEX_op_shri_vec, INDEX_op_sari_vec, INDEX_op_add_vec, 0 + }; + static const GVecGen2i ops[4] = { + { .fni8 = gen_srshr8_i64, + .fniv = gen_srshr_vec, + .fno = gen_helper_gvec_srshr_b, + .opt_opc = vecop_list, + .vece = MO_8 }, + { .fni8 = gen_srshr16_i64, + .fniv = gen_srshr_vec, + .fno = gen_helper_gvec_srshr_h, + .opt_opc = vecop_list, + .vece = MO_16 }, + { .fni4 = gen_srshr32_i32, + .fniv = gen_srshr_vec, + .fno = gen_helper_gvec_srshr_s, + .opt_opc = vecop_list, + .vece = MO_32 }, + { .fni8 = gen_srshr64_i64, + .fniv = gen_srshr_vec, + .fno = gen_helper_gvec_srshr_d, + .prefer_i64 = TCG_TARGET_REG_BITS == 64, + .opt_opc = vecop_list, + .vece = MO_64 }, + }; - /* FIXME: this access check should not take precedence over UNDEF - * for invalid encodings; we will generate incorrect syndrome information - * for attempts to execute invalid vfp/neon encodings with FP disabled. - */ - if (s->fp_excp_el) { - gen_exception_insn(s, s->pc_curr, EXCP_UDEF, - syn_simd_access_trap(1, 0xe, false), s->fp_excp_el); - return 0; - } + /* tszimm encoding produces immediates in the range [1..esize] */ + tcg_debug_assert(shift > 0); + tcg_debug_assert(shift <= (8 << vece)); - if (!s->vfp_enabled) - return 1; - VFP_DREG_D(rd, insn); - rn = (insn >> 16) & 0xf; - rm = insn & 0xf; - load = (insn & (1 << 21)) != 0; - endian = s->be_data; - mmu_idx = get_mem_index(s); - if ((insn & (1 << 23)) == 0) { - /* Load store all elements. */ - op = (insn >> 8) & 0xf; - size = (insn >> 6) & 3; - if (op > 10) - return 1; - /* Catch UNDEF cases for bad values of align field */ - switch (op & 0xc) { - case 4: - if (((insn >> 5) & 1) == 1) { - return 1; - } - break; - case 8: - if (((insn >> 4) & 3) == 3) { - return 1; - } - break; - default: - break; - } - nregs = neon_ls_element_type[op].nregs; - interleave = neon_ls_element_type[op].interleave; - spacing = neon_ls_element_type[op].spacing; - if (size == 3 && (interleave | spacing) != 1) { - return 1; - } - /* For our purposes, bytes are always little-endian. 
*/ - if (size == 0) { - endian = MO_LE; - } - /* Consecutive little-endian elements from a single register - * can be promoted to a larger little-endian operation. + if (shift == (8 << vece)) { + /* + * Shifts larger than the element size are architecturally valid. + * Signed results in all sign bits. With rounding, this produces + * (-1 + 1) >> 1 == 0, or (0 + 1) >> 1 == 0. + * I.e. always zero. */ - if (interleave == 1 && endian == MO_LE) { - size = 3; - } - tmp64 = tcg_temp_new_i64(tcg_ctx); - addr = tcg_temp_new_i32(tcg_ctx); - tmp2 = tcg_const_i32(tcg_ctx, 1 << size); - load_reg_var(s, addr, rn); - for (reg = 0; reg < nregs; reg++) { - for (n = 0; n < 8 >> size; n++) { - int xs; - for (xs = 0; xs < interleave; xs++) { - int tt = rd + reg + spacing * xs; - - if (load) { - gen_aa32_ld_i64(s, tmp64, addr, mmu_idx, endian | size); - neon_store_element64(tcg_ctx, tt, n, size, tmp64); - } else { - neon_load_element64(tcg_ctx, tmp64, tt, n, size); - gen_aa32_st_i64(s, tmp64, addr, mmu_idx, endian | size); - } - tcg_gen_add_i32(tcg_ctx, addr, addr, tmp2); - } - } - } - tcg_temp_free_i32(tcg_ctx, addr); - tcg_temp_free_i32(tcg_ctx, tmp2); - tcg_temp_free_i64(tcg_ctx, tmp64); - stride = nregs * interleave * 8; + tcg_gen_gvec_dup_imm(tcg_ctx, vece, rd_ofs, opr_sz, max_sz, 0); } else { - size = (insn >> 10) & 3; - if (size == 3) { - /* Load single element to all lanes. */ - int a = (insn >> 4) & 1; - if (!load) { - return 1; - } - size = (insn >> 6) & 3; - nregs = ((insn >> 8) & 3) + 1; - - if (size == 3) { - if (nregs != 4 || a == 0) { - return 1; - } - /* For VLD4 size==3 a == 1 means 32 bits at 16 byte alignment */ - size = 2; - } - if (nregs == 1 && a == 1 && size == 0) { - return 1; - } - if (nregs == 3 && a == 1) { - return 1; - } - addr = tcg_temp_new_i32(tcg_ctx); - load_reg_var(s, addr, rn); - - /* VLD1 to all lanes: bit 5 indicates how many Dregs to write. - * VLD2/3/4 to all lanes: bit 5 indicates register stride. - */ - stride = (insn & (1 << 5)) ? 2 : 1; - vec_size = nregs == 1 ? stride * 8 : 8; - - tmp = tcg_temp_new_i32(tcg_ctx); - for (reg = 0; reg < nregs; reg++) { - gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s), - s->be_data | size); - if ((rd & 1) && vec_size == 16) { - /* We cannot write 16 bytes at once because the - * destination is unaligned. - */ - tcg_gen_gvec_dup_i32(tcg_ctx, size, neon_reg_offset(rd, 0), - 8, 8, tmp); - tcg_gen_gvec_mov(tcg_ctx, 0, neon_reg_offset(rd + 1, 0), - neon_reg_offset(rd, 0), 8, 8); - } else { - tcg_gen_gvec_dup_i32(tcg_ctx, size, neon_reg_offset(rd, 0), - vec_size, vec_size, tmp); - } - tcg_gen_addi_i32(tcg_ctx, addr, addr, 1 << size); - rd += stride; - } - tcg_temp_free_i32(tcg_ctx, tmp); - tcg_temp_free_i32(tcg_ctx, addr); - stride = (1 << size) * nregs; - } else { - /* Single element. */ - int idx = (insn >> 4) & 0xf; - int reg_idx; - switch (size) { - case 0: - reg_idx = (insn >> 5) & 7; - stride = 1; - break; - case 1: - reg_idx = (insn >> 6) & 3; - stride = (insn & (1 << 5)) ? 2 : 1; - break; - case 2: - reg_idx = (insn >> 7) & 1; - stride = (insn & (1 << 6)) ? 2 : 1; - break; - default: - abort(); - } - nregs = ((insn >> 8) & 3) + 1; - /* Catch the UNDEF cases. This is unavoidably a bit messy. 
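/*
 * Illustrative scalar model of one lane of the rounding shifts expanded above
 * (a sketch assuming plain C arithmetic; the model_* names are hypothetical):
 * bit (shift - 1) of the input is the rounding bit, and adding it to the
 * truncating shift gives the same result as (x + (1 << (shift - 1))) >> shift
 * without needing a wider intermediate.  A shift by the full element size
 * rounds every signed value to zero and every unsigned value to its most
 * significant bit; SRSRA/URSRA then accumulate the rounded value into the
 * destination.
 */
#include <stdint.h>

static inline int8_t model_srshr8(int8_t a, int shift)    /* shift in [1..8] */
{
    if (shift >= 8) {
        return 0;                         /* (-1 + 1) >> 1 == 0, (0 + 1) >> 1 == 0 */
    }
    int rounding_bit = (a >> (shift - 1)) & 1;
    return (int8_t)((a >> shift) + rounding_bit);
}

static inline uint8_t model_urshr8(uint8_t a, int shift)  /* shift in [1..8] */
{
    if (shift >= 8) {
        return (uint8_t)(a >> 7);         /* only the rounding bit survives */
    }
    unsigned rounding_bit = ((unsigned)a >> (shift - 1)) & 1u;
    return (uint8_t)(((unsigned)a >> shift) + rounding_bit);
}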
*/ - switch (nregs) { - case 1: - if (((idx & (1 << size)) != 0) || - (size == 2 && ((idx & 3) == 1 || (idx & 3) == 2))) { - return 1; - } - break; - case 3: - if ((idx & 1) != 0) { - return 1; - } - /* fall through */ - case 2: - if (size == 2 && (idx & 2) != 0) { - return 1; - } - break; - case 4: - if ((size == 2) && ((idx & 3) == 3)) { - return 1; - } - break; - default: - abort(); - } - if ((rd + stride * (nregs - 1)) > 31) { - /* Attempts to write off the end of the register file - * are UNPREDICTABLE; we choose to UNDEF because otherwise - * the neon_load_reg() would write off the end of the array. - */ - return 1; - } - tmp = tcg_temp_new_i32(tcg_ctx); - addr = tcg_temp_new_i32(tcg_ctx); - load_reg_var(s, addr, rn); - for (reg = 0; reg < nregs; reg++) { - if (load) { - gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s), - s->be_data | size); - neon_store_element(tcg_ctx, rd, reg_idx, size, tmp); - } else { /* Store */ - neon_load_element(tcg_ctx, tmp, rd, reg_idx, size); - gen_aa32_st_i32(s, tmp, addr, get_mem_index(s), - s->be_data | size); - } - rd += stride; - tcg_gen_addi_i32(tcg_ctx, addr, addr, 1 << size); - } - tcg_temp_free_i32(tcg_ctx, addr); - tcg_temp_free_i32(tcg_ctx, tmp); - stride = nregs * (1 << size); - } + tcg_gen_gvec_2i(tcg_ctx, rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]); } - if (rm != 15) { - TCGv_i32 base; - - base = load_reg(s, rn); - if (rm == 13) { - tcg_gen_addi_i32(tcg_ctx, base, base, stride); - } else { - TCGv_i32 index; - index = load_reg(s, rm); - tcg_gen_add_i32(tcg_ctx, base, base, index); - tcg_temp_free_i32(tcg_ctx, index); - } - store_reg(s, rn, base); - } - return 0; } -static inline void gen_neon_narrow(TCGContext *tcg_ctx, int size, TCGv_i32 dest, TCGv_i64 src) +static void gen_srsra8_i64(TCGContext *tcg_ctx, TCGv_i64 d, TCGv_i64 a, int64_t sh) { - switch (size) { - case 0: gen_helper_neon_narrow_u8(tcg_ctx, dest, src); break; - case 1: gen_helper_neon_narrow_u16(tcg_ctx, dest, src); break; - case 2: tcg_gen_extrl_i64_i32(tcg_ctx, dest, src); break; - default: abort(); - } + TCGv_i64 t = tcg_temp_new_i64(tcg_ctx); + + gen_srshr8_i64(tcg_ctx, t, a, sh); + tcg_gen_vec_add8_i64(tcg_ctx, d, d, t); + tcg_temp_free_i64(tcg_ctx, t); } -static inline void gen_neon_narrow_sats(TCGContext *tcg_ctx, int size, TCGv_i32 dest, TCGv_i64 src) +static void gen_srsra16_i64(TCGContext *tcg_ctx, TCGv_i64 d, TCGv_i64 a, int64_t sh) { - switch (size) { - case 0: gen_helper_neon_narrow_sat_s8(tcg_ctx, dest, tcg_ctx->cpu_env, src); break; - case 1: gen_helper_neon_narrow_sat_s16(tcg_ctx, dest, tcg_ctx->cpu_env, src); break; - case 2: gen_helper_neon_narrow_sat_s32(tcg_ctx, dest, tcg_ctx->cpu_env, src); break; - default: abort(); - } + TCGv_i64 t = tcg_temp_new_i64(tcg_ctx); + + gen_srshr16_i64(tcg_ctx, t, a, sh); + tcg_gen_vec_add16_i64(tcg_ctx, d, d, t); + tcg_temp_free_i64(tcg_ctx, t); } -static inline void gen_neon_narrow_satu(TCGContext *tcg_ctx, int size, TCGv_i32 dest, TCGv_i64 src) +static void gen_srsra32_i32(TCGContext *tcg_ctx, TCGv_i32 d, TCGv_i32 a, int32_t sh) { - switch (size) { - case 0: gen_helper_neon_narrow_sat_u8(tcg_ctx, dest, tcg_ctx->cpu_env, src); break; - case 1: gen_helper_neon_narrow_sat_u16(tcg_ctx, dest, tcg_ctx->cpu_env, src); break; - case 2: gen_helper_neon_narrow_sat_u32(tcg_ctx, dest, tcg_ctx->cpu_env, src); break; - default: abort(); - } + TCGv_i32 t = tcg_temp_new_i32(tcg_ctx); + + gen_srshr32_i32(tcg_ctx, t, a, sh); + tcg_gen_add_i32(tcg_ctx, d, d, t); + tcg_temp_free_i32(tcg_ctx, t); } -static inline void 
gen_neon_unarrow_sats(TCGContext *tcg_ctx, int size, TCGv_i32 dest, TCGv_i64 src) +static void gen_srsra64_i64(TCGContext *tcg_ctx, TCGv_i64 d, TCGv_i64 a, int64_t sh) { - switch (size) { - case 0: gen_helper_neon_unarrow_sat8(tcg_ctx, dest, tcg_ctx->cpu_env, src); break; - case 1: gen_helper_neon_unarrow_sat16(tcg_ctx, dest, tcg_ctx->cpu_env, src); break; - case 2: gen_helper_neon_unarrow_sat32(tcg_ctx, dest, tcg_ctx->cpu_env, src); break; - default: abort(); - } + TCGv_i64 t = tcg_temp_new_i64(tcg_ctx); + + gen_srshr64_i64(tcg_ctx, t, a, sh); + tcg_gen_add_i64(tcg_ctx, d, d, t); + tcg_temp_free_i64(tcg_ctx, t); } -static inline void gen_neon_shift_narrow(TCGContext *tcg_ctx, int size, TCGv_i32 var, TCGv_i32 shift, - int q, int u) +static void gen_srsra_vec(TCGContext *tcg_ctx, unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh) { - if (q) { - if (u) { - switch (size) { - case 1: gen_helper_neon_rshl_u16(tcg_ctx, var, var, shift); break; - case 2: gen_helper_neon_rshl_u32(tcg_ctx, var, var, shift); break; - default: abort(); - } - } else { - switch (size) { - case 1: gen_helper_neon_rshl_s16(tcg_ctx, var, var, shift); break; - case 2: gen_helper_neon_rshl_s32(tcg_ctx, var, var, shift); break; - default: abort(); - } - } - } else { - if (u) { - switch (size) { - case 1: gen_helper_neon_shl_u16(tcg_ctx, var, var, shift); break; - case 2: gen_ushl_i32(tcg_ctx, var, var, shift); break; - default: abort(); - } - } else { - switch (size) { - case 1: gen_helper_neon_shl_s16(tcg_ctx, var, var, shift); break; - case 2: gen_sshl_i32(tcg_ctx, var, var, shift); break; - default: abort(); - } - } - } + TCGv_vec t = tcg_temp_new_vec_matching(tcg_ctx, d); + + gen_srshr_vec(tcg_ctx, vece, t, a, sh); + tcg_gen_add_vec(tcg_ctx, vece, d, d, t); + tcg_temp_free_vec(tcg_ctx, t); } -static inline void gen_neon_widen(TCGContext *tcg_ctx, TCGv_i64 dest, TCGv_i32 src, int size, int u) +void gen_gvec_srsra(TCGContext *tcg_ctx, unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs, + int64_t shift, uint32_t opr_sz, uint32_t max_sz) { - if (u) { - switch (size) { - case 0: gen_helper_neon_widen_u8(tcg_ctx, dest, src); break; - case 1: gen_helper_neon_widen_u16(tcg_ctx, dest, src); break; - case 2: tcg_gen_extu_i32_i64(tcg_ctx, dest, src); break; - default: abort(); - } + static const TCGOpcode vecop_list[] = { + INDEX_op_shri_vec, INDEX_op_sari_vec, INDEX_op_add_vec, 0 + }; + static const GVecGen2i ops[4] = { + { .fni8 = gen_srsra8_i64, + .fniv = gen_srsra_vec, + .fno = gen_helper_gvec_srsra_b, + .opt_opc = vecop_list, + .load_dest = true, + .vece = MO_8 }, + { .fni8 = gen_srsra16_i64, + .fniv = gen_srsra_vec, + .fno = gen_helper_gvec_srsra_h, + .opt_opc = vecop_list, + .load_dest = true, + .vece = MO_16 }, + { .fni4 = gen_srsra32_i32, + .fniv = gen_srsra_vec, + .fno = gen_helper_gvec_srsra_s, + .opt_opc = vecop_list, + .load_dest = true, + .vece = MO_32 }, + { .fni8 = gen_srsra64_i64, + .fniv = gen_srsra_vec, + .fno = gen_helper_gvec_srsra_d, + .prefer_i64 = TCG_TARGET_REG_BITS == 64, + .opt_opc = vecop_list, + .load_dest = true, + .vece = MO_64 }, + }; + + /* tszimm encoding produces immediates in the range [1..esize] */ + tcg_debug_assert(shift > 0); + tcg_debug_assert(shift <= (8 << vece)); + + /* + * Shifts larger than the element size are architecturally valid. + * Signed results in all sign bits. With rounding, this produces + * (-1 + 1) >> 1 == 0, or (0 + 1) >> 1 == 0. + * I.e. always zero. With accumulation, this leaves D unchanged. + */ + if (shift == (8 << vece)) { + /* Nop, but we do need to clear the tail. 
*/ + tcg_gen_gvec_mov(tcg_ctx, vece, rd_ofs, rd_ofs, opr_sz, max_sz); } else { - switch (size) { - case 0: gen_helper_neon_widen_s8(tcg_ctx, dest, src); break; - case 1: gen_helper_neon_widen_s16(tcg_ctx, dest, src); break; - case 2: tcg_gen_ext_i32_i64(tcg_ctx, dest, src); break; - default: abort(); - } + tcg_gen_gvec_2i(tcg_ctx, rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]); } - tcg_temp_free_i32(tcg_ctx, src); } -static inline void gen_neon_addl(TCGContext *tcg_ctx, int size) +static void gen_urshr8_i64(TCGContext *tcg_ctx, TCGv_i64 d, TCGv_i64 a, int64_t sh) { - switch (size) { - case 0: gen_helper_neon_addl_u16(tcg_ctx, CPU_V001); break; - case 1: gen_helper_neon_addl_u32(tcg_ctx, CPU_V001); break; - case 2: tcg_gen_add_i64(tcg_ctx, CPU_V001); break; - default: abort(); - } -} + TCGv_i64 t = tcg_temp_new_i64(tcg_ctx); -static inline void gen_neon_subl(TCGContext *tcg_ctx, int size) -{ - switch (size) { - case 0: gen_helper_neon_subl_u16(tcg_ctx, CPU_V001); break; - case 1: gen_helper_neon_subl_u32(tcg_ctx, CPU_V001); break; - case 2: tcg_gen_sub_i64(tcg_ctx, CPU_V001); break; - default: abort(); - } + tcg_gen_shri_i64(tcg_ctx, t, a, sh - 1); + tcg_gen_andi_i64(tcg_ctx, t, t, dup_const(MO_8, 1)); + tcg_gen_vec_shr8i_i64(tcg_ctx, d, a, sh); + tcg_gen_vec_add8_i64(tcg_ctx, d, d, t); + tcg_temp_free_i64(tcg_ctx, t); } -static inline void gen_neon_negl(TCGContext *tcg_ctx, TCGv_i64 var, int size) +static void gen_urshr16_i64(TCGContext *tcg_ctx, TCGv_i64 d, TCGv_i64 a, int64_t sh) { - switch (size) { - case 0: gen_helper_neon_negl_u16(tcg_ctx, var, var); break; - case 1: gen_helper_neon_negl_u32(tcg_ctx, var, var); break; - case 2: - tcg_gen_neg_i64(tcg_ctx, var, var); - break; - default: abort(); - } + TCGv_i64 t = tcg_temp_new_i64(tcg_ctx); + + tcg_gen_shri_i64(tcg_ctx, t, a, sh - 1); + tcg_gen_andi_i64(tcg_ctx, t, t, dup_const(MO_16, 1)); + tcg_gen_vec_shr16i_i64(tcg_ctx, d, a, sh); + tcg_gen_vec_add16_i64(tcg_ctx, d, d, t); + tcg_temp_free_i64(tcg_ctx, t); } -static inline void gen_neon_addl_saturate(TCGContext *tcg_ctx, TCGv_i64 op0, TCGv_i64 op1, int size) +static void gen_urshr32_i32(TCGContext *tcg_ctx, TCGv_i32 d, TCGv_i32 a, int32_t sh) { - switch (size) { - case 1: gen_helper_neon_addl_saturate_s32(tcg_ctx, op0, tcg_ctx->cpu_env, op0, op1); break; - case 2: gen_helper_neon_addl_saturate_s64(tcg_ctx, op0, tcg_ctx->cpu_env, op0, op1); break; - default: abort(); - } + TCGv_i32 t = tcg_temp_new_i32(tcg_ctx); + + tcg_gen_extract_i32(tcg_ctx, t, a, sh - 1, 1); + tcg_gen_shri_i32(tcg_ctx, d, a, sh); + tcg_gen_add_i32(tcg_ctx, d, d, t); + tcg_temp_free_i32(tcg_ctx, t); } -static inline void gen_neon_mull(TCGContext *tcg_ctx, TCGv_i64 dest, TCGv_i32 a, TCGv_i32 b, - int size, int u) +static void gen_urshr64_i64(TCGContext *tcg_ctx, TCGv_i64 d, TCGv_i64 a, int64_t sh) { - TCGv_i64 tmp; - - switch ((size << 1) | u) { - case 0: gen_helper_neon_mull_s8(tcg_ctx, dest, a, b); break; - case 1: gen_helper_neon_mull_u8(tcg_ctx, dest, a, b); break; - case 2: gen_helper_neon_mull_s16(tcg_ctx, dest, a, b); break; - case 3: gen_helper_neon_mull_u16(tcg_ctx, dest, a, b); break; - case 4: - tmp = gen_muls_i64_i32(tcg_ctx, a, b); - tcg_gen_mov_i64(tcg_ctx, dest, tmp); - tcg_temp_free_i64(tcg_ctx, tmp); - break; - case 5: - tmp = gen_mulu_i64_i32(tcg_ctx, a, b); - tcg_gen_mov_i64(tcg_ctx, dest, tmp); - tcg_temp_free_i64(tcg_ctx, tmp); - break; - default: abort(); - } + TCGv_i64 t = tcg_temp_new_i64(tcg_ctx); - /* gen_helper_neon_mull_[su]{8|16} do not free their parameters. 
- Don't forget to clean them now. */ - if (size < 2) { - tcg_temp_free_i32(tcg_ctx, a); - tcg_temp_free_i32(tcg_ctx, b); - } + tcg_gen_extract_i64(tcg_ctx, t, a, sh - 1, 1); + tcg_gen_shri_i64(tcg_ctx, d, a, sh); + tcg_gen_add_i64(tcg_ctx, d, d, t); + tcg_temp_free_i64(tcg_ctx, t); } -static void gen_neon_narrow_op(TCGContext *tcg_ctx, int op, int u, int size, - TCGv_i32 dest, TCGv_i64 src) +static void gen_urshr_vec(TCGContext *tcg_ctx, unsigned vece, TCGv_vec d, TCGv_vec a, int64_t shift) { - if (op) { - if (u) { - gen_neon_unarrow_sats(tcg_ctx, size, dest, src); - } else { - gen_neon_narrow(tcg_ctx, size, dest, src); - } - } else { - if (u) { - gen_neon_narrow_satu(tcg_ctx, size, dest, src); - } else { - gen_neon_narrow_sats(tcg_ctx, size, dest, src); - } - } -} + TCGv_vec t = tcg_temp_new_vec_matching(tcg_ctx, d); + TCGv_vec ones = tcg_temp_new_vec_matching(tcg_ctx, d); -/* Symbolic constants for op fields for Neon 3-register same-length. - * The values correspond to bits [11:8,4]; see the ARM ARM DDI0406B - * table A7-9. - */ -#define NEON_3R_VHADD 0 -#define NEON_3R_VQADD 1 -#define NEON_3R_VRHADD 2 -#define NEON_3R_LOGIC 3 /* VAND,VBIC,VORR,VMOV,VORN,VEOR,VBIF,VBIT,VBSL */ -#define NEON_3R_VHSUB 4 -#define NEON_3R_VQSUB 5 -#define NEON_3R_VCGT 6 -#define NEON_3R_VCGE 7 -#define NEON_3R_VSHL 8 -#define NEON_3R_VQSHL 9 -#define NEON_3R_VRSHL 10 -#define NEON_3R_VQRSHL 11 -#define NEON_3R_VMAX 12 -#define NEON_3R_VMIN 13 -#define NEON_3R_VABD 14 -#define NEON_3R_VABA 15 -#define NEON_3R_VADD_VSUB 16 -#define NEON_3R_VTST_VCEQ 17 -#define NEON_3R_VML 18 /* VMLA, VMLS */ -#define NEON_3R_VMUL 19 -#define NEON_3R_VPMAX 20 -#define NEON_3R_VPMIN 21 -#define NEON_3R_VQDMULH_VQRDMULH 22 -#define NEON_3R_VPADD_VQRDMLAH 23 -#define NEON_3R_SHA 24 /* SHA1C,SHA1P,SHA1M,SHA1SU0,SHA256H{2},SHA256SU1 */ -#define NEON_3R_VFM_VQRDMLSH 25 /* VFMA, VFMS, VQRDMLSH */ -#define NEON_3R_FLOAT_ARITH 26 /* float VADD, VSUB, VPADD, VABD */ -#define NEON_3R_FLOAT_MULTIPLY 27 /* float VMLA, VMLS, VMUL */ -#define NEON_3R_FLOAT_CMP 28 /* float VCEQ, VCGE, VCGT */ -#define NEON_3R_FLOAT_ACMP 29 /* float VACGE, VACGT, VACLE, VACLT */ -#define NEON_3R_FLOAT_MINMAX 30 /* float VMIN, VMAX */ -#define NEON_3R_FLOAT_MISC 31 /* float VRECPS, VRSQRTS, VMAXNM/MINNM */ - -static const uint8_t neon_3r_sizes[] = { - [NEON_3R_VHADD] = 0x7, - [NEON_3R_VQADD] = 0xf, - [NEON_3R_VRHADD] = 0x7, - [NEON_3R_LOGIC] = 0xf, /* size field encodes op type */ - [NEON_3R_VHSUB] = 0x7, - [NEON_3R_VQSUB] = 0xf, - [NEON_3R_VCGT] = 0x7, - [NEON_3R_VCGE] = 0x7, - [NEON_3R_VSHL] = 0xf, - [NEON_3R_VQSHL] = 0xf, - [NEON_3R_VRSHL] = 0xf, - [NEON_3R_VQRSHL] = 0xf, - [NEON_3R_VMAX] = 0x7, - [NEON_3R_VMIN] = 0x7, - [NEON_3R_VABD] = 0x7, - [NEON_3R_VABA] = 0x7, - [NEON_3R_VADD_VSUB] = 0xf, - [NEON_3R_VTST_VCEQ] = 0x7, - [NEON_3R_VML] = 0x7, - [NEON_3R_VMUL] = 0x7, - [NEON_3R_VPMAX] = 0x7, - [NEON_3R_VPMIN] = 0x7, - [NEON_3R_VQDMULH_VQRDMULH] = 0x6, - [NEON_3R_VPADD_VQRDMLAH] = 0x7, - [NEON_3R_SHA] = 0xf, /* size field encodes op type */ - [NEON_3R_VFM_VQRDMLSH] = 0x7, /* For VFM, size bit 1 encodes op */ - [NEON_3R_FLOAT_ARITH] = 0x5, /* size bit 1 encodes op */ - [NEON_3R_FLOAT_MULTIPLY] = 0x5, /* size bit 1 encodes op */ - [NEON_3R_FLOAT_CMP] = 0x5, /* size bit 1 encodes op */ - [NEON_3R_FLOAT_ACMP] = 0x5, /* size bit 1 encodes op */ - [NEON_3R_FLOAT_MINMAX] = 0x5, /* size bit 1 encodes op */ - [NEON_3R_FLOAT_MISC] = 0x5, /* size bit 1 encodes op */ -}; + tcg_gen_shri_vec(tcg_ctx, vece, t, a, shift - 1); + tcg_gen_dupi_vec(tcg_ctx, vece, ones, 1); + 
tcg_gen_and_vec(tcg_ctx, vece, t, t, ones); + tcg_gen_shri_vec(tcg_ctx, vece, d, a, shift); + tcg_gen_add_vec(tcg_ctx, vece, d, d, t); -/* Symbolic constants for op fields for Neon 2-register miscellaneous. - * The values correspond to bits [17:16,10:7]; see the ARM ARM DDI0406B - * table A7-13. - */ -#define NEON_2RM_VREV64 0 -#define NEON_2RM_VREV32 1 -#define NEON_2RM_VREV16 2 -#define NEON_2RM_VPADDL 4 -#define NEON_2RM_VPADDL_U 5 -#define NEON_2RM_AESE 6 /* Includes AESD */ -#define NEON_2RM_AESMC 7 /* Includes AESIMC */ -#define NEON_2RM_VCLS 8 -#define NEON_2RM_VCLZ 9 -#define NEON_2RM_VCNT 10 -#define NEON_2RM_VMVN 11 -#define NEON_2RM_VPADAL 12 -#define NEON_2RM_VPADAL_U 13 -#define NEON_2RM_VQABS 14 -#define NEON_2RM_VQNEG 15 -#define NEON_2RM_VCGT0 16 -#define NEON_2RM_VCGE0 17 -#define NEON_2RM_VCEQ0 18 -#define NEON_2RM_VCLE0 19 -#define NEON_2RM_VCLT0 20 -#define NEON_2RM_SHA1H 21 -#define NEON_2RM_VABS 22 -#define NEON_2RM_VNEG 23 -#define NEON_2RM_VCGT0_F 24 -#define NEON_2RM_VCGE0_F 25 -#define NEON_2RM_VCEQ0_F 26 -#define NEON_2RM_VCLE0_F 27 -#define NEON_2RM_VCLT0_F 28 -#define NEON_2RM_VABS_F 30 -#define NEON_2RM_VNEG_F 31 -#define NEON_2RM_VSWP 32 -#define NEON_2RM_VTRN 33 -#define NEON_2RM_VUZP 34 -#define NEON_2RM_VZIP 35 -#define NEON_2RM_VMOVN 36 /* Includes VQMOVN, VQMOVUN */ -#define NEON_2RM_VQMOVN 37 /* Includes VQMOVUN */ -#define NEON_2RM_VSHLL 38 -#define NEON_2RM_SHA1SU1 39 /* Includes SHA256SU0 */ -#define NEON_2RM_VRINTN 40 -#define NEON_2RM_VRINTX 41 -#define NEON_2RM_VRINTA 42 -#define NEON_2RM_VRINTZ 43 -#define NEON_2RM_VCVT_F16_F32 44 -#define NEON_2RM_VRINTM 45 -#define NEON_2RM_VCVT_F32_F16 46 -#define NEON_2RM_VRINTP 47 -#define NEON_2RM_VCVTAU 48 -#define NEON_2RM_VCVTAS 49 -#define NEON_2RM_VCVTNU 50 -#define NEON_2RM_VCVTNS 51 -#define NEON_2RM_VCVTPU 52 -#define NEON_2RM_VCVTPS 53 -#define NEON_2RM_VCVTMU 54 -#define NEON_2RM_VCVTMS 55 -#define NEON_2RM_VRECPE 56 -#define NEON_2RM_VRSQRTE 57 -#define NEON_2RM_VRECPE_F 58 -#define NEON_2RM_VRSQRTE_F 59 -#define NEON_2RM_VCVT_FS 60 -#define NEON_2RM_VCVT_FU 61 -#define NEON_2RM_VCVT_SF 62 -#define NEON_2RM_VCVT_UF 63 - -static bool neon_2rm_is_v8_op(int op) -{ - /* Return true if this neon 2reg-misc op is ARMv8 and up */ - switch (op) { - case NEON_2RM_VRINTN: - case NEON_2RM_VRINTA: - case NEON_2RM_VRINTM: - case NEON_2RM_VRINTP: - case NEON_2RM_VRINTZ: - case NEON_2RM_VRINTX: - case NEON_2RM_VCVTAU: - case NEON_2RM_VCVTAS: - case NEON_2RM_VCVTNU: - case NEON_2RM_VCVTNS: - case NEON_2RM_VCVTPU: - case NEON_2RM_VCVTPS: - case NEON_2RM_VCVTMU: - case NEON_2RM_VCVTMS: - return true; - default: - return false; - } + tcg_temp_free_vec(tcg_ctx, t); + tcg_temp_free_vec(tcg_ctx, ones); } -/* Each entry in this array has bit n set if the insn allows - * size value n (otherwise it will UNDEF). Since unallocated - * op values will have no bits set they always UNDEF. 
- */ -static const uint8_t neon_2rm_sizes[] = { - [NEON_2RM_VREV64] = 0x7, - [NEON_2RM_VREV32] = 0x3, - [NEON_2RM_VREV16] = 0x1, - [NEON_2RM_VPADDL] = 0x7, - [NEON_2RM_VPADDL_U] = 0x7, - [NEON_2RM_AESE] = 0x1, - [NEON_2RM_AESMC] = 0x1, - [NEON_2RM_VCLS] = 0x7, - [NEON_2RM_VCLZ] = 0x7, - [NEON_2RM_VCNT] = 0x1, - [NEON_2RM_VMVN] = 0x1, - [NEON_2RM_VPADAL] = 0x7, - [NEON_2RM_VPADAL_U] = 0x7, - [NEON_2RM_VQABS] = 0x7, - [NEON_2RM_VQNEG] = 0x7, - [NEON_2RM_VCGT0] = 0x7, - [NEON_2RM_VCGE0] = 0x7, - [NEON_2RM_VCEQ0] = 0x7, - [NEON_2RM_VCLE0] = 0x7, - [NEON_2RM_VCLT0] = 0x7, - [NEON_2RM_SHA1H] = 0x4, - [NEON_2RM_VABS] = 0x7, - [NEON_2RM_VNEG] = 0x7, - [NEON_2RM_VCGT0_F] = 0x4, - [NEON_2RM_VCGE0_F] = 0x4, - [NEON_2RM_VCEQ0_F] = 0x4, - [NEON_2RM_VCLE0_F] = 0x4, - [NEON_2RM_VCLT0_F] = 0x4, - [NEON_2RM_VABS_F] = 0x4, - [NEON_2RM_VNEG_F] = 0x4, - [NEON_2RM_VSWP] = 0x1, - [NEON_2RM_VTRN] = 0x7, - [NEON_2RM_VUZP] = 0x7, - [NEON_2RM_VZIP] = 0x7, - [NEON_2RM_VMOVN] = 0x7, - [NEON_2RM_VQMOVN] = 0x7, - [NEON_2RM_VSHLL] = 0x7, - [NEON_2RM_SHA1SU1] = 0x4, - [NEON_2RM_VRINTN] = 0x4, - [NEON_2RM_VRINTX] = 0x4, - [NEON_2RM_VRINTA] = 0x4, - [NEON_2RM_VRINTZ] = 0x4, - [NEON_2RM_VCVT_F16_F32] = 0x2, - [NEON_2RM_VRINTM] = 0x4, - [NEON_2RM_VCVT_F32_F16] = 0x2, - [NEON_2RM_VRINTP] = 0x4, - [NEON_2RM_VCVTAU] = 0x4, - [NEON_2RM_VCVTAS] = 0x4, - [NEON_2RM_VCVTNU] = 0x4, - [NEON_2RM_VCVTNS] = 0x4, - [NEON_2RM_VCVTPU] = 0x4, - [NEON_2RM_VCVTPS] = 0x4, - [NEON_2RM_VCVTMU] = 0x4, - [NEON_2RM_VCVTMS] = 0x4, - [NEON_2RM_VRECPE] = 0x4, - [NEON_2RM_VRSQRTE] = 0x4, - [NEON_2RM_VRECPE_F] = 0x4, - [NEON_2RM_VRSQRTE_F] = 0x4, - [NEON_2RM_VCVT_FS] = 0x4, - [NEON_2RM_VCVT_FU] = 0x4, - [NEON_2RM_VCVT_SF] = 0x4, - [NEON_2RM_VCVT_UF] = 0x4, -}; +void gen_gvec_urshr(TCGContext *tcg_ctx, unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs, + int64_t shift, uint32_t opr_sz, uint32_t max_sz) +{ + static const TCGOpcode vecop_list[] = { + INDEX_op_shri_vec, INDEX_op_add_vec, 0 + }; + static const GVecGen2i ops[4] = { + { .fni8 = gen_urshr8_i64, + .fniv = gen_urshr_vec, + .fno = gen_helper_gvec_urshr_b, + .opt_opc = vecop_list, + .vece = MO_8 }, + { .fni8 = gen_urshr16_i64, + .fniv = gen_urshr_vec, + .fno = gen_helper_gvec_urshr_h, + .opt_opc = vecop_list, + .vece = MO_16 }, + { .fni4 = gen_urshr32_i32, + .fniv = gen_urshr_vec, + .fno = gen_helper_gvec_urshr_s, + .opt_opc = vecop_list, + .vece = MO_32 }, + { .fni8 = gen_urshr64_i64, + .fniv = gen_urshr_vec, + .fno = gen_helper_gvec_urshr_d, + .prefer_i64 = TCG_TARGET_REG_BITS == 64, + .opt_opc = vecop_list, + .vece = MO_64 }, + }; + /* tszimm encoding produces immediates in the range [1..esize] */ + tcg_debug_assert(shift > 0); + tcg_debug_assert(shift <= (8 << vece)); -/* Expand v8.1 simd helper. */ -static int do_v81_helper(DisasContext *s, gen_helper_gvec_3_ptr *fn, - int q, int rd, int rn, int rm) -{ - TCGContext *tcg_ctx = s->uc->tcg_ctx; - if (dc_isar_feature(aa32_rdm, s)) { - int opr_sz = (1 + q) * 8; - tcg_gen_gvec_3_ptr(tcg_ctx, vfp_reg_offset(1, rd), - vfp_reg_offset(1, rn), - vfp_reg_offset(1, rm), tcg_ctx->cpu_env, - opr_sz, opr_sz, 0, fn); - return 0; + if (shift == (8 << vece)) { + /* + * Shifts larger than the element size are architecturally valid. + * Unsigned results in zero. With rounding, this produces a + * copy of the most significant bit. 
+ */ + tcg_gen_gvec_shri(tcg_ctx, vece, rd_ofs, rm_ofs, shift - 1, opr_sz, max_sz); + } else { + tcg_gen_gvec_2i(tcg_ctx, rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]); } - return 1; } -static void gen_ssra8_i64(TCGContext *tcg_ctx, TCGv_i64 d, TCGv_i64 a, int64_t shift) +static void gen_ursra8_i64(TCGContext *tcg_ctx, TCGv_i64 d, TCGv_i64 a, int64_t sh) { - tcg_gen_vec_sar8i_i64(tcg_ctx, a, a, shift); - tcg_gen_vec_add8_i64(tcg_ctx, d, d, a); -} + TCGv_i64 t = tcg_temp_new_i64(tcg_ctx); -static void gen_ssra16_i64(TCGContext *tcg_ctx, TCGv_i64 d, TCGv_i64 a, int64_t shift) -{ - tcg_gen_vec_sar16i_i64(tcg_ctx, a, a, shift); - tcg_gen_vec_add16_i64(tcg_ctx, d, d, a); + if (sh == 8) { + tcg_gen_vec_shr8i_i64(tcg_ctx, t, a, 7); + } else { + gen_urshr8_i64(tcg_ctx, t, a, sh); + } + tcg_gen_vec_add8_i64(tcg_ctx, d, d, t); + tcg_temp_free_i64(tcg_ctx, t); } -static void gen_ssra32_i32(TCGContext *tcg_ctx, TCGv_i32 d, TCGv_i32 a, int32_t shift) +static void gen_ursra16_i64(TCGContext *tcg_ctx, TCGv_i64 d, TCGv_i64 a, int64_t sh) { - tcg_gen_sari_i32(tcg_ctx, a, a, shift); - tcg_gen_add_i32(tcg_ctx, d, d, a); -} + TCGv_i64 t = tcg_temp_new_i64(tcg_ctx); -static void gen_ssra64_i64(TCGContext *tcg_ctx, TCGv_i64 d, TCGv_i64 a, int64_t shift) -{ - tcg_gen_sari_i64(tcg_ctx, a, a, shift); - tcg_gen_add_i64(tcg_ctx, d, d, a); + if (sh == 16) { + tcg_gen_vec_shr16i_i64(tcg_ctx, t, a, 15); + } else { + gen_urshr16_i64(tcg_ctx, t, a, sh); + } + tcg_gen_vec_add16_i64(tcg_ctx, d, d, t); + tcg_temp_free_i64(tcg_ctx, t); } -static void gen_ssra_vec(TCGContext *tcg_ctx, unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh) +static void gen_ursra32_i32(TCGContext *tcg_ctx, TCGv_i32 d, TCGv_i32 a, int32_t sh) { - tcg_gen_sari_vec(tcg_ctx, vece, a, a, sh); - tcg_gen_add_vec(tcg_ctx, vece, d, d, a); -} - -static const TCGOpcode vecop_list_ssra[] = { - INDEX_op_sari_vec, INDEX_op_add_vec, 0 -}; - -const GVecGen2i ssra_op[4] = { - { .fni8 = gen_ssra8_i64, - .fniv = gen_ssra_vec, - .load_dest = true, - .opt_opc = vecop_list_ssra, - .vece = MO_8 }, - { .fni8 = gen_ssra16_i64, - .fniv = gen_ssra_vec, - .load_dest = true, - .opt_opc = vecop_list_ssra, - .vece = MO_16 }, - { .fni4 = gen_ssra32_i32, - .fniv = gen_ssra_vec, - .load_dest = true, - .opt_opc = vecop_list_ssra, - .vece = MO_32 }, - { .fni8 = gen_ssra64_i64, - .fniv = gen_ssra_vec, - .prefer_i64 = TCG_TARGET_REG_BITS == 64, - .opt_opc = vecop_list_ssra, - .load_dest = true, - .vece = MO_64 }, -}; + TCGv_i32 t = tcg_temp_new_i32(tcg_ctx); -static void gen_usra8_i64(TCGContext *tcg_ctx, TCGv_i64 d, TCGv_i64 a, int64_t shift) -{ - tcg_gen_vec_shr8i_i64(tcg_ctx, a, a, shift); - tcg_gen_vec_add8_i64(tcg_ctx, d, d, a); + if (sh == 32) { + tcg_gen_shri_i32(tcg_ctx, t, a, 31); + } else { + gen_urshr32_i32(tcg_ctx, t, a, sh); + } + tcg_gen_add_i32(tcg_ctx, d, d, t); + tcg_temp_free_i32(tcg_ctx, t); } -static void gen_usra16_i64(TCGContext *tcg_ctx, TCGv_i64 d, TCGv_i64 a, int64_t shift) +static void gen_ursra64_i64(TCGContext *tcg_ctx, TCGv_i64 d, TCGv_i64 a, int64_t sh) { - tcg_gen_vec_shr16i_i64(tcg_ctx, a, a, shift); - tcg_gen_vec_add16_i64(tcg_ctx, d, d, a); -} + TCGv_i64 t = tcg_temp_new_i64(tcg_ctx); -static void gen_usra32_i32(TCGContext *tcg_ctx, TCGv_i32 d, TCGv_i32 a, int32_t shift) -{ - tcg_gen_shri_i32(tcg_ctx, a, a, shift); - tcg_gen_add_i32(tcg_ctx, d, d, a); + if (sh == 64) { + tcg_gen_shri_i64(tcg_ctx, t, a, 63); + } else { + gen_urshr64_i64(tcg_ctx, t, a, sh); + } + tcg_gen_add_i64(tcg_ctx, d, d, t); + tcg_temp_free_i64(tcg_ctx, t); } -static void 
gen_usra64_i64(TCGContext *tcg_ctx, TCGv_i64 d, TCGv_i64 a, int64_t shift) +static void gen_ursra_vec(TCGContext *tcg_ctx, unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh) { - tcg_gen_shri_i64(tcg_ctx, a, a, shift); - tcg_gen_add_i64(tcg_ctx, d, d, a); + TCGv_vec t = tcg_temp_new_vec_matching(tcg_ctx, d); + + if (sh == (8 << vece)) { + tcg_gen_shri_vec(tcg_ctx, vece, t, a, sh - 1); + } else { + gen_urshr_vec(tcg_ctx, vece, t, a, sh); + } + tcg_gen_add_vec(tcg_ctx, vece, d, d, t); + tcg_temp_free_vec(tcg_ctx, t); } -static void gen_usra_vec(TCGContext *tcg_ctx, unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh) +void gen_gvec_ursra(TCGContext *tcg_ctx, unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs, + int64_t shift, uint32_t opr_sz, uint32_t max_sz) { - tcg_gen_shri_vec(tcg_ctx, vece, a, a, sh); - tcg_gen_add_vec(tcg_ctx, vece, d, d, a); -} + static const TCGOpcode vecop_list[] = { + INDEX_op_shri_vec, INDEX_op_add_vec, 0 + }; + static const GVecGen2i ops[4] = { + { .fni8 = gen_ursra8_i64, + .fniv = gen_ursra_vec, + .fno = gen_helper_gvec_ursra_b, + .opt_opc = vecop_list, + .load_dest = true, + .vece = MO_8 }, + { .fni8 = gen_ursra16_i64, + .fniv = gen_ursra_vec, + .fno = gen_helper_gvec_ursra_h, + .opt_opc = vecop_list, + .load_dest = true, + .vece = MO_16 }, + { .fni4 = gen_ursra32_i32, + .fniv = gen_ursra_vec, + .fno = gen_helper_gvec_ursra_s, + .opt_opc = vecop_list, + .load_dest = true, + .vece = MO_32 }, + { .fni8 = gen_ursra64_i64, + .fniv = gen_ursra_vec, + .fno = gen_helper_gvec_ursra_d, + .prefer_i64 = TCG_TARGET_REG_BITS == 64, + .opt_opc = vecop_list, + .load_dest = true, + .vece = MO_64 }, + }; -static const TCGOpcode vecop_list_usra[] = { - INDEX_op_shri_vec, INDEX_op_add_vec, 0 -}; + /* tszimm encoding produces immediates in the range [1..esize] */ + tcg_debug_assert(shift > 0); + tcg_debug_assert(shift <= (8 << vece)); -const GVecGen2i usra_op[4] = { - { .fni8 = gen_usra8_i64, - .fniv = gen_usra_vec, - .load_dest = true, - .opt_opc = vecop_list_usra, - .vece = MO_8, }, - { .fni8 = gen_usra16_i64, - .fniv = gen_usra_vec, - .load_dest = true, - .opt_opc = vecop_list_usra, - .vece = MO_16, }, - { .fni4 = gen_usra32_i32, - .fniv = gen_usra_vec, - .load_dest = true, - .opt_opc = vecop_list_usra, - .vece = MO_32, }, - { .fni8 = gen_usra64_i64, - .fniv = gen_usra_vec, - .prefer_i64 = TCG_TARGET_REG_BITS == 64, - .load_dest = true, - .opt_opc = vecop_list_usra, - .vece = MO_64, }, -}; + tcg_gen_gvec_2i(tcg_ctx, rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]); +} static void gen_shr8_ins_i64(TCGContext *tcg_ctx, TCGv_i64 d, TCGv_i64 a, int64_t shift) { @@ -4157,47 +3606,62 @@ static void gen_shr64_ins_i64(TCGContext *tcg_ctx, TCGv_i64 d, TCGv_i64 a, int64 static void gen_shr_ins_vec(TCGContext *tcg_ctx, unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh) { - if (sh == 0) { - tcg_gen_mov_vec(tcg_ctx, d, a); + TCGv_vec t = tcg_temp_new_vec_matching(tcg_ctx, d); + TCGv_vec m = tcg_temp_new_vec_matching(tcg_ctx, d); + + tcg_gen_dupi_vec(tcg_ctx, vece, m, MAKE_64BIT_MASK((8 << vece) - sh, sh)); + tcg_gen_shri_vec(tcg_ctx, vece, t, a, sh); + tcg_gen_and_vec(tcg_ctx, vece, d, d, m); + tcg_gen_or_vec(tcg_ctx, vece, d, d, t); + + tcg_temp_free_vec(tcg_ctx, t); + tcg_temp_free_vec(tcg_ctx, m); +} + +void gen_gvec_sri(TCGContext *tcg_ctx, unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs, + int64_t shift, uint32_t opr_sz, uint32_t max_sz) +{ + static const TCGOpcode vecop_list[] = { INDEX_op_shri_vec, 0 }; + const GVecGen2i ops[4] = { + { .fni8 = gen_shr8_ins_i64, + .fniv = gen_shr_ins_vec, 
+ .fno = gen_helper_gvec_sri_b, + .load_dest = true, + .opt_opc = vecop_list, + .vece = MO_8 }, + { .fni8 = gen_shr16_ins_i64, + .fniv = gen_shr_ins_vec, + .fno = gen_helper_gvec_sri_h, + .load_dest = true, + .opt_opc = vecop_list, + .vece = MO_16 }, + { .fni4 = gen_shr32_ins_i32, + .fniv = gen_shr_ins_vec, + .fno = gen_helper_gvec_sri_s, + .load_dest = true, + .opt_opc = vecop_list, + .vece = MO_32 }, + { .fni8 = gen_shr64_ins_i64, + .fniv = gen_shr_ins_vec, + .fno = gen_helper_gvec_sri_d, + .prefer_i64 = TCG_TARGET_REG_BITS == 64, + .load_dest = true, + .opt_opc = vecop_list, + .vece = MO_64 }, + }; + + /* tszimm encoding produces immediates in the range [1..esize]. */ + tcg_debug_assert(shift > 0); + tcg_debug_assert(shift <= (8 << vece)); + + /* Shift of esize leaves destination unchanged. */ + if (shift < (8 << vece)) { + tcg_gen_gvec_2i(tcg_ctx, rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]); } else { - TCGv_vec t = tcg_temp_new_vec_matching(tcg_ctx, d); - TCGv_vec m = tcg_temp_new_vec_matching(tcg_ctx, d); - - tcg_gen_dupi_vec(tcg_ctx, vece, m, MAKE_64BIT_MASK((8 << vece) - sh, sh)); - tcg_gen_shri_vec(tcg_ctx, vece, t, a, sh); - tcg_gen_and_vec(tcg_ctx, vece, d, d, m); - tcg_gen_or_vec(tcg_ctx, vece, d, d, t); - - tcg_temp_free_vec(tcg_ctx, t); - tcg_temp_free_vec(tcg_ctx, m); - } -} - -static const TCGOpcode vecop_list_sri[] = { INDEX_op_shri_vec, 0 }; - -const GVecGen2i sri_op[4] = { - { .fni8 = gen_shr8_ins_i64, - .fniv = gen_shr_ins_vec, - .load_dest = true, - .opt_opc = vecop_list_sri, - .vece = MO_8 }, - { .fni8 = gen_shr16_ins_i64, - .fniv = gen_shr_ins_vec, - .load_dest = true, - .opt_opc = vecop_list_sri, - .vece = MO_16 }, - { .fni4 = gen_shr32_ins_i32, - .fniv = gen_shr_ins_vec, - .load_dest = true, - .opt_opc = vecop_list_sri, - .vece = MO_32 }, - { .fni8 = gen_shr64_ins_i64, - .fniv = gen_shr_ins_vec, - .prefer_i64 = TCG_TARGET_REG_BITS == 64, - .load_dest = true, - .opt_opc = vecop_list_sri, - .vece = MO_64 }, -}; + /* Nop, but we do need to clear the tail. 
*/ + tcg_gen_gvec_mov(tcg_ctx, vece, rd_ofs, rd_ofs, opr_sz, max_sz); + } +} static void gen_shl8_ins_i64(TCGContext *tcg_ctx, TCGv_i64 d, TCGv_i64 a, int64_t shift) { @@ -4235,47 +3699,60 @@ static void gen_shl64_ins_i64(TCGContext *tcg_ctx, TCGv_i64 d, TCGv_i64 a, int64 static void gen_shl_ins_vec(TCGContext *tcg_ctx, unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh) { - if (sh == 0) { - tcg_gen_mov_vec(tcg_ctx, d, a); + TCGv_vec t = tcg_temp_new_vec_matching(tcg_ctx, d); + TCGv_vec m = tcg_temp_new_vec_matching(tcg_ctx, d); + + tcg_gen_shli_vec(tcg_ctx, vece, t, a, sh); + tcg_gen_dupi_vec(tcg_ctx, vece, m, MAKE_64BIT_MASK(0, sh)); + tcg_gen_and_vec(tcg_ctx, vece, d, d, m); + tcg_gen_or_vec(tcg_ctx, vece, d, d, t); + + tcg_temp_free_vec(tcg_ctx, t); + tcg_temp_free_vec(tcg_ctx, m); +} + +void gen_gvec_sli(TCGContext *tcg_ctx, unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs, + int64_t shift, uint32_t opr_sz, uint32_t max_sz) +{ + static const TCGOpcode vecop_list[] = { INDEX_op_shli_vec, 0 }; + const GVecGen2i ops[4] = { + { .fni8 = gen_shl8_ins_i64, + .fniv = gen_shl_ins_vec, + .fno = gen_helper_gvec_sli_b, + .load_dest = true, + .opt_opc = vecop_list, + .vece = MO_8 }, + { .fni8 = gen_shl16_ins_i64, + .fniv = gen_shl_ins_vec, + .fno = gen_helper_gvec_sli_h, + .load_dest = true, + .opt_opc = vecop_list, + .vece = MO_16 }, + { .fni4 = gen_shl32_ins_i32, + .fniv = gen_shl_ins_vec, + .fno = gen_helper_gvec_sli_s, + .load_dest = true, + .opt_opc = vecop_list, + .vece = MO_32 }, + { .fni8 = gen_shl64_ins_i64, + .fniv = gen_shl_ins_vec, + .fno = gen_helper_gvec_sli_d, + .prefer_i64 = TCG_TARGET_REG_BITS == 64, + .load_dest = true, + .opt_opc = vecop_list, + .vece = MO_64 }, + }; + + /* tszimm encoding produces immediates in the range [0..esize-1]. */ + tcg_debug_assert(shift >= 0); + tcg_debug_assert(shift < (8 << vece)); + + if (shift == 0) { + tcg_gen_gvec_mov(tcg_ctx, vece, rd_ofs, rm_ofs, opr_sz, max_sz); } else { - TCGv_vec t = tcg_temp_new_vec_matching(tcg_ctx, d); - TCGv_vec m = tcg_temp_new_vec_matching(tcg_ctx, d); - - tcg_gen_dupi_vec(tcg_ctx, vece, m, MAKE_64BIT_MASK(0, sh)); - tcg_gen_shli_vec(tcg_ctx, vece, t, a, sh); - tcg_gen_and_vec(tcg_ctx, vece, d, d, m); - tcg_gen_or_vec(tcg_ctx, vece, d, d, t); - - tcg_temp_free_vec(tcg_ctx, t); - tcg_temp_free_vec(tcg_ctx, m); - } -} - -static const TCGOpcode vecop_list_sli[] = { INDEX_op_shli_vec, 0 }; - -const GVecGen2i sli_op[4] = { - { .fni8 = gen_shl8_ins_i64, - .fniv = gen_shl_ins_vec, - .load_dest = true, - .opt_opc = vecop_list_sli, - .vece = MO_8 }, - { .fni8 = gen_shl16_ins_i64, - .fniv = gen_shl_ins_vec, - .load_dest = true, - .opt_opc = vecop_list_sli, - .vece = MO_16 }, - { .fni4 = gen_shl32_ins_i32, - .fniv = gen_shl_ins_vec, - .load_dest = true, - .opt_opc = vecop_list_sli, - .vece = MO_32 }, - { .fni8 = gen_shl64_ins_i64, - .fniv = gen_shl_ins_vec, - .prefer_i64 = TCG_TARGET_REG_BITS == 64, - .load_dest = true, - .opt_opc = vecop_list_sli, - .vece = MO_64 }, -}; + tcg_gen_gvec_2i(tcg_ctx, rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]); + } +} static void gen_mla8_i32(TCGContext *tcg_ctx, TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) { @@ -4340,62 +3817,69 @@ static void gen_mls_vec(TCGContext *tcg_ctx, unsigned vece, TCGv_vec d, TCGv_vec /* Note that while NEON does not support VMLA and VMLS as 64-bit ops, * these tables are shared with AArch64 which does support them. 
*/ +void gen_gvec_mla(TCGContext *tcg_ctx, unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) +{ + static const TCGOpcode vecop_list[] = { + INDEX_op_mul_vec, INDEX_op_add_vec, 0 + }; + static const GVecGen3 ops[4] = { + { .fni4 = gen_mla8_i32, + .fniv = gen_mla_vec, + .load_dest = true, + .opt_opc = vecop_list, + .vece = MO_8 }, + { .fni4 = gen_mla16_i32, + .fniv = gen_mla_vec, + .load_dest = true, + .opt_opc = vecop_list, + .vece = MO_16 }, + { .fni4 = gen_mla32_i32, + .fniv = gen_mla_vec, + .load_dest = true, + .opt_opc = vecop_list, + .vece = MO_32 }, + { .fni8 = gen_mla64_i64, + .fniv = gen_mla_vec, + .prefer_i64 = TCG_TARGET_REG_BITS == 64, + .load_dest = true, + .opt_opc = vecop_list, + .vece = MO_64 }, + }; + tcg_gen_gvec_3(tcg_ctx, rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]); +} -static const TCGOpcode vecop_list_mla[] = { - INDEX_op_mul_vec, INDEX_op_add_vec, 0 -}; - -static const TCGOpcode vecop_list_mls[] = { - INDEX_op_mul_vec, INDEX_op_sub_vec, 0 -}; - -const GVecGen3 mla_op[4] = { - { .fni4 = gen_mla8_i32, - .fniv = gen_mla_vec, - .load_dest = true, - .opt_opc = vecop_list_mla, - .vece = MO_8 }, - { .fni4 = gen_mla16_i32, - .fniv = gen_mla_vec, - .load_dest = true, - .opt_opc = vecop_list_mla, - .vece = MO_16 }, - { .fni4 = gen_mla32_i32, - .fniv = gen_mla_vec, - .load_dest = true, - .opt_opc = vecop_list_mla, - .vece = MO_32 }, - { .fni8 = gen_mla64_i64, - .fniv = gen_mla_vec, - .prefer_i64 = TCG_TARGET_REG_BITS == 64, - .load_dest = true, - .opt_opc = vecop_list_mla, - .vece = MO_64 }, -}; - -const GVecGen3 mls_op[4] = { - { .fni4 = gen_mls8_i32, - .fniv = gen_mls_vec, - .load_dest = true, - .opt_opc = vecop_list_mls, - .vece = MO_8 }, - { .fni4 = gen_mls16_i32, - .fniv = gen_mls_vec, - .load_dest = true, - .opt_opc = vecop_list_mls, - .vece = MO_16 }, - { .fni4 = gen_mls32_i32, - .fniv = gen_mls_vec, - .load_dest = true, - .opt_opc = vecop_list_mls, - .vece = MO_32 }, - { .fni8 = gen_mls64_i64, - .fniv = gen_mls_vec, - .prefer_i64 = TCG_TARGET_REG_BITS == 64, - .load_dest = true, - .opt_opc = vecop_list_mls, - .vece = MO_64 }, -}; +void gen_gvec_mls(TCGContext *tcg_ctx, unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) +{ + static const TCGOpcode vecop_list[] = { + INDEX_op_mul_vec, INDEX_op_sub_vec, 0 + }; + static const GVecGen3 ops[4] = { + { .fni4 = gen_mls8_i32, + .fniv = gen_mls_vec, + .load_dest = true, + .opt_opc = vecop_list, + .vece = MO_8 }, + { .fni4 = gen_mls16_i32, + .fniv = gen_mls_vec, + .load_dest = true, + .opt_opc = vecop_list, + .vece = MO_16 }, + { .fni4 = gen_mls32_i32, + .fniv = gen_mls_vec, + .load_dest = true, + .opt_opc = vecop_list, + .vece = MO_32 }, + { .fni8 = gen_mls64_i64, + .fniv = gen_mls_vec, + .prefer_i64 = TCG_TARGET_REG_BITS == 64, + .load_dest = true, + .opt_opc = vecop_list, + .vece = MO_64 }, + }; + tcg_gen_gvec_3(tcg_ctx, rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]); +} /* CMTST : test is "if (X & Y != 0)". 
*/ static void gen_cmtst_i32(TCGContext *tcg_ctx, TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) @@ -4419,27 +3903,31 @@ static void gen_cmtst_vec(TCGContext *tcg_ctx, unsigned vece, TCGv_vec d, TCGv_v tcg_gen_cmp_vec(tcg_ctx, TCG_COND_NE, vece, d, d, a); } -static const TCGOpcode vecop_list_cmtst[] = { INDEX_op_cmp_vec, 0 }; - -const GVecGen3 cmtst_op[4] = { - { .fni4 = gen_helper_neon_tst_u8, - .fniv = gen_cmtst_vec, - .opt_opc = vecop_list_cmtst, - .vece = MO_8 }, - { .fni4 = gen_helper_neon_tst_u16, - .fniv = gen_cmtst_vec, - .opt_opc = vecop_list_cmtst, - .vece = MO_16 }, - { .fni4 = gen_cmtst_i32, - .fniv = gen_cmtst_vec, - .opt_opc = vecop_list_cmtst, - .vece = MO_32 }, - { .fni8 = gen_cmtst_i64, - .fniv = gen_cmtst_vec, - .prefer_i64 = TCG_TARGET_REG_BITS == 64, - .opt_opc = vecop_list_cmtst, - .vece = MO_64 }, -}; +void gen_gvec_cmtst(TCGContext *tcg_ctx, unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) +{ + static const TCGOpcode vecop_list[] = { INDEX_op_cmp_vec, 0 }; + static const GVecGen3 ops[4] = { + { .fni4 = gen_helper_neon_tst_u8, + .fniv = gen_cmtst_vec, + .opt_opc = vecop_list, + .vece = MO_8 }, + { .fni4 = gen_helper_neon_tst_u16, + .fniv = gen_cmtst_vec, + .opt_opc = vecop_list, + .vece = MO_16 }, + { .fni4 = gen_cmtst_i32, + .fniv = gen_cmtst_vec, + .opt_opc = vecop_list, + .vece = MO_32 }, + { .fni8 = gen_cmtst_i64, + .fniv = gen_cmtst_vec, + .prefer_i64 = TCG_TARGET_REG_BITS == 64, + .opt_opc = vecop_list, + .vece = MO_64 }, + }; + tcg_gen_gvec_3(tcg_ctx, rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]); +} void gen_ushl_i32(TCGContext *tcg_ctx, TCGv_i32 dst, TCGv_i32 src, TCGv_i32 shift) { @@ -4557,29 +4045,33 @@ static void gen_ushl_vec(TCGContext *tcg_ctx, unsigned vece, TCGv_vec dst, tcg_temp_free_vec(tcg_ctx, rsh); } -static const TCGOpcode ushl_list[] = { - INDEX_op_neg_vec, INDEX_op_shlv_vec, - INDEX_op_shrv_vec, INDEX_op_cmp_vec, 0 -}; - -const GVecGen3 ushl_op[4] = { - { .fniv = gen_ushl_vec, - .fno = gen_helper_gvec_ushl_b, - .opt_opc = ushl_list, - .vece = MO_8 }, - { .fniv = gen_ushl_vec, - .fno = gen_helper_gvec_ushl_h, - .opt_opc = ushl_list, - .vece = MO_16 }, - { .fni4 = gen_ushl_i32, - .fniv = gen_ushl_vec, - .opt_opc = ushl_list, - .vece = MO_32 }, - { .fni8 = gen_ushl_i64, - .fniv = gen_ushl_vec, - .opt_opc = ushl_list, - .vece = MO_64 }, -}; +void gen_gvec_ushl(TCGContext *tcg_ctx, unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) +{ + static const TCGOpcode vecop_list[] = { + INDEX_op_neg_vec, INDEX_op_shlv_vec, + INDEX_op_shrv_vec, INDEX_op_cmp_vec, 0 + }; + static const GVecGen3 ops[4] = { + { .fniv = gen_ushl_vec, + .fno = gen_helper_gvec_ushl_b, + .opt_opc = vecop_list, + .vece = MO_8 }, + { .fniv = gen_ushl_vec, + .fno = gen_helper_gvec_ushl_h, + .opt_opc = vecop_list, + .vece = MO_16 }, + { .fni4 = gen_ushl_i32, + .fniv = gen_ushl_vec, + .opt_opc = vecop_list, + .vece = MO_32 }, + { .fni8 = gen_ushl_i64, + .fniv = gen_ushl_vec, + .opt_opc = vecop_list, + .vece = MO_64 }, + }; + tcg_gen_gvec_3(tcg_ctx, rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]); +} void gen_sshl_i32(TCGContext *tcg_ctx, TCGv_i32 dst, TCGv_i32 src, TCGv_i32 shift) { @@ -4691,29 +4183,33 @@ static void gen_sshl_vec(TCGContext *tcg_ctx, unsigned vece, TCGv_vec dst, tcg_temp_free_vec(tcg_ctx, tmp); } -static const TCGOpcode sshl_list[] = { - INDEX_op_neg_vec, INDEX_op_umin_vec, INDEX_op_shlv_vec, - INDEX_op_sarv_vec, INDEX_op_cmp_vec, INDEX_op_cmpsel_vec, 0 -}; - -const 
GVecGen3 sshl_op[4] = { - { .fniv = gen_sshl_vec, - .fno = gen_helper_gvec_sshl_b, - .opt_opc = sshl_list, - .vece = MO_8 }, - { .fniv = gen_sshl_vec, - .fno = gen_helper_gvec_sshl_h, - .opt_opc = sshl_list, - .vece = MO_16 }, - { .fni4 = gen_sshl_i32, - .fniv = gen_sshl_vec, - .opt_opc = sshl_list, - .vece = MO_32 }, - { .fni8 = gen_sshl_i64, - .fniv = gen_sshl_vec, - .opt_opc = sshl_list, - .vece = MO_64 }, -}; +void gen_gvec_sshl(TCGContext *tcg_ctx, unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) +{ + static const TCGOpcode vecop_list[] = { + INDEX_op_neg_vec, INDEX_op_umin_vec, INDEX_op_shlv_vec, + INDEX_op_sarv_vec, INDEX_op_cmp_vec, INDEX_op_cmpsel_vec, 0 + }; + static const GVecGen3 ops[4] = { + { .fniv = gen_sshl_vec, + .fno = gen_helper_gvec_sshl_b, + .opt_opc = vecop_list, + .vece = MO_8 }, + { .fniv = gen_sshl_vec, + .fno = gen_helper_gvec_sshl_h, + .opt_opc = vecop_list, + .vece = MO_16 }, + { .fni4 = gen_sshl_i32, + .fniv = gen_sshl_vec, + .opt_opc = vecop_list, + .vece = MO_32 }, + { .fni8 = gen_sshl_i64, + .fniv = gen_sshl_vec, + .opt_opc = vecop_list, + .vece = MO_64 }, + }; + tcg_gen_gvec_3(tcg_ctx, rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]); +} static void gen_uqadd_vec(TCGContext *tcg_ctx, unsigned vece, TCGv_vec t, TCGv_vec sat, TCGv_vec a, TCGv_vec b) @@ -4726,32 +4222,37 @@ static void gen_uqadd_vec(TCGContext *tcg_ctx, unsigned vece, TCGv_vec t, TCGv_v tcg_temp_free_vec(tcg_ctx, x); } -static const TCGOpcode vecop_list_uqadd[] = { - INDEX_op_usadd_vec, INDEX_op_cmp_vec, INDEX_op_add_vec, 0 -}; - -const GVecGen4 uqadd_op[4] = { - { .fniv = gen_uqadd_vec, - .fno = gen_helper_gvec_uqadd_b, - .write_aofs = true, - .opt_opc = vecop_list_uqadd, - .vece = MO_8 }, - { .fniv = gen_uqadd_vec, - .fno = gen_helper_gvec_uqadd_h, - .write_aofs = true, - .opt_opc = vecop_list_uqadd, - .vece = MO_16 }, - { .fniv = gen_uqadd_vec, - .fno = gen_helper_gvec_uqadd_s, - .write_aofs = true, - .opt_opc = vecop_list_uqadd, - .vece = MO_32 }, - { .fniv = gen_uqadd_vec, - .fno = gen_helper_gvec_uqadd_d, - .write_aofs = true, - .opt_opc = vecop_list_uqadd, - .vece = MO_64 }, -}; +void gen_gvec_uqadd_qc(TCGContext *tcg_ctx, unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) +{ + static const TCGOpcode vecop_list[] = { + INDEX_op_usadd_vec, INDEX_op_cmp_vec, INDEX_op_add_vec, 0 + }; + static const GVecGen4 ops[4] = { + { .fniv = gen_uqadd_vec, + .fno = gen_helper_gvec_uqadd_b, + .write_aofs = true, + .opt_opc = vecop_list, + .vece = MO_8 }, + { .fniv = gen_uqadd_vec, + .fno = gen_helper_gvec_uqadd_h, + .write_aofs = true, + .opt_opc = vecop_list, + .vece = MO_16 }, + { .fniv = gen_uqadd_vec, + .fno = gen_helper_gvec_uqadd_s, + .write_aofs = true, + .opt_opc = vecop_list, + .vece = MO_32 }, + { .fniv = gen_uqadd_vec, + .fno = gen_helper_gvec_uqadd_d, + .write_aofs = true, + .opt_opc = vecop_list, + .vece = MO_64 }, + }; + tcg_gen_gvec_4(tcg_ctx, rd_ofs, offsetof(CPUARMState, vfp.qc), + rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]); +} static void gen_sqadd_vec(TCGContext *tcg_ctx, unsigned vece, TCGv_vec t, TCGv_vec sat, TCGv_vec a, TCGv_vec b) @@ -4764,32 +4265,37 @@ static void gen_sqadd_vec(TCGContext *tcg_ctx, unsigned vece, TCGv_vec t, TCGv_v tcg_temp_free_vec(tcg_ctx, x); } -static const TCGOpcode vecop_list_sqadd[] = { - INDEX_op_ssadd_vec, INDEX_op_cmp_vec, INDEX_op_add_vec, 0 -}; - -const GVecGen4 sqadd_op[4] = { - { .fniv = gen_sqadd_vec, - .fno = gen_helper_gvec_sqadd_b, - 
.opt_opc = vecop_list_sqadd, - .write_aofs = true, - .vece = MO_8 }, - { .fniv = gen_sqadd_vec, - .fno = gen_helper_gvec_sqadd_h, - .opt_opc = vecop_list_sqadd, - .write_aofs = true, - .vece = MO_16 }, - { .fniv = gen_sqadd_vec, - .fno = gen_helper_gvec_sqadd_s, - .opt_opc = vecop_list_sqadd, - .write_aofs = true, - .vece = MO_32 }, - { .fniv = gen_sqadd_vec, - .fno = gen_helper_gvec_sqadd_d, - .opt_opc = vecop_list_sqadd, - .write_aofs = true, - .vece = MO_64 }, -}; +void gen_gvec_sqadd_qc(TCGContext *tcg_ctx, unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) +{ + static const TCGOpcode vecop_list[] = { + INDEX_op_ssadd_vec, INDEX_op_cmp_vec, INDEX_op_add_vec, 0 + }; + static const GVecGen4 ops[4] = { + { .fniv = gen_sqadd_vec, + .fno = gen_helper_gvec_sqadd_b, + .opt_opc = vecop_list, + .write_aofs = true, + .vece = MO_8 }, + { .fniv = gen_sqadd_vec, + .fno = gen_helper_gvec_sqadd_h, + .opt_opc = vecop_list, + .write_aofs = true, + .vece = MO_16 }, + { .fniv = gen_sqadd_vec, + .fno = gen_helper_gvec_sqadd_s, + .opt_opc = vecop_list, + .write_aofs = true, + .vece = MO_32 }, + { .fniv = gen_sqadd_vec, + .fno = gen_helper_gvec_sqadd_d, + .opt_opc = vecop_list, + .write_aofs = true, + .vece = MO_64 }, + }; + tcg_gen_gvec_4(tcg_ctx, rd_ofs, offsetof(CPUARMState, vfp.qc), + rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]); +} static void gen_uqsub_vec(TCGContext *tcg_ctx, unsigned vece, TCGv_vec t, TCGv_vec sat, TCGv_vec a, TCGv_vec b) @@ -4802,32 +4308,37 @@ static void gen_uqsub_vec(TCGContext *tcg_ctx, unsigned vece, TCGv_vec t, TCGv_v tcg_temp_free_vec(tcg_ctx, x); } -static const TCGOpcode vecop_list_uqsub[] = { - INDEX_op_ussub_vec, INDEX_op_cmp_vec, INDEX_op_sub_vec, 0 -}; - -const GVecGen4 uqsub_op[4] = { - { .fniv = gen_uqsub_vec, - .fno = gen_helper_gvec_uqsub_b, - .opt_opc = vecop_list_uqsub, - .write_aofs = true, - .vece = MO_8 }, - { .fniv = gen_uqsub_vec, - .fno = gen_helper_gvec_uqsub_h, - .opt_opc = vecop_list_uqsub, - .write_aofs = true, - .vece = MO_16 }, - { .fniv = gen_uqsub_vec, - .fno = gen_helper_gvec_uqsub_s, - .opt_opc = vecop_list_uqsub, - .write_aofs = true, - .vece = MO_32 }, - { .fniv = gen_uqsub_vec, - .fno = gen_helper_gvec_uqsub_d, - .opt_opc = vecop_list_uqsub, - .write_aofs = true, - .vece = MO_64 }, -}; +void gen_gvec_uqsub_qc(TCGContext *tcg_ctx, unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) +{ + static const TCGOpcode vecop_list[] = { + INDEX_op_ussub_vec, INDEX_op_cmp_vec, INDEX_op_sub_vec, 0 + }; + static const GVecGen4 ops[4] = { + { .fniv = gen_uqsub_vec, + .fno = gen_helper_gvec_uqsub_b, + .opt_opc = vecop_list, + .write_aofs = true, + .vece = MO_8 }, + { .fniv = gen_uqsub_vec, + .fno = gen_helper_gvec_uqsub_h, + .opt_opc = vecop_list, + .write_aofs = true, + .vece = MO_16 }, + { .fniv = gen_uqsub_vec, + .fno = gen_helper_gvec_uqsub_s, + .opt_opc = vecop_list, + .write_aofs = true, + .vece = MO_32 }, + { .fniv = gen_uqsub_vec, + .fno = gen_helper_gvec_uqsub_d, + .opt_opc = vecop_list, + .write_aofs = true, + .vece = MO_64 }, + }; + tcg_gen_gvec_4(tcg_ctx, rd_ofs, offsetof(CPUARMState, vfp.qc), + rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]); +} static void gen_sqsub_vec(TCGContext *tcg_ctx, unsigned vece, TCGv_vec t, TCGv_vec sat, TCGv_vec a, TCGv_vec b) @@ -4840,2321 +4351,274 @@ static void gen_sqsub_vec(TCGContext *tcg_ctx, unsigned vece, TCGv_vec t, TCGv_v tcg_temp_free_vec(tcg_ctx, x); } -static const TCGOpcode vecop_list_sqsub[] = { - 
INDEX_op_sssub_vec, INDEX_op_cmp_vec, INDEX_op_sub_vec, 0 -}; - -const GVecGen4 sqsub_op[4] = { - { .fniv = gen_sqsub_vec, - .fno = gen_helper_gvec_sqsub_b, - .opt_opc = vecop_list_sqsub, - .write_aofs = true, - .vece = MO_8 }, - { .fniv = gen_sqsub_vec, - .fno = gen_helper_gvec_sqsub_h, - .opt_opc = vecop_list_sqsub, - .write_aofs = true, - .vece = MO_16 }, - { .fniv = gen_sqsub_vec, - .fno = gen_helper_gvec_sqsub_s, - .opt_opc = vecop_list_sqsub, - .write_aofs = true, - .vece = MO_32 }, - { .fniv = gen_sqsub_vec, - .fno = gen_helper_gvec_sqsub_d, - .opt_opc = vecop_list_sqsub, - .write_aofs = true, - .vece = MO_64 }, -}; - -/* Translate a NEON data processing instruction. Return nonzero if the - instruction is invalid. - We process data in a mixture of 32-bit and 64-bit chunks. - Mostly we use 32-bit chunks so we can use normal scalar instructions. */ - -static int disas_neon_data_insn(DisasContext *s, uint32_t insn) +void gen_gvec_sqsub_qc(TCGContext *tcg_ctx, unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) { - TCGContext *tcg_ctx = s->uc->tcg_ctx; - int op; - int q; - int rd, rn, rm, rd_ofs, rn_ofs, rm_ofs; - int size; - int shift; - int pass; - int count; - int pairwise; - int u; - int vec_size; - uint32_t imm; - TCGv_i32 tmp, tmp2, tmp3, tmp4, tmp5; - TCGv_ptr ptr1, ptr2, ptr3; - TCGv_i64 tmp64; - - /* FIXME: this access check should not take precedence over UNDEF - * for invalid encodings; we will generate incorrect syndrome information - * for attempts to execute invalid vfp/neon encodings with FP disabled. - */ - if (s->fp_excp_el) { - gen_exception_insn(s, s->pc_curr, EXCP_UDEF, - syn_simd_access_trap(1, 0xe, false), s->fp_excp_el); - return 0; - } - - if (!s->vfp_enabled) - return 1; - q = (insn & (1 << 6)) != 0; - u = (insn >> 24) & 1; - VFP_DREG_D(rd, insn); - VFP_DREG_N(rn, insn); - VFP_DREG_M(rm, insn); - size = (insn >> 20) & 3; - vec_size = q ? 16 : 8; - rd_ofs = neon_reg_offset(rd, 0); - rn_ofs = neon_reg_offset(rn, 0); - rm_ofs = neon_reg_offset(rm, 0); - - if ((insn & (1 << 23)) == 0) { - /* Three register same length. */ - op = ((insn >> 7) & 0x1e) | ((insn >> 4) & 1); - /* Catch invalid op and bad size combinations: UNDEF */ - if ((neon_3r_sizes[op] & (1 << size)) == 0) { - return 1; - } - /* All insns of this form UNDEF for either this condition or the - * superset of cases "Q==1"; we catch the latter later. - */ - if (q && ((rd | rn | rm) & 1)) { - return 1; - } - switch (op) { - case NEON_3R_SHA: - /* The SHA-1/SHA-256 3-register instructions require special - * treatment here, as their size field is overloaded as an - * op type selector, and they all consume their input in a - * single pass. 
- */ - if (!q) { - return 1; - } - if (!u) { /* SHA-1 */ - if (!dc_isar_feature(aa32_sha1, s)) { - return 1; - } - ptr1 = vfp_reg_ptr(tcg_ctx, true, rd); - ptr2 = vfp_reg_ptr(tcg_ctx, true, rn); - ptr3 = vfp_reg_ptr(tcg_ctx, true, rm); - tmp4 = tcg_const_i32(tcg_ctx, size); - gen_helper_crypto_sha1_3reg(tcg_ctx, ptr1, ptr2, ptr3, tmp4); - tcg_temp_free_i32(tcg_ctx, tmp4); - } else { /* SHA-256 */ - if (!dc_isar_feature(aa32_sha2, s) || size == 3) { - return 1; - } - ptr1 = vfp_reg_ptr(tcg_ctx, true, rd); - ptr2 = vfp_reg_ptr(tcg_ctx, true, rn); - ptr3 = vfp_reg_ptr(tcg_ctx, true, rm); - switch (size) { - case 0: - gen_helper_crypto_sha256h(tcg_ctx, ptr1, ptr2, ptr3); - break; - case 1: - gen_helper_crypto_sha256h2(tcg_ctx, ptr1, ptr2, ptr3); - break; - case 2: - gen_helper_crypto_sha256su1(tcg_ctx, ptr1, ptr2, ptr3); - break; - } - } - tcg_temp_free_ptr(tcg_ctx, ptr1); - tcg_temp_free_ptr(tcg_ctx, ptr2); - tcg_temp_free_ptr(tcg_ctx, ptr3); - return 0; - - case NEON_3R_VPADD_VQRDMLAH: - if (!u) { - break; /* VPADD */ - } - /* VQRDMLAH */ - switch (size) { - case 1: - return do_v81_helper(s, gen_helper_gvec_qrdmlah_s16, - q, rd, rn, rm); - case 2: - return do_v81_helper(s, gen_helper_gvec_qrdmlah_s32, - q, rd, rn, rm); - } - return 1; - - case NEON_3R_VFM_VQRDMLSH: - if (!u) { - /* VFM, VFMS */ - if (size == 1) { - return 1; - } - break; - } - /* VQRDMLSH */ - switch (size) { - case 1: - return do_v81_helper(s, gen_helper_gvec_qrdmlsh_s16, - q, rd, rn, rm); - case 2: - return do_v81_helper(s, gen_helper_gvec_qrdmlsh_s32, - q, rd, rn, rm); - } - return 1; - - case NEON_3R_LOGIC: /* Logic ops. */ - switch ((u << 2) | size) { - case 0: /* VAND */ - tcg_gen_gvec_and(tcg_ctx, 0, rd_ofs, rn_ofs, rm_ofs, - vec_size, vec_size); - break; - case 1: /* VBIC */ - tcg_gen_gvec_andc(tcg_ctx, 0, rd_ofs, rn_ofs, rm_ofs, - vec_size, vec_size); - break; - case 2: /* VORR */ - tcg_gen_gvec_or(tcg_ctx, 0, rd_ofs, rn_ofs, rm_ofs, - vec_size, vec_size); - break; - case 3: /* VORN */ - tcg_gen_gvec_orc(tcg_ctx, 0, rd_ofs, rn_ofs, rm_ofs, - vec_size, vec_size); - break; - case 4: /* VEOR */ - tcg_gen_gvec_xor(tcg_ctx, 0, rd_ofs, rn_ofs, rm_ofs, - vec_size, vec_size); - break; - case 5: /* VBSL */ - tcg_gen_gvec_bitsel(tcg_ctx, MO_8, rd_ofs, rd_ofs, rn_ofs, rm_ofs, - vec_size, vec_size); - break; - case 6: /* VBIT */ - tcg_gen_gvec_bitsel(tcg_ctx, MO_8, rd_ofs, rm_ofs, rn_ofs, rd_ofs, - vec_size, vec_size); - break; - case 7: /* VBIF */ - tcg_gen_gvec_bitsel(tcg_ctx, MO_8, rd_ofs, rm_ofs, rd_ofs, rn_ofs, - vec_size, vec_size); - break; - } - return 0; - - case NEON_3R_VADD_VSUB: - if (u) { - tcg_gen_gvec_sub(tcg_ctx, size, rd_ofs, rn_ofs, rm_ofs, - vec_size, vec_size); - } else { - tcg_gen_gvec_add(tcg_ctx, size, rd_ofs, rn_ofs, rm_ofs, - vec_size, vec_size); - } - return 0; - - case NEON_3R_VQADD: - tcg_gen_gvec_4(tcg_ctx, rd_ofs, offsetof(CPUARMState, vfp.qc), - rn_ofs, rm_ofs, vec_size, vec_size, - (u ? uqadd_op : sqadd_op) + size); - return 0; - - case NEON_3R_VQSUB: - tcg_gen_gvec_4(tcg_ctx, rd_ofs, offsetof(CPUARMState, vfp.qc), - rn_ofs, rm_ofs, vec_size, vec_size, - (u ? uqsub_op : sqsub_op) + size); - return 0; - - case NEON_3R_VMUL: /* VMUL */ - if (u) { - /* Polynomial case allows only P8. 
*/ - if (size != 0) { - return 1; - } - tcg_gen_gvec_3_ool(tcg_ctx, rd_ofs, rn_ofs, rm_ofs, vec_size, vec_size, - 0, gen_helper_gvec_pmul_b); - } else { - tcg_gen_gvec_mul(tcg_ctx, size, rd_ofs, rn_ofs, rm_ofs, - vec_size, vec_size); - } - return 0; - - case NEON_3R_VML: /* VMLA, VMLS */ - tcg_gen_gvec_3(tcg_ctx, rd_ofs, rn_ofs, rm_ofs, vec_size, vec_size, - u ? &mls_op[size] : &mla_op[size]); - return 0; - - case NEON_3R_VTST_VCEQ: - if (u) { /* VCEQ */ - tcg_gen_gvec_cmp(tcg_ctx, TCG_COND_EQ, size, rd_ofs, rn_ofs, rm_ofs, - vec_size, vec_size); - } else { /* VTST */ - tcg_gen_gvec_3(tcg_ctx, rd_ofs, rn_ofs, rm_ofs, - vec_size, vec_size, &cmtst_op[size]); - } - return 0; - - case NEON_3R_VCGT: - tcg_gen_gvec_cmp(tcg_ctx, u ? TCG_COND_GTU : TCG_COND_GT, size, - rd_ofs, rn_ofs, rm_ofs, vec_size, vec_size); - return 0; - - case NEON_3R_VCGE: - tcg_gen_gvec_cmp(tcg_ctx, u ? TCG_COND_GEU : TCG_COND_GE, size, - rd_ofs, rn_ofs, rm_ofs, vec_size, vec_size); - return 0; - - case NEON_3R_VMAX: - if (u) { - tcg_gen_gvec_umax(tcg_ctx, size, rd_ofs, rn_ofs, rm_ofs, - vec_size, vec_size); - } else { - tcg_gen_gvec_smax(tcg_ctx, size, rd_ofs, rn_ofs, rm_ofs, - vec_size, vec_size); - } - return 0; - case NEON_3R_VMIN: - if (u) { - tcg_gen_gvec_umin(tcg_ctx, size, rd_ofs, rn_ofs, rm_ofs, - vec_size, vec_size); - } else { - tcg_gen_gvec_smin(tcg_ctx, size, rd_ofs, rn_ofs, rm_ofs, - vec_size, vec_size); - } - return 0; - - case NEON_3R_VSHL: - /* Note the operation is vshl vd,vm,vn */ - tcg_gen_gvec_3(tcg_ctx, rd_ofs, rm_ofs, rn_ofs, vec_size, vec_size, - u ? &ushl_op[size] : &sshl_op[size]); - return 0; - } - - if (size == 3) { - /* 64-bit element instructions. */ - for (pass = 0; pass < (q ? 2 : 1); pass++) { - neon_load_reg64(tcg_ctx, tcg_ctx->cpu_V0, rn + pass); - neon_load_reg64(tcg_ctx, tcg_ctx->cpu_V1, rm + pass); - switch (op) { - case NEON_3R_VQSHL: - if (u) { - gen_helper_neon_qshl_u64(tcg_ctx, tcg_ctx->cpu_V0, tcg_ctx->cpu_env, - tcg_ctx->cpu_V1, tcg_ctx->cpu_V0); - } else { - gen_helper_neon_qshl_s64(tcg_ctx, tcg_ctx->cpu_V0, tcg_ctx->cpu_env, - tcg_ctx->cpu_V1, tcg_ctx->cpu_V0); - } - break; - case NEON_3R_VRSHL: - if (u) { - gen_helper_neon_rshl_u64(tcg_ctx, tcg_ctx->cpu_V0, tcg_ctx->cpu_V1, tcg_ctx->cpu_V0); - } else { - gen_helper_neon_rshl_s64(tcg_ctx, tcg_ctx->cpu_V0, tcg_ctx->cpu_V1, tcg_ctx->cpu_V0); - } - break; - case NEON_3R_VQRSHL: - if (u) { - gen_helper_neon_qrshl_u64(tcg_ctx, tcg_ctx->cpu_V0, tcg_ctx->cpu_env, - tcg_ctx->cpu_V1, tcg_ctx->cpu_V0); - } else { - gen_helper_neon_qrshl_s64(tcg_ctx, tcg_ctx->cpu_V0, tcg_ctx->cpu_env, - tcg_ctx->cpu_V1, tcg_ctx->cpu_V0); - } - break; - default: - abort(); - } - neon_store_reg64(tcg_ctx, tcg_ctx->cpu_V0, rd + pass); - } - return 0; - } - pairwise = 0; - switch (op) { - case NEON_3R_VQSHL: - case NEON_3R_VRSHL: - case NEON_3R_VQRSHL: - { - int rtmp; - /* Shift instruction operands are reversed. 
*/ - rtmp = rn; - rn = rm; - rm = rtmp; - } - break; - case NEON_3R_VPADD_VQRDMLAH: - case NEON_3R_VPMAX: - case NEON_3R_VPMIN: - pairwise = 1; - break; - case NEON_3R_FLOAT_ARITH: - pairwise = (u && size < 2); /* if VPADD (float) */ - break; - case NEON_3R_FLOAT_MINMAX: - pairwise = u; /* if VPMIN/VPMAX (float) */ - break; - case NEON_3R_FLOAT_CMP: - if (!u && size) { - /* no encoding for U=0 C=1x */ - return 1; - } - break; - case NEON_3R_FLOAT_ACMP: - if (!u) { - return 1; - } - break; - case NEON_3R_FLOAT_MISC: - /* VMAXNM/VMINNM in ARMv8 */ - if (u && !arm_dc_feature(s, ARM_FEATURE_V8)) { - return 1; - } - break; - case NEON_3R_VFM_VQRDMLSH: - if (!dc_isar_feature(aa32_simdfmac, s)) { - return 1; - } - break; - default: - break; - } - - if (pairwise && q) { - /* All the pairwise insns UNDEF if Q is set */ - return 1; - } - - for (pass = 0; pass < (q ? 4 : 2); pass++) { - - if (pairwise) { - /* Pairwise. */ - if (pass < 1) { - tmp = neon_load_reg(tcg_ctx, rn, 0); - tmp2 = neon_load_reg(tcg_ctx, rn, 1); - } else { - tmp = neon_load_reg(tcg_ctx, rm, 0); - tmp2 = neon_load_reg(tcg_ctx, rm, 1); - } - } else { - /* Elementwise. */ - tmp = neon_load_reg(tcg_ctx, rn, pass); - tmp2 = neon_load_reg(tcg_ctx, rm, pass); - } - switch (op) { - case NEON_3R_VHADD: - GEN_NEON_INTEGER_OP(hadd); - break; - case NEON_3R_VRHADD: - GEN_NEON_INTEGER_OP(rhadd); - break; - case NEON_3R_VHSUB: - GEN_NEON_INTEGER_OP(hsub); - break; - case NEON_3R_VQSHL: - GEN_NEON_INTEGER_OP_ENV(qshl); - break; - case NEON_3R_VRSHL: - GEN_NEON_INTEGER_OP(rshl); - break; - case NEON_3R_VQRSHL: - GEN_NEON_INTEGER_OP_ENV(qrshl); - break; - case NEON_3R_VABD: - GEN_NEON_INTEGER_OP(abd); - break; - case NEON_3R_VABA: - GEN_NEON_INTEGER_OP(abd); - tcg_temp_free_i32(tcg_ctx, tmp2); - tmp2 = neon_load_reg(tcg_ctx, rd, pass); - gen_neon_add(tcg_ctx, size, tmp, tmp2); - break; - case NEON_3R_VPMAX: - GEN_NEON_INTEGER_OP(pmax); - break; - case NEON_3R_VPMIN: - GEN_NEON_INTEGER_OP(pmin); - break; - case NEON_3R_VQDMULH_VQRDMULH: /* Multiply high. */ - if (!u) { /* VQDMULH */ - switch (size) { - case 1: - gen_helper_neon_qdmulh_s16(tcg_ctx, tmp, tcg_ctx->cpu_env, tmp, tmp2); - break; - case 2: - gen_helper_neon_qdmulh_s32(tcg_ctx, tmp, tcg_ctx->cpu_env, tmp, tmp2); - break; - default: abort(); - } - } else { /* VQRDMULH */ - switch (size) { - case 1: - gen_helper_neon_qrdmulh_s16(tcg_ctx, tmp, tcg_ctx->cpu_env, tmp, tmp2); - break; - case 2: - gen_helper_neon_qrdmulh_s32(tcg_ctx, tmp, tcg_ctx->cpu_env, tmp, tmp2); - break; - default: abort(); - } - } - break; - case NEON_3R_VPADD_VQRDMLAH: - switch (size) { - case 0: gen_helper_neon_padd_u8(tcg_ctx, tmp, tmp, tmp2); break; - case 1: gen_helper_neon_padd_u16(tcg_ctx, tmp, tmp, tmp2); break; - case 2: tcg_gen_add_i32(tcg_ctx, tmp, tmp, tmp2); break; - default: abort(); - } - break; - case NEON_3R_FLOAT_ARITH: /* Floating point arithmetic. 
*/ - { - TCGv_ptr fpstatus = get_fpstatus_ptr(tcg_ctx, 1); - switch ((u << 2) | size) { - case 0: /* VADD */ - case 4: /* VPADD */ - gen_helper_vfp_adds(tcg_ctx, tmp, tmp, tmp2, fpstatus); - break; - case 2: /* VSUB */ - gen_helper_vfp_subs(tcg_ctx, tmp, tmp, tmp2, fpstatus); - break; - case 6: /* VABD */ - gen_helper_neon_abd_f32(tcg_ctx, tmp, tmp, tmp2, fpstatus); - break; - default: - abort(); - } - tcg_temp_free_ptr(tcg_ctx, fpstatus); - break; - } - case NEON_3R_FLOAT_MULTIPLY: - { - TCGv_ptr fpstatus = get_fpstatus_ptr(tcg_ctx, 1); - gen_helper_vfp_muls(tcg_ctx, tmp, tmp, tmp2, fpstatus); - if (!u) { - tcg_temp_free_i32(tcg_ctx, tmp2); - tmp2 = neon_load_reg(tcg_ctx, rd, pass); - if (size == 0) { - gen_helper_vfp_adds(tcg_ctx, tmp, tmp, tmp2, fpstatus); - } else { - gen_helper_vfp_subs(tcg_ctx, tmp, tmp2, tmp, fpstatus); - } - } - tcg_temp_free_ptr(tcg_ctx, fpstatus); - break; - } - case NEON_3R_FLOAT_CMP: - { - TCGv_ptr fpstatus = get_fpstatus_ptr(tcg_ctx, 1); - if (!u) { - gen_helper_neon_ceq_f32(tcg_ctx, tmp, tmp, tmp2, fpstatus); - } else { - if (size == 0) { - gen_helper_neon_cge_f32(tcg_ctx, tmp, tmp, tmp2, fpstatus); - } else { - gen_helper_neon_cgt_f32(tcg_ctx, tmp, tmp, tmp2, fpstatus); - } - } - tcg_temp_free_ptr(tcg_ctx, fpstatus); - break; - } - case NEON_3R_FLOAT_ACMP: - { - TCGv_ptr fpstatus = get_fpstatus_ptr(tcg_ctx, 1); - if (size == 0) { - gen_helper_neon_acge_f32(tcg_ctx, tmp, tmp, tmp2, fpstatus); - } else { - gen_helper_neon_acgt_f32(tcg_ctx, tmp, tmp, tmp2, fpstatus); - } - tcg_temp_free_ptr(tcg_ctx, fpstatus); - break; - } - case NEON_3R_FLOAT_MINMAX: - { - TCGv_ptr fpstatus = get_fpstatus_ptr(tcg_ctx, 1); - if (size == 0) { - gen_helper_vfp_maxs(tcg_ctx, tmp, tmp, tmp2, fpstatus); - } else { - gen_helper_vfp_mins(tcg_ctx, tmp, tmp, tmp2, fpstatus); - } - tcg_temp_free_ptr(tcg_ctx, fpstatus); - break; - } - case NEON_3R_FLOAT_MISC: - if (u) { - /* VMAXNM/VMINNM */ - TCGv_ptr fpstatus = get_fpstatus_ptr(tcg_ctx, 1); - if (size == 0) { - gen_helper_vfp_maxnums(tcg_ctx, tmp, tmp, tmp2, fpstatus); - } else { - gen_helper_vfp_minnums(tcg_ctx, tmp, tmp, tmp2, fpstatus); - } - tcg_temp_free_ptr(tcg_ctx, fpstatus); - } else { - if (size == 0) { - gen_helper_recps_f32(tcg_ctx, tmp, tmp, tmp2, tcg_ctx->cpu_env); - } else { - gen_helper_rsqrts_f32(tcg_ctx, tmp, tmp, tmp2, tcg_ctx->cpu_env); - } - } - break; - case NEON_3R_VFM_VQRDMLSH: - { - /* VFMA, VFMS: fused multiply-add */ - TCGv_ptr fpstatus = get_fpstatus_ptr(tcg_ctx, 1); - TCGv_i32 tmp3 = neon_load_reg(tcg_ctx, rd, pass); - if (size) { - /* VFMS */ - gen_helper_vfp_negs(tcg_ctx, tmp, tmp); - } - gen_helper_vfp_muladds(tcg_ctx, tmp, tmp, tmp2, tmp3, fpstatus); - tcg_temp_free_i32(tcg_ctx, tmp3); - tcg_temp_free_ptr(tcg_ctx, fpstatus); - break; - } - default: - abort(); - } - tcg_temp_free_i32(tcg_ctx, tmp2); - - /* Save the result. For elementwise operations we can put it - straight into the destination register. For pairwise operations - we have to be careful to avoid clobbering the source operands. */ - if (pairwise && rd == rm) { - neon_store_scratch(tcg_ctx, pass, tmp); - } else { - neon_store_reg(tcg_ctx, rd, pass, tmp); - } - - } /* for pass */ - if (pairwise && rd == rm) { - for (pass = 0; pass < (q ? 4 : 2); pass++) { - tmp = neon_load_scratch(tcg_ctx, pass); - neon_store_reg(tcg_ctx, rd, pass, tmp); - } - } - /* End of 3 register same size operations. */ - } else if (insn & (1 << 4)) { - if ((insn & 0x00380080) != 0) { - /* Two registers and shift. 
*/ - op = (insn >> 8) & 0xf; - if (insn & (1 << 7)) { - /* 64-bit shift. */ - if (op > 7) { - return 1; - } - size = 3; - } else { - size = 2; - while ((insn & (1 << (size + 19))) == 0) - size--; - } - shift = (insn >> 16) & ((1 << (3 + size)) - 1); - if (op < 8) { - /* Shift by immediate: - VSHR, VSRA, VRSHR, VRSRA, VSRI, VSHL, VQSHL, VQSHLU. */ - if (q && ((rd | rm) & 1)) { - return 1; - } - if (!u && (op == 4 || op == 6)) { - return 1; - } - /* Right shifts are encoded as N - shift, where N is the - element size in bits. */ - if (op <= 4) { - shift = shift - (1 << (size + 3)); - } - - switch (op) { - case 0: /* VSHR */ - /* Right shift comes here negative. */ - shift = -shift; - /* Shifts larger than the element size are architecturally - * valid. Unsigned results in all zeros; signed results - * in all sign bits. - */ - if (!u) { - tcg_gen_gvec_sari(tcg_ctx, size, rd_ofs, rm_ofs, - MIN(shift, (8 << size) - 1), - vec_size, vec_size); - } else if (shift >= 8 << size) { - tcg_gen_gvec_dup8i(tcg_ctx, rd_ofs, vec_size, vec_size, 0); - } else { - tcg_gen_gvec_shri(tcg_ctx, size, rd_ofs, rm_ofs, shift, - vec_size, vec_size); - } - return 0; - - case 1: /* VSRA */ - /* Right shift comes here negative. */ - shift = -shift; - /* Shifts larger than the element size are architecturally - * valid. Unsigned results in all zeros; signed results - * in all sign bits. - */ - if (!u) { - tcg_gen_gvec_2i(tcg_ctx, rd_ofs, rm_ofs, vec_size, vec_size, - MIN(shift, (8 << size) - 1), - &ssra_op[size]); - } else if (shift >= 8 << size) { - /* rd += 0 */ - } else { - tcg_gen_gvec_2i(tcg_ctx, rd_ofs, rm_ofs, vec_size, vec_size, - shift, &usra_op[size]); - } - return 0; - - case 4: /* VSRI */ - if (!u) { - return 1; - } - /* Right shift comes here negative. */ - shift = -shift; - /* Shift out of range leaves destination unchanged. */ - if (shift < 8 << size) { - tcg_gen_gvec_2i(tcg_ctx, rd_ofs, rm_ofs, vec_size, vec_size, - shift, &sri_op[size]); - } - return 0; - - case 5: /* VSHL, VSLI */ - if (u) { /* VSLI */ - /* Shift out of range leaves destination unchanged. */ - if (shift < 8 << size) { - tcg_gen_gvec_2i(tcg_ctx, rd_ofs, rm_ofs, vec_size, - vec_size, shift, &sli_op[size]); - } - } else { /* VSHL */ - /* Shifts larger than the element size are - * architecturally valid and results in zero. - */ - if (shift >= 8 << size) { - tcg_gen_gvec_dup8i(tcg_ctx, rd_ofs, vec_size, vec_size, 0); - } else { - tcg_gen_gvec_shli(tcg_ctx, size, rd_ofs, rm_ofs, shift, - vec_size, vec_size); - } - } - return 0; - } - - if (size == 3) { - count = q + 1; - } else { - count = q ? 4: 2; - } - - /* To avoid excessive duplication of ops we implement shift - * by immediate using the variable shift operations. 
- */ - imm = dup_const(size, shift); - - for (pass = 0; pass < count; pass++) { - if (size == 3) { - neon_load_reg64(tcg_ctx, tcg_ctx->cpu_V0, rm + pass); - tcg_gen_movi_i64(tcg_ctx, tcg_ctx->cpu_V1, imm); - switch (op) { - case 2: /* VRSHR */ - case 3: /* VRSRA */ - if (u) - gen_helper_neon_rshl_u64(tcg_ctx, tcg_ctx->cpu_V0, tcg_ctx->cpu_V0, tcg_ctx->cpu_V1); - else - gen_helper_neon_rshl_s64(tcg_ctx, tcg_ctx->cpu_V0, tcg_ctx->cpu_V0, tcg_ctx->cpu_V1); - break; - case 6: /* VQSHLU */ - gen_helper_neon_qshlu_s64(tcg_ctx, tcg_ctx->cpu_V0, tcg_ctx->cpu_env, - tcg_ctx->cpu_V0, tcg_ctx->cpu_V1); - break; - case 7: /* VQSHL */ - if (u) { - gen_helper_neon_qshl_u64(tcg_ctx, tcg_ctx->cpu_V0, tcg_ctx->cpu_env, - tcg_ctx->cpu_V0, tcg_ctx->cpu_V1); - } else { - gen_helper_neon_qshl_s64(tcg_ctx, tcg_ctx->cpu_V0, tcg_ctx->cpu_env, - tcg_ctx->cpu_V0, tcg_ctx->cpu_V1); - } - break; - default: - g_assert_not_reached(); - break; - } - if (op == 3) { - /* Accumulate. */ - neon_load_reg64(tcg_ctx, tcg_ctx->cpu_V1, rd + pass); - tcg_gen_add_i64(tcg_ctx, tcg_ctx->cpu_V0, tcg_ctx->cpu_V0, tcg_ctx->cpu_V1); - } - neon_store_reg64(tcg_ctx, tcg_ctx->cpu_V0, rd + pass); - } else { /* size < 3 */ - /* Operands in T0 and T1. */ - tmp = neon_load_reg(tcg_ctx, rm, pass); - tmp2 = tcg_temp_new_i32(tcg_ctx); - tcg_gen_movi_i32(tcg_ctx, tmp2, imm); - switch (op) { - case 2: /* VRSHR */ - case 3: /* VRSRA */ - GEN_NEON_INTEGER_OP(rshl); - break; - case 6: /* VQSHLU */ - switch (size) { - case 0: - gen_helper_neon_qshlu_s8(tcg_ctx, tmp, tcg_ctx->cpu_env, - tmp, tmp2); - break; - case 1: - gen_helper_neon_qshlu_s16(tcg_ctx, tmp, tcg_ctx->cpu_env, - tmp, tmp2); - break; - case 2: - gen_helper_neon_qshlu_s32(tcg_ctx, tmp, tcg_ctx->cpu_env, - tmp, tmp2); - break; - default: - abort(); - } - break; - case 7: /* VQSHL */ - GEN_NEON_INTEGER_OP_ENV(qshl); - break; - default: - g_assert_not_reached(); - break; - } - tcg_temp_free_i32(tcg_ctx, tmp2); - - if (op == 3) { - /* Accumulate. */ - tmp2 = neon_load_reg(tcg_ctx, rd, pass); - gen_neon_add(tcg_ctx, size, tmp, tmp2); - tcg_temp_free_i32(tcg_ctx, tmp2); - } - neon_store_reg(tcg_ctx, rd, pass, tmp); - } - } /* for pass */ - } else if (op < 10) { - /* Shift by immediate and narrow: - VSHRN, VRSHRN, VQSHRN, VQRSHRN. */ - int input_unsigned = (op == 8) ? 
!u : u; - if (rm & 1) { - return 1; - } - shift = shift - (1 << (size + 3)); - size++; - if (size == 3) { - tmp64 = tcg_const_i64(tcg_ctx, shift); - neon_load_reg64(tcg_ctx, tcg_ctx->cpu_V0, rm); - neon_load_reg64(tcg_ctx, tcg_ctx->cpu_V1, rm + 1); - for (pass = 0; pass < 2; pass++) { - TCGv_i64 in; - if (pass == 0) { - in = tcg_ctx->cpu_V0; - } else { - in = tcg_ctx->cpu_V1; - } - if (q) { - if (input_unsigned) { - gen_helper_neon_rshl_u64(tcg_ctx, tcg_ctx->cpu_V0, in, tmp64); - } else { - gen_helper_neon_rshl_s64(tcg_ctx, tcg_ctx->cpu_V0, in, tmp64); - } - } else { - if (input_unsigned) { - gen_ushl_i64(tcg_ctx, tcg_ctx->cpu_V0, in, tmp64); - } else { - gen_sshl_i64(tcg_ctx, tcg_ctx->cpu_V0, in, tmp64); - } - } - tmp = tcg_temp_new_i32(tcg_ctx); - gen_neon_narrow_op(tcg_ctx, op == 8, u, size - 1, tmp, tcg_ctx->cpu_V0); - neon_store_reg(tcg_ctx, rd, pass, tmp); - } /* for pass */ - tcg_temp_free_i64(tcg_ctx, tmp64); - } else { - if (size == 1) { - imm = (uint16_t)shift; - imm |= imm << 16; - } else { - /* size == 2 */ - imm = (uint32_t)shift; - } - tmp2 = tcg_const_i32(tcg_ctx, imm); - tmp4 = neon_load_reg(tcg_ctx, rm + 1, 0); - tmp5 = neon_load_reg(tcg_ctx, rm + 1, 1); - for (pass = 0; pass < 2; pass++) { - if (pass == 0) { - tmp = neon_load_reg(tcg_ctx, rm, 0); - } else { - tmp = tmp4; - } - gen_neon_shift_narrow(tcg_ctx, size, tmp, tmp2, q, - input_unsigned); - if (pass == 0) { - tmp3 = neon_load_reg(tcg_ctx, rm, 1); - } else { - tmp3 = tmp5; - } - gen_neon_shift_narrow(tcg_ctx, size, tmp3, tmp2, q, - input_unsigned); - tcg_gen_concat_i32_i64(tcg_ctx, tcg_ctx->cpu_V0, tmp, tmp3); - tcg_temp_free_i32(tcg_ctx, tmp); - tcg_temp_free_i32(tcg_ctx, tmp3); - tmp = tcg_temp_new_i32(tcg_ctx); - gen_neon_narrow_op(tcg_ctx, op == 8, u, size - 1, tmp, tcg_ctx->cpu_V0); - neon_store_reg(tcg_ctx, rd, pass, tmp); - } /* for pass */ - tcg_temp_free_i32(tcg_ctx, tmp2); - } - } else if (op == 10) { - /* VSHLL, VMOVL */ - if (q || (rd & 1)) { - return 1; - } - tmp = neon_load_reg(tcg_ctx, rm, 0); - tmp2 = neon_load_reg(tcg_ctx, rm, 1); - for (pass = 0; pass < 2; pass++) { - if (pass == 1) - tmp = tmp2; - - gen_neon_widen(tcg_ctx, tcg_ctx->cpu_V0, tmp, size, u); - - if (shift != 0) { - /* The shift is less than the width of the source - type, so we can just shift the whole register. */ - tcg_gen_shli_i64(tcg_ctx, tcg_ctx->cpu_V0, tcg_ctx->cpu_V0, shift); - /* Widen the result of shift: we need to clear - * the potential overflow bits resulting from - * left bits of the narrow input appearing as - * right bits of left the neighbour narrow - * input. */ - if (size < 2 || !u) { - uint64_t imm64; - if (size == 0) { - imm = (0xffu >> (8 - shift)); - imm |= imm << 16; - } else if (size == 1) { - imm = 0xffff >> (16 - shift); - } else { - /* size == 2 */ - imm = 0xffffffff >> (32 - shift); - } - if (size < 2) { - imm64 = imm | (((uint64_t)imm) << 32); - } else { - imm64 = imm; - } - tcg_gen_andi_i64(tcg_ctx, tcg_ctx->cpu_V0, tcg_ctx->cpu_V0, ~imm64); - } - } - neon_store_reg64(tcg_ctx, tcg_ctx->cpu_V0, rd + pass); - } - } else if (op >= 14) { - /* VCVT fixed-point. 
*/ - TCGv_ptr fpst; - TCGv_i32 shiftv; - VFPGenFixPointFn *fn; - - if (!(insn & (1 << 21)) || (q && ((rd | rm) & 1))) { - return 1; - } + static const TCGOpcode vecop_list[] = { + INDEX_op_sssub_vec, INDEX_op_cmp_vec, INDEX_op_sub_vec, 0 + }; + static const GVecGen4 ops[4] = { + { .fniv = gen_sqsub_vec, + .fno = gen_helper_gvec_sqsub_b, + .opt_opc = vecop_list, + .write_aofs = true, + .vece = MO_8 }, + { .fniv = gen_sqsub_vec, + .fno = gen_helper_gvec_sqsub_h, + .opt_opc = vecop_list, + .write_aofs = true, + .vece = MO_16 }, + { .fniv = gen_sqsub_vec, + .fno = gen_helper_gvec_sqsub_s, + .opt_opc = vecop_list, + .write_aofs = true, + .vece = MO_32 }, + { .fniv = gen_sqsub_vec, + .fno = gen_helper_gvec_sqsub_d, + .opt_opc = vecop_list, + .write_aofs = true, + .vece = MO_64 }, + }; + tcg_gen_gvec_4(tcg_ctx, rd_ofs, offsetof(CPUARMState, vfp.qc), + rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]); +} - if (!(op & 1)) { - if (u) { - fn = gen_helper_vfp_ultos; - } else { - fn = gen_helper_vfp_sltos; - } - } else { - if (u) { - fn = gen_helper_vfp_touls_round_to_zero; - } else { - fn = gen_helper_vfp_tosls_round_to_zero; - } - } +static void gen_sabd_i32(TCGContext *tcg_ctx, TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) +{ + TCGv_i32 t = tcg_temp_new_i32(tcg_ctx); - /* We have already masked out the must-be-1 top bit of imm6, - * hence this 32-shift where the ARM ARM has 64-imm6. - */ - shift = 32 - shift; - fpst = get_fpstatus_ptr(tcg_ctx, 1); - shiftv = tcg_const_i32(tcg_ctx, shift); - for (pass = 0; pass < (q ? 4 : 2); pass++) { - TCGv_i32 tmpf = neon_load_reg(tcg_ctx, rm, pass); - fn(tcg_ctx, tmpf, tmpf, shiftv, fpst); - neon_store_reg(tcg_ctx, rd, pass, tmpf); - } - tcg_temp_free_ptr(tcg_ctx, fpst); - tcg_temp_free_i32(tcg_ctx, shiftv); - } else { - return 1; - } - } else { /* (insn & 0x00380080) == 0 */ - int invert, reg_ofs, vec_size; + tcg_gen_sub_i32(tcg_ctx, t, a, b); + tcg_gen_sub_i32(tcg_ctx, d, b, a); + tcg_gen_movcond_i32(tcg_ctx, TCG_COND_LT, d, a, b, d, t); + tcg_temp_free_i32(tcg_ctx, t); +} - if (q && (rd & 1)) { - return 1; - } +static void gen_sabd_i64(TCGContext *tcg_ctx, TCGv_i64 d, TCGv_i64 a, TCGv_i64 b) +{ + TCGv_i64 t = tcg_temp_new_i64(tcg_ctx); - op = (insn >> 8) & 0xf; - /* One register and immediate. */ - imm = (u << 7) | ((insn >> 12) & 0x70) | (insn & 0xf); - invert = (insn & (1 << 5)) != 0; - /* Note that op = 2,3,4,5,6,7,10,11,12,13 imm=0 is UNPREDICTABLE. - * We choose to not special-case this and will behave as if a - * valid constant encoding of 0 had been given. - */ - switch (op) { - case 0: case 1: - /* no-op */ - break; - case 2: case 3: - imm <<= 8; - break; - case 4: case 5: - imm <<= 16; - break; - case 6: case 7: - imm <<= 24; - break; - case 8: case 9: - imm |= imm << 16; - break; - case 10: case 11: - imm = (imm << 8) | (imm << 24); - break; - case 12: - imm = (imm << 8) | 0xff; - break; - case 13: - imm = (imm << 16) | 0xffff; - break; - case 14: - imm |= (imm << 8) | (imm << 16) | (imm << 24); - if (invert) { - imm = ~imm; - } - break; - case 15: - if (invert) { - return 1; - } - imm = ((imm & 0x80) << 24) | ((imm & 0x3f) << 19) - | ((imm & 0x40) ? (0x1f << 25) : (1 << 30)); - break; - } - if (invert) { - imm = ~imm; - } + tcg_gen_sub_i64(tcg_ctx, t, a, b); + tcg_gen_sub_i64(tcg_ctx, d, b, a); + tcg_gen_movcond_i64(tcg_ctx, TCG_COND_LT, d, a, b, d, t); + tcg_temp_free_i64(tcg_ctx, t); +} - reg_ofs = neon_reg_offset(rd, 0); - vec_size = q ? 
16 : 8; +static void gen_sabd_vec(TCGContext *tcg_ctx, unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b) +{ + TCGv_vec t = tcg_temp_new_vec_matching(tcg_ctx, d); - if (op & 1 && op < 12) { - if (invert) { - /* The immediate value has already been inverted, - * so BIC becomes AND. - */ - tcg_gen_gvec_andi(tcg_ctx, MO_32, reg_ofs, reg_ofs, imm, - vec_size, vec_size); - } else { - tcg_gen_gvec_ori(tcg_ctx, MO_32, reg_ofs, reg_ofs, imm, - vec_size, vec_size); - } - } else { - /* VMOV, VMVN. */ - if (op == 14 && invert) { - TCGv_i64 t64 = tcg_temp_new_i64(tcg_ctx); - - for (pass = 0; pass <= q; ++pass) { - uint64_t val = 0; - int n; - - for (n = 0; n < 8; n++) { - if (imm & (1 << (n + pass * 8))) { - val |= 0xffull << (n * 8); - } - } - tcg_gen_movi_i64(tcg_ctx, t64, val); - neon_store_reg64(tcg_ctx, t64, rd + pass); - } - tcg_temp_free_i64(tcg_ctx, t64); - } else { - tcg_gen_gvec_dup32i(tcg_ctx, reg_ofs, vec_size, vec_size, imm); - } - } - } - } else { /* (insn & 0x00800010 == 0x00800000) */ - if (size != 3) { - op = (insn >> 8) & 0xf; - if ((insn & (1 << 6)) == 0) { - /* Three registers of different lengths. */ - int src1_wide; - int src2_wide; - int prewiden; - /* undefreq: bit 0 : UNDEF if size == 0 - * bit 1 : UNDEF if size == 1 - * bit 2 : UNDEF if size == 2 - * bit 3 : UNDEF if U == 1 - * Note that [2:0] set implies 'always UNDEF' - */ - int undefreq; - /* prewiden, src1_wide, src2_wide, undefreq */ - static const int neon_3reg_wide[16][4] = { - {1, 0, 0, 0}, /* VADDL */ - {1, 1, 0, 0}, /* VADDW */ - {1, 0, 0, 0}, /* VSUBL */ - {1, 1, 0, 0}, /* VSUBW */ - {0, 1, 1, 0}, /* VADDHN */ - {0, 0, 0, 0}, /* VABAL */ - {0, 1, 1, 0}, /* VSUBHN */ - {0, 0, 0, 0}, /* VABDL */ - {0, 0, 0, 0}, /* VMLAL */ - {0, 0, 0, 9}, /* VQDMLAL */ - {0, 0, 0, 0}, /* VMLSL */ - {0, 0, 0, 9}, /* VQDMLSL */ - {0, 0, 0, 0}, /* Integer VMULL */ - {0, 0, 0, 1}, /* VQDMULL */ - {0, 0, 0, 0xa}, /* Polynomial VMULL */ - {0, 0, 0, 7}, /* Reserved: always UNDEF */ - }; - - prewiden = neon_3reg_wide[op][0]; - src1_wide = neon_3reg_wide[op][1]; - src2_wide = neon_3reg_wide[op][2]; - undefreq = neon_3reg_wide[op][3]; - - if ((undefreq & (1 << size)) || - ((undefreq & 8) && u)) { - return 1; - } - if ((src1_wide && (rn & 1)) || - (src2_wide && (rm & 1)) || - (!src2_wide && (rd & 1))) { - return 1; - } + tcg_gen_smin_vec(tcg_ctx, vece, t, a, b); + tcg_gen_smax_vec(tcg_ctx, vece, d, a, b); + tcg_gen_sub_vec(tcg_ctx, vece, d, d, t); + tcg_temp_free_vec(tcg_ctx, t); +} - /* Handle polynomial VMULL in a single pass. 
*/ - if (op == 14) { - if (size == 0) { - /* VMULL.P8 */ - tcg_gen_gvec_3_ool(tcg_ctx, rd_ofs, rn_ofs, rm_ofs, 16, 16, - 0, gen_helper_neon_pmull_h); - } else { - /* VMULL.P64 */ - if (!dc_isar_feature(aa32_pmull, s)) { - return 1; - } - tcg_gen_gvec_3_ool(tcg_ctx, rd_ofs, rn_ofs, rm_ofs, 16, 16, - 0, gen_helper_gvec_pmull_q); - } - return 0; - } +void gen_gvec_sabd(TCGContext *tcg_ctx, unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) +{ + static const TCGOpcode vecop_list[] = { + INDEX_op_sub_vec, INDEX_op_smin_vec, INDEX_op_smax_vec, 0 + }; + static const GVecGen3 ops[4] = { + { .fniv = gen_sabd_vec, + .fno = gen_helper_gvec_sabd_b, + .opt_opc = vecop_list, + .vece = MO_8 }, + { .fniv = gen_sabd_vec, + .fno = gen_helper_gvec_sabd_h, + .opt_opc = vecop_list, + .vece = MO_16 }, + { .fni4 = gen_sabd_i32, + .fniv = gen_sabd_vec, + .fno = gen_helper_gvec_sabd_s, + .opt_opc = vecop_list, + .vece = MO_32 }, + { .fni8 = gen_sabd_i64, + .fniv = gen_sabd_vec, + .fno = gen_helper_gvec_sabd_d, + .prefer_i64 = TCG_TARGET_REG_BITS == 64, + .opt_opc = vecop_list, + .vece = MO_64 }, + }; + tcg_gen_gvec_3(tcg_ctx, rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]); +} - /* Avoid overlapping operands. Wide source operands are - always aligned so will never overlap with wide - destinations in problematic ways. */ - if (rd == rm && !src2_wide) { - tmp = neon_load_reg(tcg_ctx, rm, 1); - neon_store_scratch(tcg_ctx, 2, tmp); - } else if (rd == rn && !src1_wide) { - tmp = neon_load_reg(tcg_ctx, rn, 1); - neon_store_scratch(tcg_ctx, 2, tmp); - } - tmp3 = NULL; - for (pass = 0; pass < 2; pass++) { - if (src1_wide) { - neon_load_reg64(tcg_ctx, tcg_ctx->cpu_V0, rn + pass); - tmp = NULL; - } else { - if (pass == 1 && rd == rn) { - tmp = neon_load_scratch(tcg_ctx, 2); - } else { - tmp = neon_load_reg(tcg_ctx, rn, pass); - } - if (prewiden) { - gen_neon_widen(tcg_ctx, tcg_ctx->cpu_V0, tmp, size, u); - } - } - if (src2_wide) { - neon_load_reg64(tcg_ctx, tcg_ctx->cpu_V1, rm + pass); - tmp2 = NULL; - } else { - if (pass == 1 && rd == rm) { - tmp2 = neon_load_scratch(tcg_ctx, 2); - } else { - tmp2 = neon_load_reg(tcg_ctx, rm, pass); - } - if (prewiden) { - gen_neon_widen(tcg_ctx, tcg_ctx->cpu_V1, tmp2, size, u); - } - } - switch (op) { - case 0: case 1: case 4: /* VADDL, VADDW, VADDHN, VRADDHN */ - gen_neon_addl(tcg_ctx, size); - break; - case 2: case 3: case 6: /* VSUBL, VSUBW, VSUBHN, VRSUBHN */ - gen_neon_subl(tcg_ctx, size); - break; - case 5: case 7: /* VABAL, VABDL */ - switch ((size << 1) | u) { - case 0: - gen_helper_neon_abdl_s16(tcg_ctx, tcg_ctx->cpu_V0, tmp, tmp2); - break; - case 1: - gen_helper_neon_abdl_u16(tcg_ctx, tcg_ctx->cpu_V0, tmp, tmp2); - break; - case 2: - gen_helper_neon_abdl_s32(tcg_ctx, tcg_ctx->cpu_V0, tmp, tmp2); - break; - case 3: - gen_helper_neon_abdl_u32(tcg_ctx, tcg_ctx->cpu_V0, tmp, tmp2); - break; - case 4: - gen_helper_neon_abdl_s64(tcg_ctx, tcg_ctx->cpu_V0, tmp, tmp2); - break; - case 5: - gen_helper_neon_abdl_u64(tcg_ctx, tcg_ctx->cpu_V0, tmp, tmp2); - break; - default: abort(); - } - tcg_temp_free_i32(tcg_ctx, tmp2); - tcg_temp_free_i32(tcg_ctx, tmp); - break; - case 8: case 9: case 10: case 11: case 12: case 13: - /* VMLAL, VQDMLAL, VMLSL, VQDMLSL, VMULL, VQDMULL */ - gen_neon_mull(tcg_ctx, tcg_ctx->cpu_V0, tmp, tmp2, size, u); - break; - default: /* 15 is RESERVED: caught earlier */ - abort(); - } - if (op == 13) { - /* VQDMULL */ - gen_neon_addl_saturate(tcg_ctx, tcg_ctx->cpu_V0, tcg_ctx->cpu_V0, size); - 
neon_store_reg64(tcg_ctx, tcg_ctx->cpu_V0, rd + pass); - } else if (op == 5 || (op >= 8 && op <= 11)) { - /* Accumulate. */ - neon_load_reg64(tcg_ctx, tcg_ctx->cpu_V1, rd + pass); - switch (op) { - case 10: /* VMLSL */ - gen_neon_negl(tcg_ctx, tcg_ctx->cpu_V0, size); - /* Fall through */ - case 5: case 8: /* VABAL, VMLAL */ - gen_neon_addl(tcg_ctx, size); - break; - case 9: case 11: /* VQDMLAL, VQDMLSL */ - gen_neon_addl_saturate(tcg_ctx, tcg_ctx->cpu_V0, tcg_ctx->cpu_V0, size); - if (op == 11) { - gen_neon_negl(tcg_ctx, tcg_ctx->cpu_V0, size); - } - gen_neon_addl_saturate(tcg_ctx, tcg_ctx->cpu_V0, tcg_ctx->cpu_V1, size); - break; - default: - abort(); - } - neon_store_reg64(tcg_ctx, tcg_ctx->cpu_V0, rd + pass); - } else if (op == 4 || op == 6) { - /* Narrowing operation. */ - tmp = tcg_temp_new_i32(tcg_ctx); - if (!u) { - switch (size) { - case 0: - gen_helper_neon_narrow_high_u8(tcg_ctx, tmp, tcg_ctx->cpu_V0); - break; - case 1: - gen_helper_neon_narrow_high_u16(tcg_ctx, tmp, tcg_ctx->cpu_V0); - break; - case 2: - tcg_gen_extrh_i64_i32(tcg_ctx, tmp, tcg_ctx->cpu_V0); - break; - default: abort(); - } - } else { - switch (size) { - case 0: - gen_helper_neon_narrow_round_high_u8(tcg_ctx, tmp, tcg_ctx->cpu_V0); - break; - case 1: - gen_helper_neon_narrow_round_high_u16(tcg_ctx, tmp, tcg_ctx->cpu_V0); - break; - case 2: - tcg_gen_addi_i64(tcg_ctx, tcg_ctx->cpu_V0, tcg_ctx->cpu_V0, 1u << 31); - tcg_gen_extrh_i64_i32(tcg_ctx, tmp, tcg_ctx->cpu_V0); - break; - default: abort(); - } - } - if (pass == 0) { - tmp3 = tmp; - } else { - neon_store_reg(tcg_ctx, rd, 0, tmp3); - neon_store_reg(tcg_ctx, rd, 1, tmp); - } - } else { - /* Write back the result. */ - neon_store_reg64(tcg_ctx, tcg_ctx->cpu_V0, rd + pass); - } - } - } else { - /* Two registers and a scalar. NB that for ops of this form - * the ARM ARM labels bit 24 as Q, but it is in our variable - * 'u', not 'q'. - */ - if (size == 0) { - return 1; - } - switch (op) { - case 1: /* Float VMLA scalar */ - case 5: /* Floating point VMLS scalar */ - case 9: /* Floating point VMUL scalar */ - if (size == 1) { - return 1; - } - /* fall through */ - case 0: /* Integer VMLA scalar */ - case 4: /* Integer VMLS scalar */ - case 8: /* Integer VMUL scalar */ - case 12: /* VQDMULH scalar */ - case 13: /* VQRDMULH scalar */ - if (u && ((rd | rn) & 1)) { - return 1; - } - tmp = neon_get_scalar(tcg_ctx, size, rm); - neon_store_scratch(tcg_ctx, 0, tmp); - for (pass = 0; pass < (u ? 4 : 2); pass++) { - tmp = neon_load_scratch(tcg_ctx, 0); - tmp2 = neon_load_reg(tcg_ctx, rn, pass); - if (op == 12) { - if (size == 1) { - gen_helper_neon_qdmulh_s16(tcg_ctx, tmp, tcg_ctx->cpu_env, tmp, tmp2); - } else { - gen_helper_neon_qdmulh_s32(tcg_ctx, tmp, tcg_ctx->cpu_env, tmp, tmp2); - } - } else if (op == 13) { - if (size == 1) { - gen_helper_neon_qrdmulh_s16(tcg_ctx, tmp, tcg_ctx->cpu_env, tmp, tmp2); - } else { - gen_helper_neon_qrdmulh_s32(tcg_ctx, tmp, tcg_ctx->cpu_env, tmp, tmp2); - } - } else if (op & 1) { - TCGv_ptr fpstatus = get_fpstatus_ptr(tcg_ctx, 1); - gen_helper_vfp_muls(tcg_ctx, tmp, tmp, tmp2, fpstatus); - tcg_temp_free_ptr(tcg_ctx, fpstatus); - } else { - switch (size) { - case 0: gen_helper_neon_mul_u8(tcg_ctx, tmp, tmp, tmp2); break; - case 1: gen_helper_neon_mul_u16(tcg_ctx, tmp, tmp, tmp2); break; - case 2: tcg_gen_mul_i32(tcg_ctx, tmp, tmp, tmp2); break; - default: abort(); - } - } - tcg_temp_free_i32(tcg_ctx, tmp2); - if (op < 8) { - /* Accumulate. 
*/ - tmp2 = neon_load_reg(tcg_ctx, rd, pass); - switch (op) { - case 0: - gen_neon_add(tcg_ctx, size, tmp, tmp2); - break; - case 1: - { - TCGv_ptr fpstatus = get_fpstatus_ptr(tcg_ctx, 1); - gen_helper_vfp_adds(tcg_ctx, tmp, tmp, tmp2, fpstatus); - tcg_temp_free_ptr(tcg_ctx, fpstatus); - break; - } - case 4: - gen_neon_rsb(tcg_ctx, size, tmp, tmp2); - break; - case 5: - { - TCGv_ptr fpstatus = get_fpstatus_ptr(tcg_ctx, 1); - gen_helper_vfp_subs(tcg_ctx, tmp, tmp2, tmp, fpstatus); - tcg_temp_free_ptr(tcg_ctx, fpstatus); - break; - } - default: - abort(); - } - tcg_temp_free_i32(tcg_ctx, tmp2); - } - neon_store_reg(tcg_ctx, rd, pass, tmp); - } - break; - case 3: /* VQDMLAL scalar */ - case 7: /* VQDMLSL scalar */ - case 11: /* VQDMULL scalar */ - if (u == 1) { - return 1; - } - /* fall through */ - case 2: /* VMLAL sclar */ - case 6: /* VMLSL scalar */ - case 10: /* VMULL scalar */ - if (rd & 1) { - return 1; - } - tmp2 = neon_get_scalar(tcg_ctx, size, rm); - /* We need a copy of tmp2 because gen_neon_mull - * deletes it during pass 0. */ - tmp4 = tcg_temp_new_i32(tcg_ctx); - tcg_gen_mov_i32(tcg_ctx, tmp4, tmp2); - tmp3 = neon_load_reg(tcg_ctx, rn, 1); - - for (pass = 0; pass < 2; pass++) { - if (pass == 0) { - tmp = neon_load_reg(tcg_ctx, rn, 0); - } else { - tmp = tmp3; - tmp2 = tmp4; - } - gen_neon_mull(tcg_ctx, tcg_ctx->cpu_V0, tmp, tmp2, size, u); - if (op != 11) { - neon_load_reg64(tcg_ctx, tcg_ctx->cpu_V1, rd + pass); - } - switch (op) { - case 6: - gen_neon_negl(tcg_ctx, tcg_ctx->cpu_V0, size); - /* Fall through */ - case 2: - gen_neon_addl(tcg_ctx, size); - break; - case 3: case 7: - gen_neon_addl_saturate(tcg_ctx, tcg_ctx->cpu_V0, tcg_ctx->cpu_V0, size); - if (op == 7) { - gen_neon_negl(tcg_ctx, tcg_ctx->cpu_V0, size); - } - gen_neon_addl_saturate(tcg_ctx, tcg_ctx->cpu_V0, tcg_ctx->cpu_V1, size); - break; - case 10: - /* no-op */ - break; - case 11: - gen_neon_addl_saturate(tcg_ctx, tcg_ctx->cpu_V0, tcg_ctx->cpu_V0, size); - break; - default: - abort(); - } - neon_store_reg64(tcg_ctx, tcg_ctx->cpu_V0, rd + pass); - } - break; - case 14: /* VQRDMLAH scalar */ - case 15: /* VQRDMLSH scalar */ - { - NeonGenThreeOpEnvFn *fn; - - if (!dc_isar_feature(aa32_rdm, s)) { - return 1; - } - if (u && ((rd | rn) & 1)) { - return 1; - } - if (op == 14) { - if (size == 1) { - fn = gen_helper_neon_qrdmlah_s16; - } else { - fn = gen_helper_neon_qrdmlah_s32; - } - } else { - if (size == 1) { - fn = gen_helper_neon_qrdmlsh_s16; - } else { - fn = gen_helper_neon_qrdmlsh_s32; - } - } - - tmp2 = neon_get_scalar(tcg_ctx, size, rm); - for (pass = 0; pass < (u ? 4 : 2); pass++) { - tmp = neon_load_reg(tcg_ctx, rn, pass); - tmp3 = neon_load_reg(tcg_ctx, rd, pass); - fn(tcg_ctx, tmp, tcg_ctx->cpu_env, tmp, tmp2, tmp3); - tcg_temp_free_i32(tcg_ctx, tmp3); - neon_store_reg(tcg_ctx, rd, pass, tmp); - } - tcg_temp_free_i32(tcg_ctx, tmp2); - } - break; - default: - g_assert_not_reached(); - break; - } - } - } else { /* size == 3 */ - if (!u) { - /* Extract. 
*/ - imm = (insn >> 8) & 0xf; +static void gen_uabd_i32(TCGContext *tcg_ctx, TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) +{ + TCGv_i32 t = tcg_temp_new_i32(tcg_ctx); - if (imm > 7 && !q) - return 1; + tcg_gen_sub_i32(tcg_ctx, t, a, b); + tcg_gen_sub_i32(tcg_ctx, d, b, a); + tcg_gen_movcond_i32(tcg_ctx, TCG_COND_LTU, d, a, b, d, t); + tcg_temp_free_i32(tcg_ctx, t); +} - if (q && ((rd | rn | rm) & 1)) { - return 1; - } +static void gen_uabd_i64(TCGContext *tcg_ctx, TCGv_i64 d, TCGv_i64 a, TCGv_i64 b) +{ + TCGv_i64 t = tcg_temp_new_i64(tcg_ctx); - if (imm == 0) { - neon_load_reg64(tcg_ctx, tcg_ctx->cpu_V0, rn); - if (q) { - neon_load_reg64(tcg_ctx, tcg_ctx->cpu_V1, rn + 1); - } - } else if (imm == 8) { - neon_load_reg64(tcg_ctx, tcg_ctx->cpu_V0, rn + 1); - if (q) { - neon_load_reg64(tcg_ctx, tcg_ctx->cpu_V1, rm); - } - } else if (q) { - tmp64 = tcg_temp_new_i64(tcg_ctx); - if (imm < 8) { - neon_load_reg64(tcg_ctx, tcg_ctx->cpu_V0, rn); - neon_load_reg64(tcg_ctx, tmp64, rn + 1); - } else { - neon_load_reg64(tcg_ctx, tcg_ctx->cpu_V0, rn + 1); - neon_load_reg64(tcg_ctx, tmp64, rm); - } - tcg_gen_shri_i64(tcg_ctx, tcg_ctx->cpu_V0, tcg_ctx->cpu_V0, (imm & 7) * 8); - tcg_gen_shli_i64(tcg_ctx, tcg_ctx->cpu_V1, tmp64, 64 - ((imm & 7) * 8)); - tcg_gen_or_i64(tcg_ctx, tcg_ctx->cpu_V0, tcg_ctx->cpu_V0, tcg_ctx->cpu_V1); - if (imm < 8) { - neon_load_reg64(tcg_ctx, tcg_ctx->cpu_V1, rm); - } else { - neon_load_reg64(tcg_ctx, tcg_ctx->cpu_V1, rm + 1); - imm -= 8; - } - tcg_gen_shli_i64(tcg_ctx, tcg_ctx->cpu_V1, tcg_ctx->cpu_V1, 64 - (imm * 8)); - tcg_gen_shri_i64(tcg_ctx, tmp64, tmp64, imm * 8); - tcg_gen_or_i64(tcg_ctx, tcg_ctx->cpu_V1, tcg_ctx->cpu_V1, tmp64); - tcg_temp_free_i64(tcg_ctx, tmp64); - } else { - /* BUGFIX */ - neon_load_reg64(tcg_ctx, tcg_ctx->cpu_V0, rn); - tcg_gen_shri_i64(tcg_ctx, tcg_ctx->cpu_V0, tcg_ctx->cpu_V0, imm * 8); - neon_load_reg64(tcg_ctx, tcg_ctx->cpu_V1, rm); - tcg_gen_shli_i64(tcg_ctx, tcg_ctx->cpu_V1, tcg_ctx->cpu_V1, 64 - (imm * 8)); - tcg_gen_or_i64(tcg_ctx, tcg_ctx->cpu_V0, tcg_ctx->cpu_V0, tcg_ctx->cpu_V1); - } - neon_store_reg64(tcg_ctx, tcg_ctx->cpu_V0, rd); - if (q) { - neon_store_reg64(tcg_ctx, tcg_ctx->cpu_V1, rd + 1); - } - } else if ((insn & (1 << 11)) == 0) { - /* Two register misc. */ - op = ((insn >> 12) & 0x30) | ((insn >> 7) & 0xf); - size = (insn >> 18) & 3; - /* UNDEF for unknown op values and bad op-size combinations */ - if ((neon_2rm_sizes[op] & (1 << size)) == 0) { - return 1; - } - if (neon_2rm_is_v8_op(op) && - !arm_dc_feature(s, ARM_FEATURE_V8)) { - return 1; - } - if ((op != NEON_2RM_VMOVN && op != NEON_2RM_VQMOVN) && - q && ((rm | rd) & 1)) { - return 1; - } - switch (op) { - case NEON_2RM_VREV64: - for (pass = 0; pass < (q ? 
2 : 1); pass++) { - tmp = neon_load_reg(tcg_ctx, rm, pass * 2); - tmp2 = neon_load_reg(tcg_ctx, rm, pass * 2 + 1); - switch (size) { - case 0: tcg_gen_bswap32_i32(tcg_ctx, tmp, tmp); break; - case 1: gen_swap_half(tcg_ctx, tmp); break; - case 2: /* no-op */ break; - default: abort(); - } - neon_store_reg(tcg_ctx, rd, pass * 2 + 1, tmp); - if (size == 2) { - neon_store_reg(tcg_ctx, rd, pass * 2, tmp2); - } else { - switch (size) { - case 0: tcg_gen_bswap32_i32(tcg_ctx, tmp2, tmp2); break; - case 1: gen_swap_half(tcg_ctx, tmp2); break; - default: abort(); - } - neon_store_reg(tcg_ctx, rd, pass * 2, tmp2); - } - } - break; - case NEON_2RM_VPADDL: case NEON_2RM_VPADDL_U: - case NEON_2RM_VPADAL: case NEON_2RM_VPADAL_U: - for (pass = 0; pass < q + 1; pass++) { - tmp = neon_load_reg(tcg_ctx, rm, pass * 2); - gen_neon_widen(tcg_ctx, tcg_ctx->cpu_V0, tmp, size, op & 1); - tmp = neon_load_reg(tcg_ctx, rm, pass * 2 + 1); - gen_neon_widen(tcg_ctx, tcg_ctx->cpu_V1, tmp, size, op & 1); - switch (size) { - case 0: gen_helper_neon_paddl_u16(tcg_ctx, CPU_V001); break; - case 1: gen_helper_neon_paddl_u32(tcg_ctx, CPU_V001); break; - case 2: tcg_gen_add_i64(tcg_ctx, CPU_V001); break; - default: abort(); - } - if (op >= NEON_2RM_VPADAL) { - /* Accumulate. */ - neon_load_reg64(tcg_ctx, tcg_ctx->cpu_V1, rd + pass); - gen_neon_addl(tcg_ctx, size); - } - neon_store_reg64(tcg_ctx, tcg_ctx->cpu_V0, rd + pass); - } - break; - case NEON_2RM_VTRN: - if (size == 2) { - int n; - for (n = 0; n < (q ? 4 : 2); n += 2) { - tmp = neon_load_reg(tcg_ctx, rm, n); - tmp2 = neon_load_reg(tcg_ctx, rd, n + 1); - neon_store_reg(tcg_ctx, rm, n, tmp2); - neon_store_reg(tcg_ctx, rd, n + 1, tmp); - } - } else { - goto elementwise; - } - break; - case NEON_2RM_VUZP: - if (gen_neon_unzip(tcg_ctx, rd, rm, size, q)) { - return 1; - } - break; - case NEON_2RM_VZIP: - if (gen_neon_zip(tcg_ctx, rd, rm, size, q)) { - return 1; - } - break; - case NEON_2RM_VMOVN: case NEON_2RM_VQMOVN: - /* also VQMOVUN; op field and mnemonics don't line up */ - if (rm & 1) { - return 1; - } - tmp2 = NULL; - for (pass = 0; pass < 2; pass++) { - neon_load_reg64(tcg_ctx, tcg_ctx->cpu_V0, rm + pass); - tmp = tcg_temp_new_i32(tcg_ctx); - gen_neon_narrow_op(tcg_ctx, op == NEON_2RM_VMOVN, q, size, - tmp, tcg_ctx->cpu_V0); - if (pass == 0) { - tmp2 = tmp; - } else { - neon_store_reg(tcg_ctx, rd, 0, tmp2); - neon_store_reg(tcg_ctx, rd, 1, tmp); - } - } - break; - case NEON_2RM_VSHLL: - if (q || (rd & 1)) { - return 1; - } - tmp = neon_load_reg(tcg_ctx, rm, 0); - tmp2 = neon_load_reg(tcg_ctx, rm, 1); - for (pass = 0; pass < 2; pass++) { - if (pass == 1) - tmp = tmp2; - gen_neon_widen(tcg_ctx, tcg_ctx->cpu_V0, tmp, size, 1); - tcg_gen_shli_i64(tcg_ctx, tcg_ctx->cpu_V0, tcg_ctx->cpu_V0, 8 << size); - neon_store_reg64(tcg_ctx, tcg_ctx->cpu_V0, rd + pass); - } - break; - case NEON_2RM_VCVT_F16_F32: - { - TCGv_ptr fpst; - TCGv_i32 ahp; - - if (!dc_isar_feature(aa32_fp16_spconv, s) || - q || (rm & 1)) { - return 1; - } - fpst = get_fpstatus_ptr(tcg_ctx, true); - ahp = get_ahp_flag(tcg_ctx); - tmp = neon_load_reg(tcg_ctx, rm, 0); - gen_helper_vfp_fcvt_f32_to_f16(tcg_ctx, tmp, tmp, fpst, ahp); - tmp2 = neon_load_reg(tcg_ctx, rm, 1); - gen_helper_vfp_fcvt_f32_to_f16(tcg_ctx, tmp2, tmp2, fpst, ahp); - tcg_gen_shli_i32(tcg_ctx, tmp2, tmp2, 16); - tcg_gen_or_i32(tcg_ctx, tmp2, tmp2, tmp); - tcg_temp_free_i32(tcg_ctx, tmp); - tmp = neon_load_reg(tcg_ctx, rm, 2); - gen_helper_vfp_fcvt_f32_to_f16(tcg_ctx, tmp, tmp, fpst, ahp); - tmp3 = neon_load_reg(tcg_ctx, rm, 3); - 
neon_store_reg(tcg_ctx, rd, 0, tmp2); - gen_helper_vfp_fcvt_f32_to_f16(tcg_ctx, tmp3, tmp3, fpst, ahp); - tcg_gen_shli_i32(tcg_ctx, tmp3, tmp3, 16); - tcg_gen_or_i32(tcg_ctx, tmp3, tmp3, tmp); - neon_store_reg(tcg_ctx, rd, 1, tmp3); - tcg_temp_free_i32(tcg_ctx, tmp); - tcg_temp_free_i32(tcg_ctx, ahp); - tcg_temp_free_ptr(tcg_ctx, fpst); - break; - } - case NEON_2RM_VCVT_F32_F16: - { - TCGv_ptr fpst; - TCGv_i32 ahp; - if (!dc_isar_feature(aa32_fp16_spconv, s) || - q || (rd & 1)) { - return 1; - } - fpst = get_fpstatus_ptr(tcg_ctx, true); - ahp = get_ahp_flag(tcg_ctx); - tmp3 = tcg_temp_new_i32(tcg_ctx); - tmp = neon_load_reg(tcg_ctx, rm, 0); - tmp2 = neon_load_reg(tcg_ctx, rm, 1); - tcg_gen_ext16u_i32(tcg_ctx, tmp3, tmp); - gen_helper_vfp_fcvt_f16_to_f32(tcg_ctx, tmp3, tmp3, fpst, ahp); - neon_store_reg(tcg_ctx, rd, 0, tmp3); - tcg_gen_shri_i32(tcg_ctx, tmp, tmp, 16); - gen_helper_vfp_fcvt_f16_to_f32(tcg_ctx, tmp, tmp, fpst, ahp); - neon_store_reg(tcg_ctx, rd, 1, tmp); - tmp3 = tcg_temp_new_i32(tcg_ctx); - tcg_gen_ext16u_i32(tcg_ctx, tmp3, tmp2); - gen_helper_vfp_fcvt_f16_to_f32(tcg_ctx, tmp3, tmp3, fpst, ahp); - neon_store_reg(tcg_ctx, rd, 2, tmp3); - tcg_gen_shri_i32(tcg_ctx, tmp2, tmp2, 16); - gen_helper_vfp_fcvt_f16_to_f32(tcg_ctx, tmp2, tmp2, fpst, ahp); - neon_store_reg(tcg_ctx, rd, 3, tmp2); - tcg_temp_free_i32(tcg_ctx, ahp); - tcg_temp_free_ptr(tcg_ctx, fpst); - break; - } - case NEON_2RM_AESE: case NEON_2RM_AESMC: - if (!dc_isar_feature(aa32_aes, s) || ((rm | rd) & 1)) { - return 1; - } - ptr1 = vfp_reg_ptr(tcg_ctx, true, rd); - ptr2 = vfp_reg_ptr(tcg_ctx, true, rm); - - /* Bit 6 is the lowest opcode bit; it distinguishes between - * encryption (AESE/AESMC) and decryption (AESD/AESIMC) - */ - tmp3 = tcg_const_i32(tcg_ctx, extract32(insn, 6, 1)); - - if (op == NEON_2RM_AESE) { - gen_helper_crypto_aese(tcg_ctx, ptr1, ptr2, tmp3); - } else { - gen_helper_crypto_aesmc(tcg_ctx, ptr1, ptr2, tmp3); - } - tcg_temp_free_ptr(tcg_ctx, ptr1); - tcg_temp_free_ptr(tcg_ctx, ptr2); - tcg_temp_free_i32(tcg_ctx, tmp3); - break; - case NEON_2RM_SHA1H: - if (!dc_isar_feature(aa32_sha1, s) || ((rm | rd) & 1)) { - return 1; - } - ptr1 = vfp_reg_ptr(tcg_ctx, true, rd); - ptr2 = vfp_reg_ptr(tcg_ctx, true, rm); + tcg_gen_sub_i64(tcg_ctx, t, a, b); + tcg_gen_sub_i64(tcg_ctx, d, b, a); + tcg_gen_movcond_i64(tcg_ctx, TCG_COND_LTU, d, a, b, d, t); + tcg_temp_free_i64(tcg_ctx, t); +} - gen_helper_crypto_sha1h(tcg_ctx, ptr1, ptr2); +static void gen_uabd_vec(TCGContext *tcg_ctx, unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b) +{ + TCGv_vec t = tcg_temp_new_vec_matching(tcg_ctx, d); - tcg_temp_free_ptr(tcg_ctx, ptr1); - tcg_temp_free_ptr(tcg_ctx, ptr2); - break; - case NEON_2RM_SHA1SU1: - if ((rm | rd) & 1) { - return 1; - } - /* bit 6 (q): set -> SHA256SU0, cleared -> SHA1SU1 */ - if (q) { - if (!dc_isar_feature(aa32_sha2, s)) { - return 1; - } - } else if (!dc_isar_feature(aa32_sha1, s)) { - return 1; - } - ptr1 = vfp_reg_ptr(tcg_ctx, true, rd); - ptr2 = vfp_reg_ptr(tcg_ctx, true, rm); - if (q) { - gen_helper_crypto_sha256su0(tcg_ctx, ptr1, ptr2); - } else { - gen_helper_crypto_sha1su1(tcg_ctx, ptr1, ptr2); - } - tcg_temp_free_ptr(tcg_ctx, ptr1); - tcg_temp_free_ptr(tcg_ctx, ptr2); - break; - - case NEON_2RM_VMVN: - tcg_gen_gvec_not(tcg_ctx, 0, rd_ofs, rm_ofs, vec_size, vec_size); - break; - case NEON_2RM_VNEG: - tcg_gen_gvec_neg(tcg_ctx, size, rd_ofs, rm_ofs, vec_size, vec_size); - break; - case NEON_2RM_VABS: - tcg_gen_gvec_abs(tcg_ctx, size, rd_ofs, rm_ofs, vec_size, vec_size); - break; - - default: - 
elementwise: - for (pass = 0; pass < (q ? 4 : 2); pass++) { - tmp = neon_load_reg(tcg_ctx, rm, pass); - switch (op) { - case NEON_2RM_VREV32: - switch (size) { - case 0: tcg_gen_bswap32_i32(tcg_ctx, tmp, tmp); break; - case 1: gen_swap_half(tcg_ctx, tmp); break; - default: abort(); - } - break; - case NEON_2RM_VREV16: - gen_rev16(tcg_ctx, tmp, tmp); - break; - case NEON_2RM_VCLS: - switch (size) { - case 0: gen_helper_neon_cls_s8(tcg_ctx, tmp, tmp); break; - case 1: gen_helper_neon_cls_s16(tcg_ctx, tmp, tmp); break; - case 2: gen_helper_neon_cls_s32(tcg_ctx, tmp, tmp); break; - default: abort(); - } - break; - case NEON_2RM_VCLZ: - switch (size) { - case 0: gen_helper_neon_clz_u8(tcg_ctx, tmp, tmp); break; - case 1: gen_helper_neon_clz_u16(tcg_ctx, tmp, tmp); break; - case 2: tcg_gen_clzi_i32(tcg_ctx, tmp, tmp, 32); break; - default: abort(); - } - break; - case NEON_2RM_VCNT: - gen_helper_neon_cnt_u8(tcg_ctx, tmp, tmp); - break; - case NEON_2RM_VQABS: - switch (size) { - case 0: - gen_helper_neon_qabs_s8(tcg_ctx, tmp, tcg_ctx->cpu_env, tmp); - break; - case 1: - gen_helper_neon_qabs_s16(tcg_ctx, tmp, tcg_ctx->cpu_env, tmp); - break; - case 2: - gen_helper_neon_qabs_s32(tcg_ctx, tmp, tcg_ctx->cpu_env, tmp); - break; - default: abort(); - } - break; - case NEON_2RM_VQNEG: - switch (size) { - case 0: - gen_helper_neon_qneg_s8(tcg_ctx, tmp, tcg_ctx->cpu_env, tmp); - break; - case 1: - gen_helper_neon_qneg_s16(tcg_ctx, tmp, tcg_ctx->cpu_env, tmp); - break; - case 2: - gen_helper_neon_qneg_s32(tcg_ctx, tmp, tcg_ctx->cpu_env, tmp); - break; - default: abort(); - } - break; - case NEON_2RM_VCGT0: case NEON_2RM_VCLE0: - tmp2 = tcg_const_i32(tcg_ctx, 0); - switch(size) { - case 0: gen_helper_neon_cgt_s8(tcg_ctx, tmp, tmp, tmp2); break; - case 1: gen_helper_neon_cgt_s16(tcg_ctx, tmp, tmp, tmp2); break; - case 2: gen_helper_neon_cgt_s32(tcg_ctx, tmp, tmp, tmp2); break; - default: abort(); - } - tcg_temp_free_i32(tcg_ctx, tmp2); - if (op == NEON_2RM_VCLE0) { - tcg_gen_not_i32(tcg_ctx, tmp, tmp); - } - break; - case NEON_2RM_VCGE0: case NEON_2RM_VCLT0: - tmp2 = tcg_const_i32(tcg_ctx, 0); - switch(size) { - case 0: gen_helper_neon_cge_s8(tcg_ctx, tmp, tmp, tmp2); break; - case 1: gen_helper_neon_cge_s16(tcg_ctx, tmp, tmp, tmp2); break; - case 2: gen_helper_neon_cge_s32(tcg_ctx, tmp, tmp, tmp2); break; - default: abort(); - } - tcg_temp_free_i32(tcg_ctx, tmp2); - if (op == NEON_2RM_VCLT0) { - tcg_gen_not_i32(tcg_ctx, tmp, tmp); - } - break; - case NEON_2RM_VCEQ0: - tmp2 = tcg_const_i32(tcg_ctx, 0); - switch(size) { - case 0: gen_helper_neon_ceq_u8(tcg_ctx, tmp, tmp, tmp2); break; - case 1: gen_helper_neon_ceq_u16(tcg_ctx, tmp, tmp, tmp2); break; - case 2: gen_helper_neon_ceq_u32(tcg_ctx, tmp, tmp, tmp2); break; - default: abort(); - } - tcg_temp_free_i32(tcg_ctx, tmp2); - break; - case NEON_2RM_VCGT0_F: - { - TCGv_ptr fpstatus = get_fpstatus_ptr(tcg_ctx, 1); - tmp2 = tcg_const_i32(tcg_ctx, 0); - gen_helper_neon_cgt_f32(tcg_ctx, tmp, tmp, tmp2, fpstatus); - tcg_temp_free_i32(tcg_ctx, tmp2); - tcg_temp_free_ptr(tcg_ctx, fpstatus); - break; - } - case NEON_2RM_VCGE0_F: - { - TCGv_ptr fpstatus = get_fpstatus_ptr(tcg_ctx, 1); - tmp2 = tcg_const_i32(tcg_ctx, 0); - gen_helper_neon_cge_f32(tcg_ctx, tmp, tmp, tmp2, fpstatus); - tcg_temp_free_i32(tcg_ctx, tmp2); - tcg_temp_free_ptr(tcg_ctx, fpstatus); - break; - } - case NEON_2RM_VCEQ0_F: - { - TCGv_ptr fpstatus = get_fpstatus_ptr(tcg_ctx, 1); - tmp2 = tcg_const_i32(tcg_ctx, 0); - gen_helper_neon_ceq_f32(tcg_ctx, tmp, tmp, tmp2, fpstatus); - 
tcg_temp_free_i32(tcg_ctx, tmp2); - tcg_temp_free_ptr(tcg_ctx, fpstatus); - break; - } - case NEON_2RM_VCLE0_F: - { - TCGv_ptr fpstatus = get_fpstatus_ptr(tcg_ctx, 1); - tmp2 = tcg_const_i32(tcg_ctx, 0); - gen_helper_neon_cge_f32(tcg_ctx, tmp, tmp2, tmp, fpstatus); - tcg_temp_free_i32(tcg_ctx, tmp2); - tcg_temp_free_ptr(tcg_ctx, fpstatus); - break; - } - case NEON_2RM_VCLT0_F: - { - TCGv_ptr fpstatus = get_fpstatus_ptr(tcg_ctx, 1); - tmp2 = tcg_const_i32(tcg_ctx, 0); - gen_helper_neon_cgt_f32(tcg_ctx, tmp, tmp2, tmp, fpstatus); - tcg_temp_free_i32(tcg_ctx, tmp2); - tcg_temp_free_ptr(tcg_ctx, fpstatus); - break; - } - case NEON_2RM_VABS_F: - gen_helper_vfp_abss(tcg_ctx, tmp, tmp); - break; - case NEON_2RM_VNEG_F: - gen_helper_vfp_negs(tcg_ctx, tmp, tmp); - break; - case NEON_2RM_VSWP: - tmp2 = neon_load_reg(tcg_ctx, rd, pass); - neon_store_reg(tcg_ctx, rm, pass, tmp2); - break; - case NEON_2RM_VTRN: - tmp2 = neon_load_reg(tcg_ctx, rd, pass); - switch (size) { - case 0: gen_neon_trn_u8(tcg_ctx, tmp, tmp2); break; - case 1: gen_neon_trn_u16(tcg_ctx, tmp, tmp2); break; - default: abort(); - } - neon_store_reg(tcg_ctx, rm, pass, tmp2); - break; - case NEON_2RM_VRINTN: - case NEON_2RM_VRINTA: - case NEON_2RM_VRINTM: - case NEON_2RM_VRINTP: - case NEON_2RM_VRINTZ: - { - TCGv_i32 tcg_rmode; - TCGv_ptr fpstatus = get_fpstatus_ptr(tcg_ctx, 1); - int rmode; - - if (op == NEON_2RM_VRINTZ) { - rmode = FPROUNDING_ZERO; - } else { - rmode = fp_decode_rm[((op & 0x6) >> 1) ^ 1]; - } - - tcg_rmode = tcg_const_i32(tcg_ctx, arm_rmode_to_sf(rmode)); - gen_helper_set_neon_rmode(tcg_ctx, tcg_rmode, tcg_rmode, - tcg_ctx->cpu_env); - gen_helper_rints(tcg_ctx, tmp, tmp, fpstatus); - gen_helper_set_neon_rmode(tcg_ctx, tcg_rmode, tcg_rmode, - tcg_ctx->cpu_env); - tcg_temp_free_ptr(tcg_ctx, fpstatus); - tcg_temp_free_i32(tcg_ctx, tcg_rmode); - break; - } - case NEON_2RM_VRINTX: - { - TCGv_ptr fpstatus = get_fpstatus_ptr(tcg_ctx, 1); - gen_helper_rints_exact(tcg_ctx, tmp, tmp, fpstatus); - tcg_temp_free_ptr(tcg_ctx, fpstatus); - break; - } - case NEON_2RM_VCVTAU: - case NEON_2RM_VCVTAS: - case NEON_2RM_VCVTNU: - case NEON_2RM_VCVTNS: - case NEON_2RM_VCVTPU: - case NEON_2RM_VCVTPS: - case NEON_2RM_VCVTMU: - case NEON_2RM_VCVTMS: - { - bool is_signed = !extract32(insn, 7, 1); - TCGv_ptr fpst = get_fpstatus_ptr(tcg_ctx, 1); - TCGv_i32 tcg_rmode, tcg_shift; - int rmode = fp_decode_rm[extract32(insn, 8, 2)]; - - tcg_shift = tcg_const_i32(tcg_ctx, 0); - tcg_rmode = tcg_const_i32(tcg_ctx, arm_rmode_to_sf(rmode)); - gen_helper_set_neon_rmode(tcg_ctx, tcg_rmode, tcg_rmode, - tcg_ctx->cpu_env); - - if (is_signed) { - gen_helper_vfp_tosls(tcg_ctx, tmp, tmp, - tcg_shift, fpst); - } else { - gen_helper_vfp_touls(tcg_ctx, tmp, tmp, - tcg_shift, fpst); - } - - gen_helper_set_neon_rmode(tcg_ctx, tcg_rmode, tcg_rmode, - tcg_ctx->cpu_env); - tcg_temp_free_i32(tcg_ctx, tcg_rmode); - tcg_temp_free_i32(tcg_ctx, tcg_shift); - tcg_temp_free_ptr(tcg_ctx, fpst); - break; - } - case NEON_2RM_VRECPE: - { - TCGv_ptr fpstatus = get_fpstatus_ptr(tcg_ctx, 1); - gen_helper_recpe_u32(tcg_ctx, tmp, tmp, fpstatus); - tcg_temp_free_ptr(tcg_ctx, fpstatus); - break; - } - case NEON_2RM_VRSQRTE: - { - TCGv_ptr fpstatus = get_fpstatus_ptr(tcg_ctx, 1); - gen_helper_rsqrte_u32(tcg_ctx, tmp, tmp, fpstatus); - tcg_temp_free_ptr(tcg_ctx, fpstatus); - break; - } - case NEON_2RM_VRECPE_F: - { - TCGv_ptr fpstatus = get_fpstatus_ptr(tcg_ctx, 1); - gen_helper_recpe_f32(tcg_ctx, tmp, tmp, fpstatus); - tcg_temp_free_ptr(tcg_ctx, fpstatus); - break; - } - case 
NEON_2RM_VRSQRTE_F: - { - TCGv_ptr fpstatus = get_fpstatus_ptr(tcg_ctx, 1); - gen_helper_rsqrte_f32(tcg_ctx, tmp, tmp, fpstatus); - tcg_temp_free_ptr(tcg_ctx, fpstatus); - break; - } - case NEON_2RM_VCVT_FS: /* VCVT.F32.S32 */ - { - TCGv_ptr fpstatus = get_fpstatus_ptr(tcg_ctx, 1); - gen_helper_vfp_sitos(tcg_ctx, tmp, tmp, fpstatus); - tcg_temp_free_ptr(tcg_ctx, fpstatus); - break; - } - case NEON_2RM_VCVT_FU: /* VCVT.F32.U32 */ - { - TCGv_ptr fpstatus = get_fpstatus_ptr(tcg_ctx, 1); - gen_helper_vfp_uitos(tcg_ctx, tmp, tmp, fpstatus); - tcg_temp_free_ptr(tcg_ctx, fpstatus); - break; - } - case NEON_2RM_VCVT_SF: /* VCVT.S32.F32 */ - { - TCGv_ptr fpstatus = get_fpstatus_ptr(tcg_ctx, 1); - gen_helper_vfp_tosizs(tcg_ctx, tmp, tmp, fpstatus); - tcg_temp_free_ptr(tcg_ctx, fpstatus); - break; - } - case NEON_2RM_VCVT_UF: /* VCVT.U32.F32 */ - { - TCGv_ptr fpstatus = get_fpstatus_ptr(tcg_ctx, 1); - gen_helper_vfp_touizs(tcg_ctx, tmp, tmp, fpstatus); - tcg_temp_free_ptr(tcg_ctx, fpstatus); - break; - } - default: - /* Reserved op values were caught by the - * neon_2rm_sizes[] check earlier. - */ - abort(); - } - neon_store_reg(tcg_ctx, rd, pass, tmp); - } - break; - } - } else if ((insn & (1 << 10)) == 0) { - /* VTBL, VTBX. */ - int n = ((insn >> 8) & 3) + 1; - if ((rn + n) > 32) { - /* This is UNPREDICTABLE; we choose to UNDEF to avoid the - * helper function running off the end of the register file. - */ - return 1; - } - n <<= 3; - if (insn & (1 << 6)) { - tmp = neon_load_reg(tcg_ctx, rd, 0); - } else { - tmp = tcg_temp_new_i32(tcg_ctx); - tcg_gen_movi_i32(tcg_ctx, tmp, 0); - } - tmp2 = neon_load_reg(tcg_ctx, rm, 0); - ptr1 = vfp_reg_ptr(tcg_ctx, true, rn); - tmp5 = tcg_const_i32(tcg_ctx, n); - gen_helper_neon_tbl(tcg_ctx, tmp2, tmp2, tmp, ptr1, tmp5); - tcg_temp_free_i32(tcg_ctx, tmp); - if (insn & (1 << 6)) { - tmp = neon_load_reg(tcg_ctx, rd, 1); - } else { - tmp = tcg_temp_new_i32(tcg_ctx); - tcg_gen_movi_i32(tcg_ctx, tmp, 0); - } - tmp3 = neon_load_reg(tcg_ctx, rm, 1); - gen_helper_neon_tbl(tcg_ctx, tmp3, tmp3, tmp, ptr1, tmp5); - tcg_temp_free_i32(tcg_ctx, tmp5); - tcg_temp_free_ptr(tcg_ctx, ptr1); - neon_store_reg(tcg_ctx, rd, 0, tmp2); - neon_store_reg(tcg_ctx, rd, 1, tmp3); - tcg_temp_free_i32(tcg_ctx, tmp); - } else if ((insn & 0x380) == 0) { - /* VDUP */ - int element; - MemOp size; + tcg_gen_umin_vec(tcg_ctx, vece, t, a, b); + tcg_gen_umax_vec(tcg_ctx, vece, d, a, b); + tcg_gen_sub_vec(tcg_ctx, vece, d, d, t); + tcg_temp_free_vec(tcg_ctx, t); +} - if ((insn & (7 << 16)) == 0 || (q && (rd & 1))) { - return 1; - } - if (insn & (1 << 16)) { - size = MO_8; - element = (insn >> 17) & 7; - } else if (insn & (1 << 17)) { - size = MO_16; - element = (insn >> 18) & 3; - } else { - size = MO_32; - element = (insn >> 19) & 1; - } - tcg_gen_gvec_dup_mem(tcg_ctx, size, neon_reg_offset(rd, 0), - neon_element_offset(rm, element, size), - q ? 16 : 8, q ? 
16 : 8); - } else { - return 1; - } - } - } - return 0; +void gen_gvec_uabd(TCGContext *tcg_ctx, unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) +{ + static const TCGOpcode vecop_list[] = { + INDEX_op_sub_vec, INDEX_op_umin_vec, INDEX_op_umax_vec, 0 + }; + static const GVecGen3 ops[4] = { + { .fniv = gen_uabd_vec, + .fno = gen_helper_gvec_uabd_b, + .opt_opc = vecop_list, + .vece = MO_8 }, + { .fniv = gen_uabd_vec, + .fno = gen_helper_gvec_uabd_h, + .opt_opc = vecop_list, + .vece = MO_16 }, + { .fni4 = gen_uabd_i32, + .fniv = gen_uabd_vec, + .fno = gen_helper_gvec_uabd_s, + .opt_opc = vecop_list, + .vece = MO_32 }, + { .fni8 = gen_uabd_i64, + .fniv = gen_uabd_vec, + .fno = gen_helper_gvec_uabd_d, + .prefer_i64 = TCG_TARGET_REG_BITS == 64, + .opt_opc = vecop_list, + .vece = MO_64 }, + }; + tcg_gen_gvec_3(tcg_ctx, rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]); } -/* Advanced SIMD three registers of the same length extension. - * 31 25 23 22 20 16 12 11 10 9 8 3 0 - * +---------------+-----+---+-----+----+----+---+----+---+----+---------+----+ - * | 1 1 1 1 1 1 0 | op1 | D | op2 | Vn | Vd | 1 | o3 | 0 | o4 | N Q M U | Vm | - * +---------------+-----+---+-----+----+----+---+----+---+----+---------+----+ - */ -static int disas_neon_insn_3same_ext(DisasContext *s, uint32_t insn) +static void gen_saba_i32(TCGContext *tcg_ctx, TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) { - TCGContext *tcg_ctx = s->uc->tcg_ctx; - gen_helper_gvec_3 *fn_gvec = NULL; - gen_helper_gvec_3_ptr *fn_gvec_ptr = NULL; - int rd, rn, rm, opr_sz; - int data = 0; - int off_rn, off_rm; - bool is_long = false, q = extract32(insn, 6, 1); - bool ptr_is_env = false; - - if ((insn & 0xfe200f10) == 0xfc200800) { - /* VCMLA -- 1111 110R R.1S .... .... 1000 ...0 .... */ - int size = extract32(insn, 20, 1); - data = extract32(insn, 23, 2); /* rot */ - if (!dc_isar_feature(aa32_vcma, s) - || (!size && !dc_isar_feature(aa32_fp16_arith, s))) { - return 1; - } - fn_gvec_ptr = size ? gen_helper_gvec_fcmlas : gen_helper_gvec_fcmlah; - } else if ((insn & 0xfea00f10) == 0xfc800800) { - /* VCADD -- 1111 110R 1.0S .... .... 1000 ...0 .... */ - int size = extract32(insn, 20, 1); - data = extract32(insn, 24, 1); /* rot */ - if (!dc_isar_feature(aa32_vcma, s) - || (!size && !dc_isar_feature(aa32_fp16_arith, s))) { - return 1; - } - fn_gvec_ptr = size ? gen_helper_gvec_fcadds : gen_helper_gvec_fcaddh; - } else if ((insn & 0xfeb00f00) == 0xfc200d00) { - /* V[US]DOT -- 1111 1100 0.10 .... .... 1101 .Q.U .... */ - bool u = extract32(insn, 4, 1); - if (!dc_isar_feature(aa32_dp, s)) { - return 1; - } - fn_gvec = u ? gen_helper_gvec_udot_b : gen_helper_gvec_sdot_b; - } else if ((insn & 0xff300f10) == 0xfc200810) { - /* VFM[AS]L -- 1111 1100 S.10 .... .... 1000 .Q.1 .... 
*/ - int is_s = extract32(insn, 23, 1); - if (!dc_isar_feature(aa32_fhm, s)) { - return 1; - } - is_long = true; - data = is_s; /* is_2 == 0 */ - fn_gvec_ptr = gen_helper_gvec_fmlal_a32; - ptr_is_env = true; - } else { - return 1; - } + TCGv_i32 t = tcg_temp_new_i32(tcg_ctx); + gen_sabd_i32(tcg_ctx, t, a, b); + tcg_gen_add_i32(tcg_ctx, d, d, t); + tcg_temp_free_i32(tcg_ctx, t); +} - VFP_DREG_D(rd, insn); - if (rd & q) { - return 1; - } - if (q || !is_long) { - VFP_DREG_N(rn, insn); - VFP_DREG_M(rm, insn); - if ((rn | rm) & q & !is_long) { - return 1; - } - off_rn = vfp_reg_offset(1, rn); - off_rm = vfp_reg_offset(1, rm); - } else { - rn = VFP_SREG_N(insn); - rm = VFP_SREG_M(insn); - off_rn = vfp_reg_offset(0, rn); - off_rm = vfp_reg_offset(0, rm); - } +static void gen_saba_i64(TCGContext *tcg_ctx, TCGv_i64 d, TCGv_i64 a, TCGv_i64 b) +{ + TCGv_i64 t = tcg_temp_new_i64(tcg_ctx); + gen_sabd_i64(tcg_ctx, t, a, b); + tcg_gen_add_i64(tcg_ctx, d, d, t); + tcg_temp_free_i64(tcg_ctx, t); +} - if (s->fp_excp_el) { - gen_exception_insn(s, s->pc_curr, EXCP_UDEF, - syn_simd_access_trap(1, 0xe, false), s->fp_excp_el); - return 0; - } - if (!s->vfp_enabled) { - return 1; - } +static void gen_saba_vec(TCGContext *tcg_ctx, unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b) +{ + TCGv_vec t = tcg_temp_new_vec_matching(tcg_ctx, d); + gen_sabd_vec(tcg_ctx, vece, t, a, b); + tcg_gen_add_vec(tcg_ctx, vece, d, d, t); + tcg_temp_free_vec(tcg_ctx, t); +} - opr_sz = (1 + q) * 8; - if (fn_gvec_ptr) { - TCGv_ptr ptr; - if (ptr_is_env) { - ptr = tcg_ctx->cpu_env; - } else { - ptr = get_fpstatus_ptr(tcg_ctx, 1); - } - tcg_gen_gvec_3_ptr(tcg_ctx, vfp_reg_offset(1, rd), off_rn, off_rm, ptr, - opr_sz, opr_sz, data, fn_gvec_ptr); - if (!ptr_is_env) { - tcg_temp_free_ptr(tcg_ctx, ptr); - } - } else { - tcg_gen_gvec_3_ool(tcg_ctx, vfp_reg_offset(1, rd), off_rn, off_rm, - opr_sz, opr_sz, data, fn_gvec); - } - return 0; +void gen_gvec_saba(TCGContext *tcg_ctx, unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) +{ + static const TCGOpcode vecop_list[] = { + INDEX_op_sub_vec, INDEX_op_add_vec, + INDEX_op_smin_vec, INDEX_op_smax_vec, 0 + }; + static const GVecGen3 ops[4] = { + { .fniv = gen_saba_vec, + .fno = gen_helper_gvec_saba_b, + .opt_opc = vecop_list, + .load_dest = true, + .vece = MO_8 }, + { .fniv = gen_saba_vec, + .fno = gen_helper_gvec_saba_h, + .opt_opc = vecop_list, + .load_dest = true, + .vece = MO_16 }, + { .fni4 = gen_saba_i32, + .fniv = gen_saba_vec, + .fno = gen_helper_gvec_saba_s, + .opt_opc = vecop_list, + .load_dest = true, + .vece = MO_32 }, + { .fni8 = gen_saba_i64, + .fniv = gen_saba_vec, + .fno = gen_helper_gvec_saba_d, + .prefer_i64 = TCG_TARGET_REG_BITS == 64, + .opt_opc = vecop_list, + .load_dest = true, + .vece = MO_64 }, + }; + tcg_gen_gvec_3(tcg_ctx, rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]); } -/* Advanced SIMD two registers and a scalar extension. 
- * 31 24 23 22 20 16 12 11 10 9 8 3 0 - * +-----------------+----+---+----+----+----+---+----+---+----+---------+----+ - * | 1 1 1 1 1 1 1 0 | o1 | D | o2 | Vn | Vd | 1 | o3 | 0 | o4 | N Q M U | Vm | - * +-----------------+----+---+----+----+----+---+----+---+----+---------+----+ - * - */ +static void gen_uaba_i32(TCGContext *tcg_ctx, TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) +{ + TCGv_i32 t = tcg_temp_new_i32(tcg_ctx); + gen_uabd_i32(tcg_ctx, t, a, b); + tcg_gen_add_i32(tcg_ctx, d, d, t); + tcg_temp_free_i32(tcg_ctx, t); +} -static int disas_neon_insn_2reg_scalar_ext(DisasContext *s, uint32_t insn) +static void gen_uaba_i64(TCGContext *tcg_ctx, TCGv_i64 d, TCGv_i64 a, TCGv_i64 b) { - TCGContext *tcg_ctx = s->uc->tcg_ctx; - gen_helper_gvec_3 *fn_gvec = NULL; - gen_helper_gvec_3_ptr *fn_gvec_ptr = NULL; - int rd, rn, rm, opr_sz, data; - int off_rn, off_rm; - bool is_long = false, q = extract32(insn, 6, 1); - bool ptr_is_env = false; - - if ((insn & 0xff000f10) == 0xfe000800) { - /* VCMLA (indexed) -- 1111 1110 S.RR .... .... 1000 ...0 .... */ - int rot = extract32(insn, 20, 2); - int size = extract32(insn, 23, 1); - int index; - - if (!dc_isar_feature(aa32_vcma, s)) { - return 1; - } - if (size == 0) { - if (!dc_isar_feature(aa32_fp16_arith, s)) { - return 1; - } - /* For fp16, rm is just Vm, and index is M. */ - rm = extract32(insn, 0, 4); - index = extract32(insn, 5, 1); - } else { - /* For fp32, rm is the usual M:Vm, and index is 0. */ - VFP_DREG_M(rm, insn); - index = 0; - } - data = (index << 2) | rot; - fn_gvec_ptr = (size ? gen_helper_gvec_fcmlas_idx - : gen_helper_gvec_fcmlah_idx); - } else if ((insn & 0xffb00f00) == 0xfe200d00) { - /* V[US]DOT -- 1111 1110 0.10 .... .... 1101 .Q.U .... */ - int u = extract32(insn, 4, 1); - - if (!dc_isar_feature(aa32_dp, s)) { - return 1; - } - fn_gvec = u ? gen_helper_gvec_udot_idx_b : gen_helper_gvec_sdot_idx_b; - /* rm is just Vm, and index is M. */ - data = extract32(insn, 5, 1); /* index */ - rm = extract32(insn, 0, 4); - } else if ((insn & 0xffa00f10) == 0xfe000810) { - /* VFM[AS]L -- 1111 1110 0.0S .... .... 1000 .Q.1 .... 
*/ - int is_s = extract32(insn, 20, 1); - int vm20 = extract32(insn, 0, 3); - int vm3 = extract32(insn, 3, 1); - int m = extract32(insn, 5, 1); - int index; - - if (!dc_isar_feature(aa32_fhm, s)) { - return 1; - } - if (q) { - rm = vm20; - index = m * 2 + vm3; - } else { - rm = vm20 * 2 + m; - index = vm3; - } - is_long = true; - data = (index << 2) | is_s; /* is_2 == 0 */ - fn_gvec_ptr = gen_helper_gvec_fmlal_idx_a32; - ptr_is_env = true; - } else { - return 1; - } + TCGv_i64 t = tcg_temp_new_i64(tcg_ctx); + gen_uabd_i64(tcg_ctx, t, a, b); + tcg_gen_add_i64(tcg_ctx, d, d, t); + tcg_temp_free_i64(tcg_ctx, t); +} - VFP_DREG_D(rd, insn); - if (rd & q) { - return 1; - } - if (q || !is_long) { - VFP_DREG_N(rn, insn); - if (rn & q & !is_long) { - return 1; - } - off_rn = vfp_reg_offset(1, rn); - off_rm = vfp_reg_offset(1, rm); - } else { - rn = VFP_SREG_N(insn); - off_rn = vfp_reg_offset(0, rn); - off_rm = vfp_reg_offset(0, rm); - } - if (s->fp_excp_el) { - gen_exception_insn(s, s->pc_curr, EXCP_UDEF, - syn_simd_access_trap(1, 0xe, false), s->fp_excp_el); - return 0; - } - if (!s->vfp_enabled) { - return 1; - } +static void gen_uaba_vec(TCGContext *tcg_ctx, unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b) +{ + TCGv_vec t = tcg_temp_new_vec_matching(tcg_ctx, d); + gen_uabd_vec(tcg_ctx, vece, t, a, b); + tcg_gen_add_vec(tcg_ctx, vece, d, d, t); + tcg_temp_free_vec(tcg_ctx, t); +} - opr_sz = (1 + q) * 8; - if (fn_gvec_ptr) { - TCGv_ptr ptr; - if (ptr_is_env) { - ptr = tcg_ctx->cpu_env; - } else { - ptr = get_fpstatus_ptr(tcg_ctx, 1); - } - tcg_gen_gvec_3_ptr(tcg_ctx, vfp_reg_offset(1, rd), off_rn, off_rm, ptr, - opr_sz, opr_sz, data, fn_gvec_ptr); - if (!ptr_is_env) { - tcg_temp_free_ptr(tcg_ctx, ptr); - } - } else { - tcg_gen_gvec_3_ool(tcg_ctx, vfp_reg_offset(1, rd), off_rn, off_rm, - opr_sz, opr_sz, data, fn_gvec); - } - return 0; +void gen_gvec_uaba(TCGContext *tcg_ctx, unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) +{ + static const TCGOpcode vecop_list[] = { + INDEX_op_sub_vec, INDEX_op_add_vec, + INDEX_op_umin_vec, INDEX_op_umax_vec, 0 + }; + static const GVecGen3 ops[4] = { + { .fniv = gen_uaba_vec, + .fno = gen_helper_gvec_uaba_b, + .opt_opc = vecop_list, + .load_dest = true, + .vece = MO_8 }, + { .fniv = gen_uaba_vec, + .fno = gen_helper_gvec_uaba_h, + .opt_opc = vecop_list, + .load_dest = true, + .vece = MO_16 }, + { .fni4 = gen_uaba_i32, + .fniv = gen_uaba_vec, + .fno = gen_helper_gvec_uaba_s, + .opt_opc = vecop_list, + .load_dest = true, + .vece = MO_32 }, + { .fni8 = gen_uaba_i64, + .fniv = gen_uaba_vec, + .fno = gen_helper_gvec_uaba_d, + .prefer_i64 = TCG_TARGET_REG_BITS == 64, + .opt_opc = vecop_list, + .load_dest = true, + .vece = MO_64 }, + }; + tcg_gen_gvec_3(tcg_ctx, rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]); } static int disas_coproc_insn(DisasContext *s, uint32_t insn) @@ -7734,7 +5198,7 @@ static void gen_srs(DisasContext *s, tcg_temp_free_i32(tcg_ctx, tmp); } tcg_temp_free_i32(tcg_ctx, addr); - s->base.is_jmp = DISAS_UPDATE; + s->base.is_jmp = DISAS_UPDATE_EXIT; } /* Generate a label used for skipping this instruction */ @@ -10076,7 +7540,7 @@ static bool op_smlad(DisasContext *s, arg_rrrr *a, bool m_swap, bool sub) t1 = load_reg(s, a->rn); t2 = load_reg(s, a->rm); if (m_swap) { - gen_swap_half(tcg_ctx, t2); + gen_swap_half(tcg_ctx, t2, t2); } gen_smul_dual(tcg_ctx, t1, t2); @@ -10135,7 +7599,7 @@ static bool op_smlald(DisasContext *s, arg_rrrr *a, bool m_swap, bool sub) t1 = load_reg(s, a->rn); t2 = 
load_reg(s, a->rm); if (m_swap) { - gen_swap_half(tcg_ctx, t2); + gen_swap_half(tcg_ctx, t2, t2); } gen_smul_dual(tcg_ctx, t1, t2); @@ -10490,9 +7954,6 @@ static bool do_ldm(DisasContext *s, arg_ldst_block *a, int min_n) // gen_io_start(tcg_ctx); } gen_helper_cpsr_write_eret(tcg_ctx, tcg_ctx->cpu_env, tmp); - if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) { - // gen_io_end(tcg_ctx); - } tcg_temp_free_i32(tcg_ctx, tmp); /* Must exit loop to check un-masked IRQs */ s->base.is_jmp = DISAS_EXIT; @@ -10878,7 +8339,7 @@ static bool trans_SETEND(DisasContext *s, arg_SETEND *a) } if (a->E != (s->be_data == MO_BE)) { gen_helper_setend(tcg_ctx, tcg_ctx->cpu_env); - s->base.is_jmp = DISAS_UPDATE; + s->base.is_jmp = DISAS_UPDATE_EXIT; } return true; } @@ -10964,33 +8425,14 @@ static void disas_arm_insn(DisasContext *s, unsigned int insn) /* Unconditional instructions. */ /* TODO: Perhaps merge these into one decodetree output file. */ if (disas_a32_uncond(s, insn) || - disas_vfp_uncond(s, insn)) { + disas_vfp_uncond(s, insn) || + disas_neon_dp(s, insn) || + disas_neon_ls(s, insn) || + disas_neon_shared(s, insn)) { return; } /* fall back to legacy decoder */ - if (((insn >> 25) & 7) == 1) { - /* NEON Data processing. */ - if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { - goto illegal_op; - } - - if (disas_neon_data_insn(s, insn)) { - goto illegal_op; - } - return; - } - if ((insn & 0x0f100000) == 0x04000000) { - /* NEON load/store. */ - if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { - goto illegal_op; - } - - if (disas_neon_ls_insn(s, insn)) { - goto illegal_op; - } - return; - } if ((insn & 0x0e000f00) == 0x0c000100) { if (arm_dc_feature(s, ARM_FEATURE_IWMMXT)) { /* iWMMXt register transfer. */ @@ -11000,18 +8442,6 @@ static void disas_arm_insn(DisasContext *s, unsigned int insn) } } } - } else if ((insn & 0x0e000a00) == 0x0c000800 - && arm_dc_feature(s, ARM_FEATURE_V8)) { - if (disas_neon_insn_3same_ext(s, insn)) { - goto illegal_op; - } - return; - } else if ((insn & 0x0f000a00) == 0x0e000800 - && arm_dc_feature(s, ARM_FEATURE_V8)) { - if (disas_neon_insn_2reg_scalar_ext(s, insn)) { - goto illegal_op; - } - return; } goto illegal_op; } @@ -11126,6 +8556,33 @@ static void disas_thumb2_insn(DisasContext *s, uint32_t insn) ARCH(6T2); } + if ((insn & 0xef000000) == 0xef000000) { + /* + * T32 encodings 0b111p_1111_qqqq_qqqq_qqqq_qqqq_qqqq_qqqq + * transform into + * A32 encodings 0b1111_001p_qqqq_qqqq_qqqq_qqqq_qqqq_qqqq + */ + uint32_t a32_insn = (insn & 0xe2ffffff) | + ((insn & (1 << 28)) >> 4) | (1 << 28); + + if (disas_neon_dp(s, a32_insn)) { + return; + } + } + + if ((insn & 0xff100000) == 0xf9000000) { + /* + * T32 encodings 0b1111_1001_ppp0_qqqq_qqqq_qqqq_qqqq_qqqq + * transform into + * A32 encodings 0b1111_0100_ppp0_qqqq_qqqq_qqqq_qqqq_qqqq + */ + uint32_t a32_insn = (insn & 0x00ffffff) | 0xf4000000; + + if (disas_neon_ls(s, a32_insn)) { + return; + } + } + /* * TODO: Perhaps merge these into one decodetree output file. * Note disas_vfp is written for a32 with cond field in the @@ -11133,6 +8590,7 @@ static void disas_thumb2_insn(DisasContext *s, uint32_t insn) */ if (disas_t32(s, insn) || disas_vfp_uncond(s, insn) || + disas_neon_shared(s, insn) || ((insn >> 28) == 0xe && disas_vfp(s, insn))) { return; } @@ -11162,24 +8620,9 @@ static void disas_thumb2_insn(DisasContext *s, uint32_t insn) } break; } - if ((insn & 0xfe000a00) == 0xfc000800 - && arm_dc_feature(s, ARM_FEATURE_V8)) { - /* The Thumb2 and ARM encodings are identical. 
*/ - if (disas_neon_insn_3same_ext(s, insn)) { - goto illegal_op; - } - } else if ((insn & 0xff000a00) == 0xfe000800 - && arm_dc_feature(s, ARM_FEATURE_V8)) { - /* The Thumb2 and ARM encodings are identical. */ - if (disas_neon_insn_2reg_scalar_ext(s, insn)) { - goto illegal_op; - } - } else if (((insn >> 24) & 3) == 3) { - /* Translate into the equivalent ARM encoding. */ - insn = (insn & 0xe2ffffff) | ((insn & (1 << 28)) >> 4) | (1 << 28); - if (disas_neon_data_insn(s, insn)) { - goto illegal_op; - } + if (((insn >> 24) & 3) == 3) { + /* Neon DP, but failed disas_neon_dp() */ + goto illegal_op; } else if (((insn >> 8) & 0xe) == 10) { /* VFP, but failed disas_vfp. */ goto illegal_op; @@ -11192,12 +8635,6 @@ static void disas_thumb2_insn(DisasContext *s, uint32_t insn) } break; case 12: - if ((insn & 0x01100000) == 0x01000000) { - if (disas_neon_ls_insn(s, insn)) { - goto illegal_op; - } - break; - } goto illegal_op; default: illegal_op: @@ -11685,7 +9122,8 @@ static void arm_tr_tb_stop(DisasContextBase *dcbase, CPUState *cpu) break; case DISAS_NEXT: case DISAS_TOO_MANY: - case DISAS_UPDATE: + case DISAS_UPDATE_EXIT: + case DISAS_UPDATE_NOCHAIN: gen_set_pc_im(dc, dc->base.pc_next); /* fall through */ default: @@ -11709,10 +9147,13 @@ static void arm_tr_tb_stop(DisasContextBase *dcbase, CPUState *cpu) case DISAS_TOO_MANY: gen_goto_tb(dc, 1, dc->base.pc_next); break; + case DISAS_UPDATE_NOCHAIN: + gen_set_pc_im(dc, dc->base.pc_next); + /* fall through */ case DISAS_JUMP: gen_goto_ptr(tcg_ctx); break; - case DISAS_UPDATE: + case DISAS_UPDATE_EXIT: gen_set_pc_im(dc, dc->base.pc_next); /* fall through */ default: diff --git a/qemu/target/arm/translate.h b/qemu/target/arm/translate.h index 62ea7a5277..b0c4539267 100644 --- a/qemu/target/arm/translate.h +++ b/qemu/target/arm/translate.h @@ -29,6 +29,7 @@ typedef struct DisasContext { ARMMMUIdx mmu_idx; /* MMU index to use for normal loads/stores */ uint8_t tbii; /* TBI1|TBI0 for insns */ uint8_t tbid; /* TBI1|TBI0 for data */ + uint8_t tcma; /* TCMA1|TCMA0 for MTE */ bool ns; /* Use non-secure CPREG bank on access */ int fp_excp_el; /* FP exception EL or 0 if enabled */ int sve_excp_el; /* SVE exception EL or 0 if enabled */ @@ -76,6 +77,10 @@ typedef struct DisasContext { bool unpriv; /* True if v8.3-PAuth is active. */ bool pauth_active; + /* True if v8.5-MTE access to tags is enabled. */ + bool ata; + /* True if v8.5-MTE tag checks affect the PE; index with is_unpriv. */ + bool mte_active[2]; /* True with v8.5-BTI and SCTLR_ELx.BT* set. */ bool bt; /* True if any CP15 access is trapped by HSTR_EL2 */ @@ -85,6 +90,8 @@ typedef struct DisasContext { * < 0, set by the current instruction. */ int8_t btype; + /* A copy of cpu->dcz_blocksize. */ + uint8_t dcz_blocksize; /* True if this page is guarded. */ bool guarded_page; /* Bottom two bits of XScale c15_cpar coprocessor access control reg */ @@ -145,7 +152,8 @@ static inline void disas_set_insn_syndrome(DisasContext *s, uint32_t syn) /* is_jmp field values */ #define DISAS_JUMP DISAS_TARGET_0 /* only pc was modified dynamically */ -#define DISAS_UPDATE DISAS_TARGET_1 /* cpu state was modified dynamically */ +/* CPU state was modified dynamically; exit to main loop for interrupts. */ +#define DISAS_UPDATE_EXIT DISAS_TARGET_1 /* These instructions trap after executing, so the A32/T32 decoder must * defer them until after the conditional execution state has been updated. * WFI also needs special handling when single-stepping. 
@@ -161,13 +169,16 @@ static inline void disas_set_insn_syndrome(DisasContext *s, uint32_t syn) * custom end-of-TB code) */ #define DISAS_BX_EXCRET DISAS_TARGET_8 -/* For instructions which want an immediate exit to the main loop, - * as opposed to attempting to use lookup_and_goto_ptr. Unlike - * DISAS_UPDATE this doesn't write the PC on exiting the translation - * loop so you need to ensure something (gen_a64_set_pc_im or runtime - * helper) has done so before we reach return from cpu_tb_exec. +/* + * For instructions which want an immediate exit to the main loop, as opposed + * to attempting to use lookup_and_goto_ptr. Unlike DISAS_UPDATE_EXIT, this + * doesn't write the PC on exiting the translation loop so you need to ensure + * something (gen_a64_set_pc_im or runtime helper) has done so before we reach + * return from cpu_tb_exec. */ #define DISAS_EXIT DISAS_TARGET_9 +/* CPU state was modified dynamically; no need to exit, but do not chain. */ +#define DISAS_UPDATE_NOCHAIN DISAS_TARGET_10 #ifdef TARGET_AARCH64 void a64_translate_init(struct uc_struct *uc); @@ -274,28 +285,110 @@ static inline void gen_swstep_exception(DisasContext *s, int isv, int ex) uint64_t vfp_expand_imm(int size, uint8_t imm8); /* Vector operations shared between ARM and AArch64. */ -extern const GVecGen3 mla_op[4]; -extern const GVecGen3 mls_op[4]; -extern const GVecGen3 cmtst_op[4]; -extern const GVecGen3 sshl_op[4]; -extern const GVecGen3 ushl_op[4]; -extern const GVecGen2i ssra_op[4]; -extern const GVecGen2i usra_op[4]; -extern const GVecGen2i sri_op[4]; -extern const GVecGen2i sli_op[4]; -extern const GVecGen4 uqadd_op[4]; -extern const GVecGen4 sqadd_op[4]; -extern const GVecGen4 uqsub_op[4]; -extern const GVecGen4 sqsub_op[4]; +void gen_gvec_ceq0(TCGContext *, unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs, + uint32_t opr_sz, uint32_t max_sz); +void gen_gvec_clt0(TCGContext *, unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs, + uint32_t opr_sz, uint32_t max_sz); +void gen_gvec_cgt0(TCGContext *, unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs, + uint32_t opr_sz, uint32_t max_sz); +void gen_gvec_cle0(TCGContext *, unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs, + uint32_t opr_sz, uint32_t max_sz); +void gen_gvec_cge0(TCGContext *, unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs, + uint32_t opr_sz, uint32_t max_sz); + +void gen_gvec_mla(TCGContext *,unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz); +void gen_gvec_mls(TCGContext *,unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz); + +void gen_gvec_cmtst(TCGContext *,unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz); +void gen_gvec_sshl(TCGContext *tcg_ctx, unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz); +void gen_gvec_ushl(TCGContext *tcg_ctx, unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz); + void gen_cmtst_i64(TCGContext *, TCGv_i64 d, TCGv_i64 a, TCGv_i64 b); void gen_ushl_i32(TCGContext *, TCGv_i32 d, TCGv_i32 a, TCGv_i32 b); void gen_sshl_i32(TCGContext *, TCGv_i32 d, TCGv_i32 a, TCGv_i32 b); void gen_ushl_i64(TCGContext *, TCGv_i64 d, TCGv_i64 a, TCGv_i64 b); void gen_sshl_i64(TCGContext *, TCGv_i64 d, TCGv_i64 a, TCGv_i64 b); +void gen_gvec_uqadd_qc(TCGContext *tcg_ctx, unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz); +void 
gen_gvec_sqadd_qc(TCGContext *tcg_ctx, unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz); +void gen_gvec_uqsub_qc(TCGContext *tcg_ctx, unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz); +void gen_gvec_sqsub_qc(TCGContext *tcg_ctx, unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz); + +void gen_gvec_ssra(TCGContext *tcg_ctx, unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs, + int64_t shift, uint32_t opr_sz, uint32_t max_sz); +void gen_gvec_usra(TCGContext *tcg_ctx, unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs, + int64_t shift, uint32_t opr_sz, uint32_t max_sz); + +void gen_gvec_srshr(TCGContext *tcg_ctx, unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs, + int64_t shift, uint32_t opr_sz, uint32_t max_sz); +void gen_gvec_urshr(TCGContext *tcg_ctx, unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs, + int64_t shift, uint32_t opr_sz, uint32_t max_sz); +void gen_gvec_srsra(TCGContext *tcg_ctx, unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs, + int64_t shift, uint32_t opr_sz, uint32_t max_sz); +void gen_gvec_ursra(TCGContext *tcg_ctx, unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs, + int64_t shift, uint32_t opr_sz, uint32_t max_sz); + +void gen_gvec_sri(TCGContext *tcg_ctx, unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs, + int64_t shift, uint32_t opr_sz, uint32_t max_sz); +void gen_gvec_sli(TCGContext *tcg_ctx, unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs, + int64_t shift, uint32_t opr_sz, uint32_t max_sz); + +void gen_gvec_sqrdmlah_qc(TCGContext *tcg_ctx, unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz); +void gen_gvec_sqrdmlsh_qc(TCGContext *tcg_ctx, unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz); + +void gen_gvec_sabd(TCGContext *,unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz); +void gen_gvec_uabd(TCGContext *,unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz); + +void gen_gvec_saba(TCGContext *,unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz); +void gen_gvec_uaba(TCGContext *,unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz); + /* * Forward to the isar_feature_* tests given a DisasContext pointer. */ #define dc_isar_feature(name, ctx) isar_feature_##name(ctx->isar) +/* Note that the gvec expanders operate on offsets + sizes. 
*/ +typedef void GVecGen2Fn(TCGContext *, unsigned, uint32_t, uint32_t, uint32_t, uint32_t); +typedef void GVecGen2iFn(TCGContext *, unsigned, uint32_t, uint32_t, int64_t, + uint32_t, uint32_t); +typedef void GVecGen3Fn(TCGContext *, unsigned, uint32_t, uint32_t, + uint32_t, uint32_t, uint32_t); +typedef void GVecGen4Fn(TCGContext *, unsigned, uint32_t, uint32_t, uint32_t, + uint32_t, uint32_t, uint32_t); + +/* Function prototype for gen_ functions for calling Neon helpers */ +typedef void NeonGenOneOpFn(TCGContext *, TCGv_i32, TCGv_i32); +typedef void NeonGenOneOpEnvFn(TCGContext *, TCGv_i32, TCGv_ptr, TCGv_i32); +typedef void NeonGenTwoOpFn(TCGContext *, TCGv_i32, TCGv_i32, TCGv_i32); +typedef void NeonGenTwoOpEnvFn(TCGContext *, TCGv_i32, TCGv_ptr, TCGv_i32, TCGv_i32); +typedef void NeonGenTwo64OpFn(TCGContext*, TCGv_i64, TCGv_i64, TCGv_i64); +typedef void NeonGenTwo64OpEnvFn(TCGContext *, TCGv_i64, TCGv_ptr, TCGv_i64, TCGv_i64); +typedef void NeonGenNarrowFn(TCGContext *, TCGv_i32, TCGv_i64); +typedef void NeonGenNarrowEnvFn(TCGContext *, TCGv_i32, TCGv_ptr, TCGv_i64); +typedef void NeonGenWidenFn(TCGContext *, TCGv_i64, TCGv_i32); +typedef void NeonGenTwoOpWidenFn(TCGContext *, TCGv_i64, TCGv_i32, TCGv_i32); +typedef void NeonGenOneSingleOpFn(TCGContext *, TCGv_i32, TCGv_i32, TCGv_ptr); +typedef void NeonGenTwoSingleOpFn(TCGContext *, TCGv_i32, TCGv_i32, TCGv_i32, TCGv_ptr); +typedef void NeonGenTwoDoubleOpFn(TCGContext *, TCGv_i64, TCGv_i64, TCGv_i64, TCGv_ptr); +typedef void NeonGenOne64OpFn(TCGContext *, TCGv_i64, TCGv_i64); +typedef void CryptoTwoOpFn(TCGv_ptr, TCGv_ptr); +typedef void CryptoThreeOpIntFn(TCGv_ptr, TCGv_ptr, TCGv_i32); +typedef void CryptoThreeOpFn(TCGv_ptr, TCGv_ptr, TCGv_ptr); +typedef void AtomicThreeOpFn(TCGContext *, TCGv_i64, TCGv_i64, TCGv_i64, TCGArg, MemOp); + #endif /* TARGET_ARM_TRANSLATE_H */ diff --git a/qemu/target/arm/vec_helper.c b/qemu/target/arm/vec_helper.c index a1839eed81..84a54cee05 100644 --- a/qemu/target/arm/vec_helper.c +++ b/qemu/target/arm/vec_helper.c @@ -22,7 +22,7 @@ #include "exec/helper-proto.h" #include "tcg/tcg-gvec-desc.h" #include "fpu/softfloat.h" - +#include "vec_internal.h" /* Note that vector data is stored in host-endian 64-bit chunks, so addressing units smaller than that needs a host-endian fixup. */ @@ -36,21 +36,9 @@ #define H4(x) (x) #endif -#define SET_QC() env->vfp.qc[0] = 1 - -static void clear_tail(void *vd, uintptr_t opr_sz, uintptr_t max_sz) -{ - uint64_t *d = (uint64_t *)((char *)vd + opr_sz); - uintptr_t i; - - for (i = opr_sz; i < max_sz; i += 8) { - *d++ = 0; - } -} - /* Signed saturating rounding doubling multiply-accumulate high half, 16-bit */ -static uint16_t inl_qrdmlah_s16(CPUARMState *env, int16_t src1, - int16_t src2, int16_t src3) +static int16_t inl_qrdmlah_s16(int16_t src1, int16_t src2, + int16_t src3, uint32_t *sat) { /* Simplify: * = ((a3 << 16) + ((e1 * e2) << 1) + (1 << 15)) >> 16 @@ -60,7 +48,7 @@ static uint16_t inl_qrdmlah_s16(CPUARMState *env, int16_t src1, ret = ((int32_t)src3 << 15) + ret + (1 << 14); ret >>= 15; if (ret != (int16_t)ret) { - SET_QC(); + *sat = 1; ret = (ret < 0 ? 
-0x8000 : 0x7fff); } return ret; @@ -69,30 +57,30 @@ static uint16_t inl_qrdmlah_s16(CPUARMState *env, int16_t src1, uint32_t HELPER(neon_qrdmlah_s16)(CPUARMState *env, uint32_t src1, uint32_t src2, uint32_t src3) { - uint16_t e1 = inl_qrdmlah_s16(env, src1, src2, src3); - uint16_t e2 = inl_qrdmlah_s16(env, src1 >> 16, src2 >> 16, src3 >> 16); + uint32_t *sat = &env->vfp.qc[0]; + uint16_t e1 = inl_qrdmlah_s16(src1, src2, src3, sat); + uint16_t e2 = inl_qrdmlah_s16(src1 >> 16, src2 >> 16, src3 >> 16, sat); return deposit32(e1, 16, 16, e2); } void HELPER(gvec_qrdmlah_s16)(void *vd, void *vn, void *vm, - void *ve, uint32_t desc) + void *vq, uint32_t desc) { uintptr_t opr_sz = simd_oprsz(desc); int16_t *d = vd; int16_t *n = vn; int16_t *m = vm; - CPUARMState *env = ve; uintptr_t i; for (i = 0; i < opr_sz / 2; ++i) { - d[i] = inl_qrdmlah_s16(env, n[i], m[i], d[i]); + d[i] = inl_qrdmlah_s16(n[i], m[i], d[i], vq); } clear_tail(d, opr_sz, simd_maxsz(desc)); } /* Signed saturating rounding doubling multiply-subtract high half, 16-bit */ -static uint16_t inl_qrdmlsh_s16(CPUARMState *env, int16_t src1, - int16_t src2, int16_t src3) +static int16_t inl_qrdmlsh_s16(int16_t src1, int16_t src2, + int16_t src3, uint32_t *sat) { /* Similarly, using subtraction: * = ((a3 << 16) - ((e1 * e2) << 1) + (1 << 15)) >> 16 @@ -102,7 +90,7 @@ static uint16_t inl_qrdmlsh_s16(CPUARMState *env, int16_t src1, ret = ((int32_t)src3 << 15) - ret + (1 << 14); ret >>= 15; if (ret != (int16_t)ret) { - SET_QC(); + *sat = 1; ret = (ret < 0 ? -0x8000 : 0x7fff); } return ret; @@ -111,85 +99,97 @@ static uint16_t inl_qrdmlsh_s16(CPUARMState *env, int16_t src1, uint32_t HELPER(neon_qrdmlsh_s16)(CPUARMState *env, uint32_t src1, uint32_t src2, uint32_t src3) { - uint16_t e1 = inl_qrdmlsh_s16(env, src1, src2, src3); - uint16_t e2 = inl_qrdmlsh_s16(env, src1 >> 16, src2 >> 16, src3 >> 16); + uint32_t *sat = &env->vfp.qc[0]; + uint16_t e1 = inl_qrdmlsh_s16(src1, src2, src3, sat); + uint16_t e2 = inl_qrdmlsh_s16(src1 >> 16, src2 >> 16, src3 >> 16, sat); return deposit32(e1, 16, 16, e2); } void HELPER(gvec_qrdmlsh_s16)(void *vd, void *vn, void *vm, - void *ve, uint32_t desc) + void *vq, uint32_t desc) { uintptr_t opr_sz = simd_oprsz(desc); int16_t *d = vd; int16_t *n = vn; int16_t *m = vm; - CPUARMState *env = ve; uintptr_t i; for (i = 0; i < opr_sz / 2; ++i) { - d[i] = inl_qrdmlsh_s16(env, n[i], m[i], d[i]); + d[i] = inl_qrdmlsh_s16(n[i], m[i], d[i], vq); } clear_tail(d, opr_sz, simd_maxsz(desc)); } /* Signed saturating rounding doubling multiply-accumulate high half, 32-bit */ -uint32_t HELPER(neon_qrdmlah_s32)(CPUARMState *env, int32_t src1, - int32_t src2, int32_t src3) +static int32_t inl_qrdmlah_s32(int32_t src1, int32_t src2, + int32_t src3, uint32_t *sat) { /* Simplify similarly to int_qrdmlah_s16 above. */ int64_t ret = (int64_t)src1 * src2; ret = ((int64_t)src3 << 31) + ret + (1 << 30); ret >>= 31; if (ret != (int32_t)ret) { - SET_QC(); + *sat = 1; ret = (ret < 0 ? 
INT32_MIN : INT32_MAX); } return ret; } +uint32_t HELPER(neon_qrdmlah_s32)(CPUARMState *env, int32_t src1, + int32_t src2, int32_t src3) +{ + uint32_t *sat = &env->vfp.qc[0]; + return inl_qrdmlah_s32(src1, src2, src3, sat); +} + void HELPER(gvec_qrdmlah_s32)(void *vd, void *vn, void *vm, - void *ve, uint32_t desc) + void *vq, uint32_t desc) { uintptr_t opr_sz = simd_oprsz(desc); int32_t *d = vd; int32_t *n = vn; int32_t *m = vm; - CPUARMState *env = ve; uintptr_t i; for (i = 0; i < opr_sz / 4; ++i) { - d[i] = helper_neon_qrdmlah_s32(env, n[i], m[i], d[i]); + d[i] = inl_qrdmlah_s32(n[i], m[i], d[i], vq); } clear_tail(d, opr_sz, simd_maxsz(desc)); } /* Signed saturating rounding doubling multiply-subtract high half, 32-bit */ -uint32_t HELPER(neon_qrdmlsh_s32)(CPUARMState *env, int32_t src1, - int32_t src2, int32_t src3) +static int32_t inl_qrdmlsh_s32(int32_t src1, int32_t src2, + int32_t src3, uint32_t *sat) { /* Simplify similarly to int_qrdmlsh_s16 above. */ int64_t ret = (int64_t)src1 * src2; ret = ((int64_t)src3 << 31) - ret + (1 << 30); ret >>= 31; if (ret != (int32_t)ret) { - SET_QC(); + *sat = 1; ret = (ret < 0 ? INT32_MIN : INT32_MAX); } return ret; } +uint32_t HELPER(neon_qrdmlsh_s32)(CPUARMState *env, int32_t src1, + int32_t src2, int32_t src3) +{ + uint32_t *sat = &env->vfp.qc[0]; + return inl_qrdmlsh_s32(src1, src2, src3, sat); +} + void HELPER(gvec_qrdmlsh_s32)(void *vd, void *vn, void *vm, - void *ve, uint32_t desc) + void *vq, uint32_t desc) { uintptr_t opr_sz = simd_oprsz(desc); int32_t *d = vd; int32_t *n = vn; int32_t *m = vm; - CPUARMState *env = ve; uintptr_t i; for (i = 0; i < opr_sz / 4; ++i) { - d[i] = helper_neon_qrdmlsh_s32(env, n[i], m[i], d[i]); + d[i] = inl_qrdmlsh_s32(n[i], m[i], d[i], vq); } clear_tail(d, opr_sz, simd_maxsz(desc)); } @@ -681,6 +681,11 @@ static float64 float64_ftsmul(float64 op1, uint64_t op2, float_status *stat) return result; } +static float32 float32_abd(float32 op1, float32 op2, float_status *stat) +{ + return float32_abs(float32_sub(op1, op2, stat)); +} + #define DO_3OP(NAME, FUNC, TYPE) \ void HELPER(NAME)(void *vd, void *vn, void *vm, void *stat, uint32_t desc) \ { \ @@ -708,6 +713,8 @@ DO_3OP(gvec_ftsmul_h, float16_ftsmul, float16) DO_3OP(gvec_ftsmul_s, float32_ftsmul, float32) DO_3OP(gvec_ftsmul_d, float64_ftsmul, float64) +DO_3OP(gvec_fabd_s, float32_abd, float32) + #ifdef TARGET_AARCH64 DO_3OP(gvec_recps_h, helper_recpsf_f16, float16) @@ -901,6 +908,118 @@ void HELPER(gvec_sqsub_d)(void *vd, void *vq, void *vn, clear_tail(d, oprsz, simd_maxsz(desc)); } +#define DO_SRA(NAME, TYPE) \ +void HELPER(NAME)(void *vd, void *vn, uint32_t desc) \ +{ \ + intptr_t i, oprsz = simd_oprsz(desc); \ + int shift = simd_data(desc); \ + TYPE *d = vd, *n = vn; \ + for (i = 0; i < oprsz / sizeof(TYPE); i++) { \ + d[i] += n[i] >> shift; \ + } \ + clear_tail(d, oprsz, simd_maxsz(desc)); \ +} + +DO_SRA(gvec_ssra_b, int8_t) +DO_SRA(gvec_ssra_h, int16_t) +DO_SRA(gvec_ssra_s, int32_t) +DO_SRA(gvec_ssra_d, int64_t) + +DO_SRA(gvec_usra_b, uint8_t) +DO_SRA(gvec_usra_h, uint16_t) +DO_SRA(gvec_usra_s, uint32_t) +DO_SRA(gvec_usra_d, uint64_t) + +#undef DO_SRA + +#define DO_RSHR(NAME, TYPE) \ +void HELPER(NAME)(void *vd, void *vn, uint32_t desc) \ +{ \ + intptr_t i, oprsz = simd_oprsz(desc); \ + int shift = simd_data(desc); \ + TYPE *d = vd, *n = vn; \ + for (i = 0; i < oprsz / sizeof(TYPE); i++) { \ + TYPE tmp = n[i] >> (shift - 1); \ + d[i] = (tmp >> 1) + (tmp & 1); \ + } \ + clear_tail(d, oprsz, simd_maxsz(desc)); \ +} + +DO_RSHR(gvec_srshr_b, int8_t) 
+DO_RSHR(gvec_srshr_h, int16_t) +DO_RSHR(gvec_srshr_s, int32_t) +DO_RSHR(gvec_srshr_d, int64_t) + +DO_RSHR(gvec_urshr_b, uint8_t) +DO_RSHR(gvec_urshr_h, uint16_t) +DO_RSHR(gvec_urshr_s, uint32_t) +DO_RSHR(gvec_urshr_d, uint64_t) + +#undef DO_RSHR + +#define DO_RSRA(NAME, TYPE) \ +void HELPER(NAME)(void *vd, void *vn, uint32_t desc) \ +{ \ + intptr_t i, oprsz = simd_oprsz(desc); \ + int shift = simd_data(desc); \ + TYPE *d = vd, *n = vn; \ + for (i = 0; i < oprsz / sizeof(TYPE); i++) { \ + TYPE tmp = n[i] >> (shift - 1); \ + d[i] += (tmp >> 1) + (tmp & 1); \ + } \ + clear_tail(d, oprsz, simd_maxsz(desc)); \ +} + +DO_RSRA(gvec_srsra_b, int8_t) +DO_RSRA(gvec_srsra_h, int16_t) +DO_RSRA(gvec_srsra_s, int32_t) +DO_RSRA(gvec_srsra_d, int64_t) + +DO_RSRA(gvec_ursra_b, uint8_t) +DO_RSRA(gvec_ursra_h, uint16_t) +DO_RSRA(gvec_ursra_s, uint32_t) +DO_RSRA(gvec_ursra_d, uint64_t) + +#undef DO_RSRA + +#define DO_SRI(NAME, TYPE) \ +void HELPER(NAME)(void *vd, void *vn, uint32_t desc) \ +{ \ + intptr_t i, oprsz = simd_oprsz(desc); \ + int shift = simd_data(desc); \ + TYPE *d = vd, *n = vn; \ + for (i = 0; i < oprsz / sizeof(TYPE); i++) { \ + d[i] = deposit64(d[i], 0, sizeof(TYPE) * 8 - shift, n[i] >> shift); \ + } \ + clear_tail(d, oprsz, simd_maxsz(desc)); \ +} + +DO_SRI(gvec_sri_b, uint8_t) +DO_SRI(gvec_sri_h, uint16_t) +DO_SRI(gvec_sri_s, uint32_t) +DO_SRI(gvec_sri_d, uint64_t) + +#undef DO_SRI + +#define DO_SLI(NAME, TYPE) \ +void HELPER(NAME)(void *vd, void *vn, uint32_t desc) \ +{ \ + intptr_t i, oprsz = simd_oprsz(desc); \ + int shift = simd_data(desc); \ + TYPE *d = vd, *n = vn; \ + for (i = 0; i < oprsz / sizeof(TYPE); i++) { \ + d[i] = deposit64(d[i], shift, sizeof(TYPE) * 8 - shift, n[i]); \ + } \ + clear_tail(d, oprsz, simd_maxsz(desc)); \ +} + +DO_SLI(gvec_sli_b, uint8_t) +DO_SLI(gvec_sli_h, uint16_t) +DO_SLI(gvec_sli_s, uint32_t) +DO_SLI(gvec_sli_d, uint64_t) + +#undef DO_SLI + /* * Convert float16 to float32, raising no exceptions and * preserving exceptional values, including SNaN. @@ -1263,3 +1382,76 @@ void HELPER(sve2_pmull_h)(void *vd, void *vn, void *vm, uint32_t desc) } } #endif + +#define DO_CMP0(NAME, TYPE, OP) \ +void HELPER(NAME)(void *vd, void *vn, uint32_t desc) \ +{ \ + intptr_t i, opr_sz = simd_oprsz(desc); \ + for (i = 0; i < opr_sz; i += sizeof(TYPE)) { \ + TYPE nn = *(TYPE *)((char*)vn + i); \ + *(TYPE *)((char*)vd + i) = -(nn OP 0); \ + } \ + clear_tail(vd, opr_sz, simd_maxsz(desc)); \ +} + +DO_CMP0(gvec_ceq0_b, int8_t, ==) +DO_CMP0(gvec_clt0_b, int8_t, <) +DO_CMP0(gvec_cle0_b, int8_t, <=) +DO_CMP0(gvec_cgt0_b, int8_t, >) +DO_CMP0(gvec_cge0_b, int8_t, >=) + +DO_CMP0(gvec_ceq0_h, int16_t, ==) +DO_CMP0(gvec_clt0_h, int16_t, <) +DO_CMP0(gvec_cle0_h, int16_t, <=) +DO_CMP0(gvec_cgt0_h, int16_t, >) +DO_CMP0(gvec_cge0_h, int16_t, >=) + +#undef DO_CMP0 + +#define DO_ABD(NAME, TYPE) \ +void HELPER(NAME)(void *vd, void *vn, void *vm, uint32_t desc) \ +{ \ + intptr_t i, opr_sz = simd_oprsz(desc); \ + TYPE *d = vd, *n = vn, *m = vm; \ + \ + for (i = 0; i < opr_sz / sizeof(TYPE); ++i) { \ + d[i] = n[i] < m[i] ? 
m[i] - n[i] : n[i] - m[i]; \ + } \ + clear_tail(d, opr_sz, simd_maxsz(desc)); \ +} + +DO_ABD(gvec_sabd_b, int8_t) +DO_ABD(gvec_sabd_h, int16_t) +DO_ABD(gvec_sabd_s, int32_t) +DO_ABD(gvec_sabd_d, int64_t) + +DO_ABD(gvec_uabd_b, uint8_t) +DO_ABD(gvec_uabd_h, uint16_t) +DO_ABD(gvec_uabd_s, uint32_t) +DO_ABD(gvec_uabd_d, uint64_t) + +#undef DO_ABD + +#define DO_ABA(NAME, TYPE) \ +void HELPER(NAME)(void *vd, void *vn, void *vm, uint32_t desc) \ +{ \ + intptr_t i, opr_sz = simd_oprsz(desc); \ + TYPE *d = vd, *n = vn, *m = vm; \ + \ + for (i = 0; i < opr_sz / sizeof(TYPE); ++i) { \ + d[i] += n[i] < m[i] ? m[i] - n[i] : n[i] - m[i]; \ + } \ + clear_tail(d, opr_sz, simd_maxsz(desc)); \ +} + +DO_ABA(gvec_saba_b, int8_t) +DO_ABA(gvec_saba_h, int16_t) +DO_ABA(gvec_saba_s, int32_t) +DO_ABA(gvec_saba_d, int64_t) + +DO_ABA(gvec_uaba_b, uint8_t) +DO_ABA(gvec_uaba_h, uint16_t) +DO_ABA(gvec_uaba_s, uint32_t) +DO_ABA(gvec_uaba_d, uint64_t) + +#undef DO_ABA diff --git a/qemu/target/arm/vec_internal.h b/qemu/target/arm/vec_internal.h new file mode 100644 index 0000000000..3aa74b0151 --- /dev/null +++ b/qemu/target/arm/vec_internal.h @@ -0,0 +1,33 @@ +/* + * ARM AdvSIMD / SVE Vector Helpers + * + * Copyright (c) 2020 Linaro + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef TARGET_ARM_VEC_INTERNALS_H +#define TARGET_ARM_VEC_INTERNALS_H + +static inline void clear_tail(void *vd, uintptr_t opr_sz, uintptr_t max_sz) +{ + uint64_t *d = (uint64_t *)((char*)vd + opr_sz); + uintptr_t i; + + for (i = opr_sz; i < max_sz; i += 8) { + *d++ = 0; + } +} + +#endif /* TARGET_ARM_VEC_INTERNALS_H */ diff --git a/qemu/target/arm/vfp_helper.c b/qemu/target/arm/vfp_helper.c index 55bce5957a..753b5ed5e2 100644 --- a/qemu/target/arm/vfp_helper.c +++ b/qemu/target/arm/vfp_helper.c @@ -262,7 +262,7 @@ float64 VFP_HELPER(sqrt, d)(float64 a, CPUARMState *env) return float64_sqrt(a, &env->vfp.fp_status); } -static void softfloat_to_vfp_compare(CPUARMState *env, int cmp) +static void softfloat_to_vfp_compare(CPUARMState *env, FloatRelation cmp) { uint32_t flags = 0; switch (cmp) { @@ -536,7 +536,7 @@ float32 HELPER(vfp_fcvt_f16_to_f32)(uint32_t a, void *fpstp, uint32_t ahp_mode) * it would affect flushing input denormals. */ float_status *fpst = fpstp; - flag save = get_flush_inputs_to_zero(fpst); + bool save = get_flush_inputs_to_zero(fpst); set_flush_inputs_to_zero(false, fpst); float32 r = float16_to_float32(a, !ahp_mode, fpst); set_flush_inputs_to_zero(save, fpst); @@ -549,7 +549,7 @@ uint32_t HELPER(vfp_fcvt_f32_to_f16)(float32 a, void *fpstp, uint32_t ahp_mode) * it would affect flushing output denormals.
*/ float_status *fpst = fpstp; - flag save = get_flush_to_zero(fpst); + bool save = get_flush_to_zero(fpst); set_flush_to_zero(false, fpst); float16 r = float32_to_float16(a, !ahp_mode, fpst); set_flush_to_zero(save, fpst); @@ -562,7 +562,7 @@ float64 HELPER(vfp_fcvt_f16_to_f64)(uint32_t a, void *fpstp, uint32_t ahp_mode) * it would affect flushing input denormals. */ float_status *fpst = fpstp; - flag save = get_flush_inputs_to_zero(fpst); + bool save = get_flush_inputs_to_zero(fpst); set_flush_inputs_to_zero(false, fpst); float64 r = float16_to_float64(a, !ahp_mode, fpst); set_flush_inputs_to_zero(save, fpst); @@ -575,7 +575,7 @@ uint32_t HELPER(vfp_fcvt_f64_to_f16)(float64 a, void *fpstp, uint32_t ahp_mode) * it would affect flushing output denormals. */ float_status *fpst = fpstp; - flag save = get_flush_to_zero(fpst); + bool save = get_flush_to_zero(fpst); set_flush_to_zero(false, fpst); float16 r = float64_to_float16(a, !ahp_mode, fpst); set_flush_to_zero(save, fpst); @@ -586,7 +586,7 @@ uint32_t HELPER(vfp_fcvt_f64_to_f16)(float64 a, void *fpstp, uint32_t ahp_mode) #define float32_three make_float32(0x40400000) #define float32_one_point_five make_float32(0x3fc00000) -float32 HELPER(recps_f32)(float32 a, float32 b, CPUARMState *env) +float32 HELPER(recps_f32)(CPUARMState *env, float32 a, float32 b) { float_status *s = &env->vfp.standard_fp_status; if ((float32_is_infinity(a) && float32_is_zero_or_denormal(b)) || @@ -599,7 +599,7 @@ float32 HELPER(recps_f32)(float32 a, float32 b, CPUARMState *env) return float32_sub(float32_two, float32_mul(a, b, s), s); } -float32 HELPER(rsqrts_f32)(float32 a, float32 b, CPUARMState *env) +float32 HELPER(rsqrts_f32)(CPUARMState *env, float32 a, float32 b) { float_status *s = &env->vfp.standard_fp_status; float32 product; @@ -702,11 +702,9 @@ static bool round_to_inf(float_status *fpst, bool sign_bit) return sign_bit; case float_round_to_zero: /* Round to Zero */ return false; + default: + g_assert_not_reached(); } - - g_assert_not_reached(); - // never reach here - return false; } uint32_t HELPER(recpe_f16)(uint32_t input, void *fpstp) @@ -1030,9 +1028,8 @@ float64 HELPER(rsqrte_f64)(float64 input, void *fpstp) return make_float64(val); } -uint32_t HELPER(recpe_u32)(uint32_t a, void *fpstp) +uint32_t HELPER(recpe_u32)(uint32_t a) { - /* float_status *s = fpstp; */ int input, estimate; if ((a & 0x80000000) == 0) { @@ -1045,7 +1042,7 @@ uint32_t HELPER(recpe_u32)(uint32_t a, void *fpstp) return deposit32(0, (32 - 9), 9, estimate); } -uint32_t HELPER(rsqrte_u32)(uint32_t a, void *fpstp) +uint32_t HELPER(rsqrte_u32)(uint32_t a) { int estimate; diff --git a/qemu/target/i386/cpu.c b/qemu/target/i386/cpu.c index 86103b09e3..0cdd7a1ed5 100644 --- a/qemu/target/i386/cpu.c +++ b/qemu/target/i386/cpu.c @@ -852,10 +852,10 @@ static FeatureWordInfo feature_word_info[FEATURE_WORDS] = { .type = CPUID_FEATURE_WORD, .feat_names = { NULL, NULL, "avx512-4vnniw", "avx512-4fmaps", - NULL, NULL, NULL, NULL, - NULL, NULL, "md-clear", NULL, - NULL, NULL, NULL, NULL, - NULL, NULL, NULL /* pconfig */, NULL, + "fsrm", NULL, NULL, NULL, + "avx512-vp2intersect", NULL, "md-clear", NULL, + NULL, NULL, "serialize", NULL, + "tsx-ldtrk", NULL, NULL /* pconfig */, NULL, NULL, NULL, NULL, NULL, NULL, NULL, "spec-ctrl", "stibp", NULL, "arch-capabilities", "core-capability", "ssbd", @@ -1001,6 +1001,22 @@ static FeatureWordInfo feature_word_info[FEATURE_WORDS] = { .index = MSR_IA32_CORE_CAPABILITY, }, }, + [FEAT_PERF_CAPABILITIES] = { + .type = MSR_FEATURE_WORD, + .feat_names = { + NULL, 
NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, "full-width-write", NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + }, + .msr = { + .index = MSR_IA32_PERF_CAPABILITIES, + }, + }, [FEAT_VMX_PROCBASED_CTLS] = { .type = MSR_FEATURE_WORD, @@ -2722,6 +2738,13 @@ static X86CPUDefinition builtin_x86_defs[] = { { NULL /* end of list */ } } }, + { + .version = 4, + .props = (PropValue[]) { + { "vmx-eptp-switching", "on" }, + { /* end of list */ } + } + }, { 0 /* end of list */ } } }, @@ -2827,6 +2850,7 @@ static X86CPUDefinition builtin_x86_defs[] = { .versions = (X86CPUVersionDefinition[]) { { .version = 1 }, { .version = 2, + .note = "ARCH_CAPABILITIES", .props = (PropValue[]) { { "arch-capabilities", "on" }, { "rdctl-no", "on" }, @@ -2838,12 +2862,20 @@ static X86CPUDefinition builtin_x86_defs[] = { }, { .version = 3, .alias = "Cascadelake-Server-noTSX", + .note = "ARCH_CAPABILITIES, no TSX", .props = (PropValue[]) { { "hle", "off" }, { "rtm", "off" }, { NULL /* end of list */ } }, }, + { .version = 4, + .note = "ARCH_CAPABILITIES, no TSX", + .props = (PropValue[]) { + { "vmx-eptp-switching", "on" }, + { /* end of list */ } + }, + }, { 0 /* end of list */ } } }, @@ -3059,6 +3091,7 @@ static X86CPUDefinition builtin_x86_defs[] = { { .version = 1 }, { .version = 2, + .note = "no TSX", .alias = "Icelake-Client-noTSX", .props = (PropValue[]) { { "hle", "off" }, @@ -3196,6 +3229,20 @@ static X86CPUDefinition builtin_x86_defs[] = { { NULL /* end of list */ } }, }, + { + .version = 4, + .props = (PropValue[]) { + { "sha-ni", "on" }, + { "avx512ifma", "on" }, + { "rdpid", "on" }, + { "fsrm", "on" }, + { "vmx-rdseed-exit", "on" }, + { "vmx-pml", "on" }, + { "vmx-eptp-switching", "on" }, + { "model", "106" }, + { /* end of list */ } + }, + }, { 0 /* end of list */ } } }, @@ -3296,6 +3343,7 @@ static X86CPUDefinition builtin_x86_defs[] = { { .version = 1 }, { .version = 2, + .note = "no MPX, no MONITOR", .props = (PropValue[]) { { "monitor", "off" }, { "mpx", "off" }, @@ -3828,16 +3876,6 @@ static void mark_unavailable_features(X86CPU *cpu, FeatureWord w, uint64_t mask, } } -/* Convert all '_' in a feature string option name to '-', to make feature - * name conform to QOM property naming rule, which uses '-' instead of '_'. - */ -static inline void feat2prop(char *s) -{ - while ((s = strchr(s, '_'))) { - *s = '-'; - } -} - static void x86_cpu_filter_features(X86CPU *cpu, bool verbose); static uint64_t x86_cpu_get_supported_feature_word(FeatureWord w, @@ -3888,6 +3926,13 @@ static void x86_cpu_load_model(X86CPU *cpu, X86CPUModel *model) } x86_cpuid_set_vendor(cpu, def->vendor); + + /* + * Properties in versioned CPU model are not user specified features. + * We can simply clear env->user_features here since it will be filled later + * in x86_cpu_expand_features() based on plus_features and minus_features. 
+ */ + memset(&env->user_features, 0, sizeof(env->user_features)); } void cpu_clear_apic_feature(CPUX86State *env) @@ -4042,6 +4087,9 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, *ebx |= (cs->nr_cores * cs->nr_threads) << 16; *edx |= CPUID_HT; } + if (!cpu->enable_pmu) { + *ecx &= ~CPUID_EXT_PDCM; + } break; case 2: /* cache info: needed for Pentium Pro compatibility */ @@ -4330,9 +4378,9 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, host_cpuid(index, 0, eax, ebx, ecx, edx); break; } - *eax = (L1_DTLB_2M_ASSOC << 24) | (L1_DTLB_2M_ENTRIES << 16) | \ + *eax = (L1_DTLB_2M_ASSOC << 24) | (L1_DTLB_2M_ENTRIES << 16) | (L1_ITLB_2M_ASSOC << 8) | (L1_ITLB_2M_ENTRIES); - *ebx = (L1_DTLB_4K_ASSOC << 24) | (L1_DTLB_4K_ENTRIES << 16) | \ + *ebx = (L1_DTLB_4K_ASSOC << 24) | (L1_DTLB_4K_ENTRIES << 16) | (L1_ITLB_4K_ASSOC << 8) | (L1_ITLB_4K_ENTRIES); *ecx = encode_cache_cpuid80000005(env->cache_info_amd.l1d_cache); *edx = encode_cache_cpuid80000005(env->cache_info_amd.l1i_cache); @@ -4343,13 +4391,13 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, host_cpuid(index, 0, eax, ebx, ecx, edx); break; } - *eax = (AMD_ENC_ASSOC(L2_DTLB_2M_ASSOC) << 28) | \ + *eax = (AMD_ENC_ASSOC(L2_DTLB_2M_ASSOC) << 28) | (L2_DTLB_2M_ENTRIES << 16) | \ - (AMD_ENC_ASSOC(L2_ITLB_2M_ASSOC) << 12) | \ + (AMD_ENC_ASSOC(L2_ITLB_2M_ASSOC) << 12) | (L2_ITLB_2M_ENTRIES); - *ebx = (AMD_ENC_ASSOC(L2_DTLB_4K_ASSOC) << 28) | \ + *ebx = (AMD_ENC_ASSOC(L2_DTLB_4K_ASSOC) << 28) | (L2_DTLB_4K_ENTRIES << 16) | \ - (AMD_ENC_ASSOC(L2_ITLB_4K_ASSOC) << 12) | \ + (AMD_ENC_ASSOC(L2_ITLB_4K_ASSOC) << 12) | (L2_ITLB_4K_ENTRIES); encode_cache_cpuid80000006(env->cache_info_amd.l2_cache, cpu->enable_l3_cache ? @@ -4376,11 +4424,20 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, *eax = cpu->phys_bits; } *ebx = env->features[FEAT_8000_0008_EBX]; - *ecx = 0; - *edx = 0; if (cs->nr_cores * cs->nr_threads > 1) { - *ecx |= (cs->nr_cores * cs->nr_threads) - 1; + /* + * Bits 15:12 is "The number of bits in the initial + * Core::X86::Apic::ApicId[ApicId] value that indicate + * thread ID within a package". This is already stored at + * CPUX86State::pkg_offset. + * Bits 7:0 is "The number of threads in the package is NC+1" + */ + *ecx = (env->pkg_offset << 12) | + ((cs->nr_cores * cs->nr_threads) - 1); + } else { + *ecx = 0; } + *edx = 0; break; case 0x8000000A: if (env->features[FEAT_8000_0001_ECX] & CPUID_EXT3_SVM) { @@ -4478,6 +4535,7 @@ static void x86_cpu_reset(CPUState *dev) /* init to reset state */ env->hflags2 |= HF2_GIF_MASK; + env->hflags &= ~HF_GUEST_MASK; cpu_x86_update_cr0(env, 0x60000010); env->a20_mask = ~0x0; @@ -4707,7 +4765,7 @@ static void x86_cpu_expand_features(X86CPU *cpu) */ env->features[w] |= x86_cpu_get_supported_feature_word(w, cpu->migratable) & - ~env->user_features[w] & \ + ~env->user_features[w] & ~feature_word_info[w].no_autoenable_flags; } } @@ -4740,7 +4798,7 @@ static void x86_cpu_expand_features(X86CPU *cpu) // TODO: Add a warning? 
// mark_unavailable_features(cpu, FEAT_7_0_EBX, // CPUID_7_0_EBX_INTEL_PT, - // "Intel PT need CPUID leaf 0x14, please set by \"-cpu ...,+intel-pt,level=0x14\""); + // "Intel PT need CPUID leaf 0x14, please set by \"-cpu ...,+intel-pt,min-level=0x14\""); } } diff --git a/qemu/target/i386/cpu.h b/qemu/target/i386/cpu.h index 10d93b89ac..bc0664ffde 100644 --- a/qemu/target/i386/cpu.h +++ b/qemu/target/i386/cpu.h @@ -352,6 +352,8 @@ typedef enum X86Seg { #define MSR_IA32_ARCH_CAPABILITIES 0x10a #define ARCH_CAP_TSX_CTRL_MSR (1<<7) +#define MSR_IA32_PERF_CAPABILITIES 0x345 + #define MSR_IA32_TSX_CTRL 0x122 #define MSR_IA32_TSCDEADLINE 0x6e0 @@ -525,6 +527,7 @@ typedef enum FeatureWord { FEAT_XSAVE_COMP_HI, /* CPUID[EAX=0xd,ECX=0].EDX */ FEAT_ARCH_CAPABILITIES, FEAT_CORE_CAPABILITY, + FEAT_PERF_CAPABILITIES, FEAT_VMX_PROCBASED_CTLS, FEAT_VMX_SECONDARY_CTLS, FEAT_VMX_PINBASED_CTLS, @@ -768,6 +771,14 @@ typedef uint64_t FeatureWordArray[FEATURE_WORDS]; #define CPUID_7_0_EDX_AVX512_4VNNIW (1U << 2) /* AVX512 Multiply Accumulation Single Precision */ #define CPUID_7_0_EDX_AVX512_4FMAPS (1U << 3) +/* Fast Short Rep Mov */ +#define CPUID_7_0_EDX_FSRM (1U << 4) +/* AVX512 Vector Pair Intersection to a Pair of Mask Registers */ +#define CPUID_7_0_EDX_AVX512_VP2INTERSECT (1U << 8) +/* SERIALIZE instruction */ +#define CPUID_7_0_EDX_SERIALIZE (1U << 14) +/* TSX Suspend Load Address Tracking instruction */ +#define CPUID_7_0_EDX_TSX_LDTRK (1U << 16) /* Speculation Control */ #define CPUID_7_0_EDX_SPEC_CTRL (1U << 26) /* Single Thread Indirect Branch Predictors */ @@ -1571,6 +1582,7 @@ typedef struct CPUX86State { bool tsc_valid; int64_t tsc_khz; int64_t user_tsc_khz; /* for sanity check only */ + uint64_t apic_bus_freq; uint64_t mcg_cap; uint64_t mcg_ctl; @@ -2043,6 +2055,11 @@ static inline bool cpu_has_vmx(CPUX86State *env) return env->features[FEAT_1_ECX] & CPUID_EXT_VMX; } +static inline bool cpu_has_svm(CPUX86State *env) +{ + return env->features[FEAT_8000_0001_ECX] & CPUID_EXT3_SVM; +} + /* * In order for a vCPU to enter VMX operation it must have CR4.VMXE set. 
* Since it was set, CR4.VMXE must remain set as long as vCPU is in @@ -2068,6 +2085,7 @@ static inline bool cpu_vmx_maybe_enabled(CPUX86State *env) /* fpu_helper.c */ void update_fp_status(CPUX86State *env); void update_mxcsr_status(CPUX86State *env); +void update_mxcsr_from_sse_status(CPUX86State *env); static inline void cpu_set_mxcsr(CPUX86State *env, uint32_t mxcsr) { diff --git a/qemu/target/i386/excp_helper.c b/qemu/target/i386/excp_helper.c index cca25d322e..800c75dffd 100644 --- a/qemu/target/i386/excp_helper.c +++ b/qemu/target/i386/excp_helper.c @@ -261,8 +261,8 @@ static hwaddr get_hphys(CPUState *cs, hwaddr gphys, MMUAccessType access_type, } ptep = pde | PG_NX_MASK; - /* if PSE bit is set, then we use a 4MB page */ - if ((pde & PG_PSE_MASK) && (env->cr[4] & CR4_PSE_MASK)) { + /* if host cr4 PSE bit is set, then we use a 4MB page */ + if ((pde & PG_PSE_MASK) && (env->nested_pg_mode & SVM_NPT_PSE)) { page_size = 4096 * 1024; pte_addr = pde_addr; diff --git a/qemu/target/i386/fpu_helper.c b/qemu/target/i386/fpu_helper.c index b3f537000f..ea121fbfe5 100644 --- a/qemu/target/i386/fpu_helper.c +++ b/qemu/target/i386/fpu_helper.c @@ -25,6 +25,7 @@ #include "exec/exec-all.h" #include "exec/cpu_ldst.h" #include "fpu/softfloat.h" +#include "fpu/softfloat-macros.h" #define FPU_RC_MASK 0xc00 #define FPU_RC_NEAR 0x000 @@ -55,8 +56,13 @@ #define FPUC_EM 0x3f #define floatx80_lg2 make_floatx80(0x3ffd, 0x9a209a84fbcff799LL) +#define floatx80_lg2_d make_floatx80(0x3ffd, 0x9a209a84fbcff798LL) #define floatx80_l2e make_floatx80(0x3fff, 0xb8aa3b295c17f0bcLL) +#define floatx80_l2e_d make_floatx80(0x3fff, 0xb8aa3b295c17f0bbLL) #define floatx80_l2t make_floatx80(0x4000, 0xd49a784bcd1b8afeLL) +#define floatx80_l2t_u make_floatx80(0x4000, 0xd49a784bcd1b8affLL) +#define floatx80_ln2_d make_floatx80(0x3ffe, 0xb17217f7d1cf79abLL) +#define floatx80_pi_d make_floatx80(0x4000, 0xc90fdaa22168c234LL) static void cpu_clear_ignne(CPUX86State *env) { @@ -141,12 +147,32 @@ static void fpu_set_exception(CPUX86State *env, int mask) } } +static inline uint8_t save_exception_flags(CPUX86State *env) +{ + uint8_t old_flags = get_float_exception_flags(&env->fp_status); + set_float_exception_flags(0, &env->fp_status); + return old_flags; +} + +static void merge_exception_flags(CPUX86State *env, uint8_t old_flags) +{ + uint8_t new_flags = get_float_exception_flags(&env->fp_status); + float_raise(old_flags, &env->fp_status); + fpu_set_exception(env, + ((new_flags & float_flag_invalid ? FPUS_IE : 0) | + (new_flags & float_flag_divbyzero ? FPUS_ZE : 0) | + (new_flags & float_flag_overflow ? FPUS_OE : 0) | + (new_flags & float_flag_underflow ? FPUS_UE : 0) | + (new_flags & float_flag_inexact ? FPUS_PE : 0) | + (new_flags & float_flag_input_denormal ? 
FPUS_DE : 0))); +} + static inline floatx80 helper_fdiv(CPUX86State *env, floatx80 a, floatx80 b) { - if (floatx80_is_zero(b)) { - fpu_set_exception(env, FPUS_ZE); - } - return floatx80_div(a, b, &env->fp_status); + uint8_t old_flags = save_exception_flags(env); + floatx80 ret = floatx80_div(a, b, &env->fp_status); + merge_exception_flags(env, old_flags); + return ret; } static void fpu_raise_exception(CPUX86State *env, uintptr_t retaddr) @@ -158,6 +184,7 @@ static void fpu_raise_exception(CPUX86State *env, uintptr_t retaddr) void helper_flds_FT0(CPUX86State *env, uint32_t val) { + uint8_t old_flags = save_exception_flags(env); union { float32 f; uint32_t i; @@ -165,10 +192,12 @@ void helper_flds_FT0(CPUX86State *env, uint32_t val) u.i = val; FT0 = float32_to_floatx80(u.f, &env->fp_status); + merge_exception_flags(env, old_flags); } void helper_fldl_FT0(CPUX86State *env, uint64_t val) { + uint8_t old_flags = save_exception_flags(env); union { float64 f; uint64_t i; @@ -176,6 +205,7 @@ void helper_fldl_FT0(CPUX86State *env, uint64_t val) u.i = val; FT0 = float64_to_floatx80(u.f, &env->fp_status); + merge_exception_flags(env, old_flags); } void helper_fildl_FT0(CPUX86State *env, int32_t val) @@ -185,6 +215,7 @@ void helper_fildl_FT0(CPUX86State *env, int32_t val) void helper_flds_ST0(CPUX86State *env, uint32_t val) { + uint8_t old_flags = save_exception_flags(env); int new_fpstt; union { float32 f; @@ -196,10 +227,12 @@ void helper_flds_ST0(CPUX86State *env, uint32_t val) env->fpregs[new_fpstt].d = float32_to_floatx80(u.f, &env->fp_status); env->fpstt = new_fpstt; env->fptags[new_fpstt] = 0; /* validate stack entry */ + merge_exception_flags(env, old_flags); } void helper_fldl_ST0(CPUX86State *env, uint64_t val) { + uint8_t old_flags = save_exception_flags(env); int new_fpstt; union { float64 f; @@ -211,6 +244,7 @@ void helper_fldl_ST0(CPUX86State *env, uint64_t val) env->fpregs[new_fpstt].d = float64_to_floatx80(u.f, &env->fp_status); env->fpstt = new_fpstt; env->fptags[new_fpstt] = 0; /* validate stack entry */ + merge_exception_flags(env, old_flags); } void helper_fildl_ST0(CPUX86State *env, int32_t val) @@ -235,90 +269,108 @@ void helper_fildll_ST0(CPUX86State *env, int64_t val) uint32_t helper_fsts_ST0(CPUX86State *env) { + uint8_t old_flags = save_exception_flags(env); union { float32 f; uint32_t i; } u; u.f = floatx80_to_float32(ST0, &env->fp_status); + merge_exception_flags(env, old_flags); return u.i; } uint64_t helper_fstl_ST0(CPUX86State *env) { + uint8_t old_flags = save_exception_flags(env); union { float64 f; uint64_t i; } u; u.f = floatx80_to_float64(ST0, &env->fp_status); + merge_exception_flags(env, old_flags); return u.i; } int32_t helper_fist_ST0(CPUX86State *env) { + uint8_t old_flags = save_exception_flags(env); int32_t val; val = floatx80_to_int32(ST0, &env->fp_status); if (val != (int16_t)val) { + set_float_exception_flags(float_flag_invalid, &env->fp_status); val = -32768; } + merge_exception_flags(env, old_flags); return val; } int32_t helper_fistl_ST0(CPUX86State *env) { + uint8_t old_flags = save_exception_flags(env); int32_t val; - signed char old_exp_flags; - - old_exp_flags = get_float_exception_flags(&env->fp_status); - set_float_exception_flags(0, &env->fp_status); val = floatx80_to_int32(ST0, &env->fp_status); if (get_float_exception_flags(&env->fp_status) & float_flag_invalid) { val = 0x80000000; } - set_float_exception_flags(get_float_exception_flags(&env->fp_status) - | old_exp_flags, &env->fp_status); + merge_exception_flags(env, old_flags); return val; } 
int64_t helper_fistll_ST0(CPUX86State *env) { + uint8_t old_flags = save_exception_flags(env); int64_t val; - signed char old_exp_flags; - - old_exp_flags = get_float_exception_flags(&env->fp_status); - set_float_exception_flags(0, &env->fp_status); val = floatx80_to_int64(ST0, &env->fp_status); if (get_float_exception_flags(&env->fp_status) & float_flag_invalid) { val = 0x8000000000000000ULL; } - set_float_exception_flags(get_float_exception_flags(&env->fp_status) - | old_exp_flags, &env->fp_status); + merge_exception_flags(env, old_flags); return val; } int32_t helper_fistt_ST0(CPUX86State *env) { + uint8_t old_flags = save_exception_flags(env); int32_t val; val = floatx80_to_int32_round_to_zero(ST0, &env->fp_status); if (val != (int16_t)val) { + set_float_exception_flags(float_flag_invalid, &env->fp_status); val = -32768; } + merge_exception_flags(env, old_flags); return val; } int32_t helper_fisttl_ST0(CPUX86State *env) { - return floatx80_to_int32_round_to_zero(ST0, &env->fp_status); + uint8_t old_flags = save_exception_flags(env); + int32_t val; + + val = floatx80_to_int32_round_to_zero(ST0, &env->fp_status); + if (get_float_exception_flags(&env->fp_status) & float_flag_invalid) { + val = 0x80000000; + } + merge_exception_flags(env, old_flags); + return val; } int64_t helper_fisttll_ST0(CPUX86State *env) { - return floatx80_to_int64_round_to_zero(ST0, &env->fp_status); + uint8_t old_flags = save_exception_flags(env); + int64_t val; + + val = floatx80_to_int64_round_to_zero(ST0, &env->fp_status); + if (get_float_exception_flags(&env->fp_status) & float_flag_invalid) { + val = 0x8000000000000000ULL; + } + merge_exception_flags(env, old_flags); + return val; } void helper_fldt_ST0(CPUX86State *env, target_ulong ptr) @@ -400,62 +452,78 @@ static const int fcom_ccval[4] = {0x0100, 0x4000, 0x0000, 0x4500}; void helper_fcom_ST0_FT0(CPUX86State *env) { - int ret; + uint8_t old_flags = save_exception_flags(env); + FloatRelation ret; ret = floatx80_compare(ST0, FT0, &env->fp_status); env->fpus = (env->fpus & ~0x4500) | fcom_ccval[ret + 1]; + merge_exception_flags(env, old_flags); } void helper_fucom_ST0_FT0(CPUX86State *env) { - int ret; + uint8_t old_flags = save_exception_flags(env); + FloatRelation ret; ret = floatx80_compare_quiet(ST0, FT0, &env->fp_status); env->fpus = (env->fpus & ~0x4500) | fcom_ccval[ret + 1]; + merge_exception_flags(env, old_flags); } static const int fcomi_ccval[4] = {CC_C, CC_Z, 0, CC_Z | CC_P | CC_C}; void helper_fcomi_ST0_FT0(CPUX86State *env) { + uint8_t old_flags = save_exception_flags(env); int eflags; - int ret; + FloatRelation ret; ret = floatx80_compare(ST0, FT0, &env->fp_status); eflags = cpu_cc_compute_all(env, CC_OP); eflags = (eflags & ~(CC_Z | CC_P | CC_C)) | fcomi_ccval[ret + 1]; CC_SRC = eflags; + merge_exception_flags(env, old_flags); } void helper_fucomi_ST0_FT0(CPUX86State *env) { + uint8_t old_flags = save_exception_flags(env); int eflags; - int ret; + FloatRelation ret; ret = floatx80_compare_quiet(ST0, FT0, &env->fp_status); eflags = cpu_cc_compute_all(env, CC_OP); eflags = (eflags & ~(CC_Z | CC_P | CC_C)) | fcomi_ccval[ret + 1]; CC_SRC = eflags; + merge_exception_flags(env, old_flags); } void helper_fadd_ST0_FT0(CPUX86State *env) { + uint8_t old_flags = save_exception_flags(env); ST0 = floatx80_add(ST0, FT0, &env->fp_status); + merge_exception_flags(env, old_flags); } void helper_fmul_ST0_FT0(CPUX86State *env) { + uint8_t old_flags = save_exception_flags(env); ST0 = floatx80_mul(ST0, FT0, &env->fp_status); + merge_exception_flags(env, 
old_flags); } void helper_fsub_ST0_FT0(CPUX86State *env) { + uint8_t old_flags = save_exception_flags(env); ST0 = floatx80_sub(ST0, FT0, &env->fp_status); + merge_exception_flags(env, old_flags); } void helper_fsubr_ST0_FT0(CPUX86State *env) { + uint8_t old_flags = save_exception_flags(env); ST0 = floatx80_sub(FT0, ST0, &env->fp_status); + merge_exception_flags(env, old_flags); } void helper_fdiv_ST0_FT0(CPUX86State *env) @@ -472,22 +540,30 @@ void helper_fdivr_ST0_FT0(CPUX86State *env) void helper_fadd_STN_ST0(CPUX86State *env, int st_index) { + uint8_t old_flags = save_exception_flags(env); ST(st_index) = floatx80_add(ST(st_index), ST0, &env->fp_status); + merge_exception_flags(env, old_flags); } void helper_fmul_STN_ST0(CPUX86State *env, int st_index) { + uint8_t old_flags = save_exception_flags(env); ST(st_index) = floatx80_mul(ST(st_index), ST0, &env->fp_status); + merge_exception_flags(env, old_flags); } void helper_fsub_STN_ST0(CPUX86State *env, int st_index) { + uint8_t old_flags = save_exception_flags(env); ST(st_index) = floatx80_sub(ST(st_index), ST0, &env->fp_status); + merge_exception_flags(env, old_flags); } void helper_fsubr_STN_ST0(CPUX86State *env, int st_index) { + uint8_t old_flags = save_exception_flags(env); ST(st_index) = floatx80_sub(ST0, ST(st_index), &env->fp_status); + merge_exception_flags(env, old_flags); } void helper_fdiv_STN_ST0(CPUX86State *env, int st_index) @@ -519,58 +595,81 @@ void helper_fabs_ST0(CPUX86State *env) void helper_fld1_ST0(CPUX86State *env) { - //ST0 = floatx80_one; - floatx80 one = { 0x8000000000000000LL, 0x3fff }; - ST0 = one; + ST0 = floatx80_one; } void helper_fldl2t_ST0(CPUX86State *env) { - //ST0 = floatx80_l2t; - floatx80 l2t = { 0xd49a784bcd1b8afeLL, 0x4000 }; - ST0 = l2t; + switch (env->fpuc & FPU_RC_MASK) { + case FPU_RC_UP: + ST0 = floatx80_l2t_u; + break; + default: + ST0 = floatx80_l2t; + break; + } } void helper_fldl2e_ST0(CPUX86State *env) { - //ST0 = floatx80_l2e; - floatx80 l2e = { 0xb8aa3b295c17f0bcLL, 0x3fff }; - ST0 = l2e; + switch (env->fpuc & FPU_RC_MASK) { + case FPU_RC_DOWN: + case FPU_RC_CHOP: + ST0 = floatx80_l2e_d; + break; + default: + ST0 = floatx80_l2e; + break; + } } void helper_fldpi_ST0(CPUX86State *env) { - //ST0 = floatx80_pi; - floatx80 pi = { 0xc90fdaa22168c235LL, 0x4000 }; - ST0 = pi; + switch (env->fpuc & FPU_RC_MASK) { + case FPU_RC_DOWN: + case FPU_RC_CHOP: + ST0 = floatx80_pi_d; + break; + default: + ST0 = floatx80_pi; + break; + } } void helper_fldlg2_ST0(CPUX86State *env) { - //ST0 = floatx80_lg2; - floatx80 lg2 = { 0x9a209a84fbcff799LL, 0x3ffd }; - ST0 = lg2; + switch (env->fpuc & FPU_RC_MASK) { + case FPU_RC_DOWN: + case FPU_RC_CHOP: + ST0 = floatx80_lg2_d; + break; + default: + ST0 = floatx80_lg2; + break; + } } void helper_fldln2_ST0(CPUX86State *env) { - //ST0 = floatx80_ln2; - floatx80 ln2 = { 0xb17217f7d1cf79acLL, 0x3ffe }; - ST0 = ln2; + switch (env->fpuc & FPU_RC_MASK) { + case FPU_RC_DOWN: + case FPU_RC_CHOP: + ST0 = floatx80_ln2_d; + break; + default: + ST0 = floatx80_ln2; + break; + } } void helper_fldz_ST0(CPUX86State *env) { - //ST0 = floatx80_zero; - floatx80 zero = { 0x0000000000000000LL, 0x0000 }; - ST0 = zero; + ST0 = floatx80_zero; } void helper_fldz_FT0(CPUX86State *env) { - //FT0 = floatx80_zero; - floatx80 zero = { 0x0000000000000000LL, 0x0000 }; - FT0 = zero; + FT0 = floatx80_zero; } uint32_t helper_fnstsw(CPUX86State *env) @@ -684,18 +783,31 @@ void helper_fbld_ST0(CPUX86State *env, target_ulong ptr) void helper_fbst_ST0(CPUX86State *env, target_ulong ptr) { + uint8_t 
old_flags = save_exception_flags(env); int v; target_ulong mem_ref, mem_end; int64_t val; + CPU_LDoubleU temp; + + temp.d = ST0; val = floatx80_to_int64(ST0, &env->fp_status); mem_ref = ptr; + if (val >= 1000000000000000000LL || val <= -1000000000000000000LL) { + set_float_exception_flags(float_flag_invalid, &env->fp_status); + while (mem_ref < ptr + 7) { + cpu_stb_data_ra(env, mem_ref++, 0, GETPC()); + } + cpu_stb_data_ra(env, mem_ref++, 0xc0, GETPC()); + cpu_stb_data_ra(env, mem_ref++, 0xff, GETPC()); + cpu_stb_data_ra(env, mem_ref++, 0xff, GETPC()); + merge_exception_flags(env, old_flags); + return; + } mem_end = mem_ref + 9; - if (val < 0) { + if (SIGND(temp)) { cpu_stb_data_ra(env, mem_end, 0x80, GETPC()); - if (val != 0x8000000000000000LL) { - val = -val; - } + val = -val; } else { cpu_stb_data_ra(env, mem_end, 0x00, GETPC()); } @@ -705,35 +817,399 @@ void helper_fbst_ST0(CPUX86State *env, target_ulong ptr) } v = val % 100; val = val / 100; - v = (int)((unsigned int)(v / 10) << 4) | (v % 10); + v = ((v / 10) << 4) | (v % 10); cpu_stb_data_ra(env, mem_ref++, v, GETPC()); } while (mem_ref < mem_end) { cpu_stb_data_ra(env, mem_ref++, 0, GETPC()); } + merge_exception_flags(env, old_flags); } -void helper_f2xm1(CPUX86State *env) -{ - double val = floatx80_to_double(env, ST0); +/* 128-bit significand of log(2). */ +#define ln2_sig_high 0xb17217f7d1cf79abULL +#define ln2_sig_low 0xc9e3b39803f2f6afULL - val = pow(2.0, val) - 1.0; - ST0 = double_to_floatx80(env, val); -} +/* + * Polynomial coefficients for an approximation to (2^x - 1) / x, on + * the interval [-1/64, 1/64]. + */ +#define f2xm1_coeff_0 make_floatx80(0x3ffe, 0xb17217f7d1cf79acULL) +#define f2xm1_coeff_0_low make_floatx80(0xbfbc, 0xd87edabf495b3762ULL) +#define f2xm1_coeff_1 make_floatx80(0x3ffc, 0xf5fdeffc162c7543ULL) +#define f2xm1_coeff_2 make_floatx80(0x3ffa, 0xe35846b82505fcc7ULL) +#define f2xm1_coeff_3 make_floatx80(0x3ff8, 0x9d955b7dd273b899ULL) +#define f2xm1_coeff_4 make_floatx80(0x3ff5, 0xaec3ff3c4ef4ac0cULL) +#define f2xm1_coeff_5 make_floatx80(0x3ff2, 0xa184897c3a7f0de9ULL) +#define f2xm1_coeff_6 make_floatx80(0x3fee, 0xffe634d0ec30d504ULL) +#define f2xm1_coeff_7 make_floatx80(0x3feb, 0xb160111d2db515e4ULL) + +struct f2xm1_data { + /* + * A value very close to a multiple of 1/32, such that 2^t and 2^t - 1 + * are very close to exact floatx80 values. + */ + floatx80 t; + /* The value of 2^t. */ + floatx80 exp2; + /* The value of 2^t - 1. 
*/ + floatx80 exp2m1; +}; + +static const struct f2xm1_data f2xm1_table[65] = { + { make_floatx80_init(0xbfff, 0x8000000000000000ULL), + make_floatx80_init(0x3ffe, 0x8000000000000000ULL), + make_floatx80_init(0xbffe, 0x8000000000000000ULL) }, + { make_floatx80_init(0xbffe, 0xf800000000002e7eULL), + make_floatx80_init(0x3ffe, 0x82cd8698ac2b9160ULL), + make_floatx80_init(0xbffd, 0xfa64f2cea7a8dd40ULL) }, + { make_floatx80_init(0xbffe, 0xefffffffffffe960ULL), + make_floatx80_init(0x3ffe, 0x85aac367cc488345ULL), + make_floatx80_init(0xbffd, 0xf4aa7930676ef976ULL) }, + { make_floatx80_init(0xbffe, 0xe800000000006f10ULL), + make_floatx80_init(0x3ffe, 0x88980e8092da5c14ULL), + make_floatx80_init(0xbffd, 0xeecfe2feda4b47d8ULL) }, + { make_floatx80_init(0xbffe, 0xe000000000008a45ULL), + make_floatx80_init(0x3ffe, 0x8b95c1e3ea8ba2a5ULL), + make_floatx80_init(0xbffd, 0xe8d47c382ae8bab6ULL) }, + { make_floatx80_init(0xbffe, 0xd7ffffffffff8a9eULL), + make_floatx80_init(0x3ffe, 0x8ea4398b45cd8116ULL), + make_floatx80_init(0xbffd, 0xe2b78ce97464fdd4ULL) }, + { make_floatx80_init(0xbffe, 0xd0000000000019a0ULL), + make_floatx80_init(0x3ffe, 0x91c3d373ab11b919ULL), + make_floatx80_init(0xbffd, 0xdc785918a9dc8dceULL) }, + { make_floatx80_init(0xbffe, 0xc7ffffffffff14dfULL), + make_floatx80_init(0x3ffe, 0x94f4efa8fef76836ULL), + make_floatx80_init(0xbffd, 0xd61620ae02112f94ULL) }, + { make_floatx80_init(0xbffe, 0xc000000000006530ULL), + make_floatx80_init(0x3ffe, 0x9837f0518db87fbbULL), + make_floatx80_init(0xbffd, 0xcf901f5ce48f008aULL) }, + { make_floatx80_init(0xbffe, 0xb7ffffffffff1723ULL), + make_floatx80_init(0x3ffe, 0x9b8d39b9d54eb74cULL), + make_floatx80_init(0xbffd, 0xc8e58c8c55629168ULL) }, + { make_floatx80_init(0xbffe, 0xb00000000000b5e1ULL), + make_floatx80_init(0x3ffe, 0x9ef5326091a0c366ULL), + make_floatx80_init(0xbffd, 0xc2159b3edcbe7934ULL) }, + { make_floatx80_init(0xbffe, 0xa800000000006f8aULL), + make_floatx80_init(0x3ffe, 0xa27043030c49370aULL), + make_floatx80_init(0xbffd, 0xbb1f79f9e76d91ecULL) }, + { make_floatx80_init(0xbffe, 0x9fffffffffff816aULL), + make_floatx80_init(0x3ffe, 0xa5fed6a9b15171cfULL), + make_floatx80_init(0xbffd, 0xb40252ac9d5d1c62ULL) }, + { make_floatx80_init(0xbffe, 0x97ffffffffffb621ULL), + make_floatx80_init(0x3ffe, 0xa9a15ab4ea7c30e6ULL), + make_floatx80_init(0xbffd, 0xacbd4a962b079e34ULL) }, + { make_floatx80_init(0xbffe, 0x8fffffffffff162bULL), + make_floatx80_init(0x3ffe, 0xad583eea42a1b886ULL), + make_floatx80_init(0xbffd, 0xa54f822b7abc8ef4ULL) }, + { make_floatx80_init(0xbffe, 0x87ffffffffff4d34ULL), + make_floatx80_init(0x3ffe, 0xb123f581d2ac7b51ULL), + make_floatx80_init(0xbffd, 0x9db814fc5aa7095eULL) }, + { make_floatx80_init(0xbffe, 0x800000000000227dULL), + make_floatx80_init(0x3ffe, 0xb504f333f9de539dULL), + make_floatx80_init(0xbffd, 0x95f619980c4358c6ULL) }, + { make_floatx80_init(0xbffd, 0xefffffffffff3978ULL), + make_floatx80_init(0x3ffe, 0xb8fbaf4762fbd0a1ULL), + make_floatx80_init(0xbffd, 0x8e08a1713a085ebeULL) }, + { make_floatx80_init(0xbffd, 0xe00000000000df81ULL), + make_floatx80_init(0x3ffe, 0xbd08a39f580bfd8cULL), + make_floatx80_init(0xbffd, 0x85eeb8c14fe804e8ULL) }, + { make_floatx80_init(0xbffd, 0xd00000000000bccfULL), + make_floatx80_init(0x3ffe, 0xc12c4cca667062f6ULL), + make_floatx80_init(0xbffc, 0xfb4eccd6663e7428ULL) }, + { make_floatx80_init(0xbffd, 0xc00000000000eff0ULL), + make_floatx80_init(0x3ffe, 0xc5672a1155069abeULL), + make_floatx80_init(0xbffc, 0xea6357baabe59508ULL) }, + { make_floatx80_init(0xbffd, 
0xb000000000000fe6ULL), + make_floatx80_init(0x3ffe, 0xc9b9bd866e2f234bULL), + make_floatx80_init(0xbffc, 0xd91909e6474372d4ULL) }, + { make_floatx80_init(0xbffd, 0x9fffffffffff2172ULL), + make_floatx80_init(0x3ffe, 0xce248c151f84bf00ULL), + make_floatx80_init(0xbffc, 0xc76dcfab81ed0400ULL) }, + { make_floatx80_init(0xbffd, 0x8fffffffffffafffULL), + make_floatx80_init(0x3ffe, 0xd2a81d91f12afb2bULL), + make_floatx80_init(0xbffc, 0xb55f89b83b541354ULL) }, + { make_floatx80_init(0xbffc, 0xffffffffffff81a3ULL), + make_floatx80_init(0x3ffe, 0xd744fccad69d7d5eULL), + make_floatx80_init(0xbffc, 0xa2ec0cd4a58a0a88ULL) }, + { make_floatx80_init(0xbffc, 0xdfffffffffff1568ULL), + make_floatx80_init(0x3ffe, 0xdbfbb797daf25a44ULL), + make_floatx80_init(0xbffc, 0x901121a0943696f0ULL) }, + { make_floatx80_init(0xbffc, 0xbfffffffffff68daULL), + make_floatx80_init(0x3ffe, 0xe0ccdeec2a94f811ULL), + make_floatx80_init(0xbffb, 0xf999089eab583f78ULL) }, + { make_floatx80_init(0xbffc, 0x9fffffffffff4690ULL), + make_floatx80_init(0x3ffe, 0xe5b906e77c83657eULL), + make_floatx80_init(0xbffb, 0xd237c8c41be4d410ULL) }, + { make_floatx80_init(0xbffb, 0xffffffffffff8aeeULL), + make_floatx80_init(0x3ffe, 0xeac0c6e7dd24427cULL), + make_floatx80_init(0xbffb, 0xa9f9c8c116ddec20ULL) }, + { make_floatx80_init(0xbffb, 0xbfffffffffff2d18ULL), + make_floatx80_init(0x3ffe, 0xefe4b99bdcdb06ebULL), + make_floatx80_init(0xbffb, 0x80da33211927c8a8ULL) }, + { make_floatx80_init(0xbffa, 0xffffffffffff8ccbULL), + make_floatx80_init(0x3ffe, 0xf5257d152486d0f4ULL), + make_floatx80_init(0xbffa, 0xada82eadb792f0c0ULL) }, + { make_floatx80_init(0xbff9, 0xffffffffffff11feULL), + make_floatx80_init(0x3ffe, 0xfa83b2db722a0846ULL), + make_floatx80_init(0xbff9, 0xaf89a491babef740ULL) }, + { floatx80_zero_init, + make_floatx80_init(0x3fff, 0x8000000000000000ULL), + floatx80_zero_init }, + { make_floatx80_init(0x3ff9, 0xffffffffffff2680ULL), + make_floatx80_init(0x3fff, 0x82cd8698ac2b9f6fULL), + make_floatx80_init(0x3ff9, 0xb361a62b0ae7dbc0ULL) }, + { make_floatx80_init(0x3ffb, 0x800000000000b500ULL), + make_floatx80_init(0x3fff, 0x85aac367cc488345ULL), + make_floatx80_init(0x3ffa, 0xb5586cf9891068a0ULL) }, + { make_floatx80_init(0x3ffb, 0xbfffffffffff4b67ULL), + make_floatx80_init(0x3fff, 0x88980e8092da7cceULL), + make_floatx80_init(0x3ffb, 0x8980e8092da7cce0ULL) }, + { make_floatx80_init(0x3ffb, 0xffffffffffffff57ULL), + make_floatx80_init(0x3fff, 0x8b95c1e3ea8bd6dfULL), + make_floatx80_init(0x3ffb, 0xb95c1e3ea8bd6df0ULL) }, + { make_floatx80_init(0x3ffc, 0x9fffffffffff811fULL), + make_floatx80_init(0x3fff, 0x8ea4398b45cd4780ULL), + make_floatx80_init(0x3ffb, 0xea4398b45cd47800ULL) }, + { make_floatx80_init(0x3ffc, 0xbfffffffffff9980ULL), + make_floatx80_init(0x3fff, 0x91c3d373ab11b919ULL), + make_floatx80_init(0x3ffc, 0x8e1e9b9d588dc8c8ULL) }, + { make_floatx80_init(0x3ffc, 0xdffffffffffff631ULL), + make_floatx80_init(0x3fff, 0x94f4efa8fef70864ULL), + make_floatx80_init(0x3ffc, 0xa7a77d47f7b84320ULL) }, + { make_floatx80_init(0x3ffc, 0xffffffffffff2499ULL), + make_floatx80_init(0x3fff, 0x9837f0518db892d4ULL), + make_floatx80_init(0x3ffc, 0xc1bf828c6dc496a0ULL) }, + { make_floatx80_init(0x3ffd, 0x8fffffffffff80fbULL), + make_floatx80_init(0x3fff, 0x9b8d39b9d54e3a79ULL), + make_floatx80_init(0x3ffc, 0xdc69cdceaa71d3c8ULL) }, + { make_floatx80_init(0x3ffd, 0x9fffffffffffbc23ULL), + make_floatx80_init(0x3fff, 0x9ef5326091a10313ULL), + make_floatx80_init(0x3ffc, 0xf7a993048d081898ULL) }, + { make_floatx80_init(0x3ffd, 0xafffffffffff20ecULL), + 
make_floatx80_init(0x3fff, 0xa27043030c49370aULL), + make_floatx80_init(0x3ffd, 0x89c10c0c3124dc28ULL) }, + { make_floatx80_init(0x3ffd, 0xc00000000000fd2cULL), + make_floatx80_init(0x3fff, 0xa5fed6a9b15171cfULL), + make_floatx80_init(0x3ffd, 0x97fb5aa6c545c73cULL) }, + { make_floatx80_init(0x3ffd, 0xd0000000000093beULL), + make_floatx80_init(0x3fff, 0xa9a15ab4ea7c30e6ULL), + make_floatx80_init(0x3ffd, 0xa6856ad3a9f0c398ULL) }, + { make_floatx80_init(0x3ffd, 0xe00000000000c2aeULL), + make_floatx80_init(0x3fff, 0xad583eea42a17876ULL), + make_floatx80_init(0x3ffd, 0xb560fba90a85e1d8ULL) }, + { make_floatx80_init(0x3ffd, 0xefffffffffff1e3fULL), + make_floatx80_init(0x3fff, 0xb123f581d2abef6cULL), + make_floatx80_init(0x3ffd, 0xc48fd6074aafbdb0ULL) }, + { make_floatx80_init(0x3ffd, 0xffffffffffff1c23ULL), + make_floatx80_init(0x3fff, 0xb504f333f9de2cadULL), + make_floatx80_init(0x3ffd, 0xd413cccfe778b2b4ULL) }, + { make_floatx80_init(0x3ffe, 0x8800000000006344ULL), + make_floatx80_init(0x3fff, 0xb8fbaf4762fbd0a1ULL), + make_floatx80_init(0x3ffd, 0xe3eebd1d8bef4284ULL) }, + { make_floatx80_init(0x3ffe, 0x9000000000005d67ULL), + make_floatx80_init(0x3fff, 0xbd08a39f580c668dULL), + make_floatx80_init(0x3ffd, 0xf4228e7d60319a34ULL) }, + { make_floatx80_init(0x3ffe, 0x9800000000009127ULL), + make_floatx80_init(0x3fff, 0xc12c4cca6670e042ULL), + make_floatx80_init(0x3ffe, 0x82589994cce1c084ULL) }, + { make_floatx80_init(0x3ffe, 0x9fffffffffff06f9ULL), + make_floatx80_init(0x3fff, 0xc5672a11550655c3ULL), + make_floatx80_init(0x3ffe, 0x8ace5422aa0cab86ULL) }, + { make_floatx80_init(0x3ffe, 0xa7fffffffffff80dULL), + make_floatx80_init(0x3fff, 0xc9b9bd866e2f234bULL), + make_floatx80_init(0x3ffe, 0x93737b0cdc5e4696ULL) }, + { make_floatx80_init(0x3ffe, 0xafffffffffff1470ULL), + make_floatx80_init(0x3fff, 0xce248c151f83fd69ULL), + make_floatx80_init(0x3ffe, 0x9c49182a3f07fad2ULL) }, + { make_floatx80_init(0x3ffe, 0xb800000000000e0aULL), + make_floatx80_init(0x3fff, 0xd2a81d91f12aec5cULL), + make_floatx80_init(0x3ffe, 0xa5503b23e255d8b8ULL) }, + { make_floatx80_init(0x3ffe, 0xc00000000000b7faULL), + make_floatx80_init(0x3fff, 0xd744fccad69dd630ULL), + make_floatx80_init(0x3ffe, 0xae89f995ad3bac60ULL) }, + { make_floatx80_init(0x3ffe, 0xc800000000003aa6ULL), + make_floatx80_init(0x3fff, 0xdbfbb797daf25a44ULL), + make_floatx80_init(0x3ffe, 0xb7f76f2fb5e4b488ULL) }, + { make_floatx80_init(0x3ffe, 0xd00000000000a6aeULL), + make_floatx80_init(0x3fff, 0xe0ccdeec2a954685ULL), + make_floatx80_init(0x3ffe, 0xc199bdd8552a8d0aULL) }, + { make_floatx80_init(0x3ffe, 0xd800000000004165ULL), + make_floatx80_init(0x3fff, 0xe5b906e77c837155ULL), + make_floatx80_init(0x3ffe, 0xcb720dcef906e2aaULL) }, + { make_floatx80_init(0x3ffe, 0xe00000000000582cULL), + make_floatx80_init(0x3fff, 0xeac0c6e7dd24713aULL), + make_floatx80_init(0x3ffe, 0xd5818dcfba48e274ULL) }, + { make_floatx80_init(0x3ffe, 0xe800000000001a5dULL), + make_floatx80_init(0x3fff, 0xefe4b99bdcdb06ebULL), + make_floatx80_init(0x3ffe, 0xdfc97337b9b60dd6ULL) }, + { make_floatx80_init(0x3ffe, 0xefffffffffffc1efULL), + make_floatx80_init(0x3fff, 0xf5257d152486a2faULL), + make_floatx80_init(0x3ffe, 0xea4afa2a490d45f4ULL) }, + { make_floatx80_init(0x3ffe, 0xf800000000001069ULL), + make_floatx80_init(0x3fff, 0xfa83b2db722a0e5cULL), + make_floatx80_init(0x3ffe, 0xf50765b6e4541cb8ULL) }, + { make_floatx80_init(0x3fff, 0x8000000000000000ULL), + make_floatx80_init(0x4000, 0x8000000000000000ULL), + make_floatx80_init(0x3fff, 0x8000000000000000ULL) }, +}; -void 
helper_fyl2x(CPUX86State *env) +void helper_f2xm1(CPUX86State *env) { - double fptemp = floatx80_to_double(env, ST0); - - if (fptemp > 0.0) { - fptemp = log(fptemp) / log(2.0); /* log2(ST) */ - fptemp *= floatx80_to_double(env, ST1); - ST1 = double_to_floatx80(env, fptemp); - fpop(env); + uint8_t old_flags = save_exception_flags(env); + uint64_t sig = extractFloatx80Frac(ST0); + int32_t exp = extractFloatx80Exp(ST0); + bool sign = extractFloatx80Sign(ST0); + + if (floatx80_invalid_encoding(ST0)) { + float_raise(float_flag_invalid, &env->fp_status); + ST0 = floatx80_default_nan(&env->fp_status); + } else if (floatx80_is_any_nan(ST0)) { + if (floatx80_is_signaling_nan(ST0, &env->fp_status)) { + float_raise(float_flag_invalid, &env->fp_status); + ST0 = floatx80_silence_nan(ST0, &env->fp_status); + } + } else if (exp > 0x3fff || + (exp == 0x3fff && sig != (0x8000000000000000ULL))) { + /* Out of range for the instruction, treat as invalid. */ + float_raise(float_flag_invalid, &env->fp_status); + ST0 = floatx80_default_nan(&env->fp_status); + } else if (exp == 0x3fff) { + /* Argument 1 or -1, exact result 1 or -0.5. */ + if (sign) { + ST0 = make_floatx80(0xbffe, 0x8000000000000000ULL); + } + } else if (exp < 0x3fb0) { + if (!floatx80_is_zero(ST0)) { + /* + * Multiplying the argument by an extra-precision version + * of log(2) is sufficiently precise. Zero arguments are + * returned unchanged. + */ + uint64_t sig0, sig1, sig2; + if (exp == 0) { + normalizeFloatx80Subnormal(sig, &exp, &sig); + } + mul128By64To192(ln2_sig_high, ln2_sig_low, sig, &sig0, &sig1, + &sig2); + /* This result is inexact. */ + sig1 |= 1; + ST0 = normalizeRoundAndPackFloatx80(80, sign, exp, sig0, sig1, + &env->fp_status); + } } else { - env->fpus &= ~0x4700; - env->fpus |= 0x400; + floatx80 tmp, y, accum; + bool asign, bsign; + int32_t n, aexp, bexp; + uint64_t asig0, asig1, asig2, bsig0, bsig1; + FloatRoundMode save_mode = env->fp_status.float_rounding_mode; + signed char save_prec = env->fp_status.floatx80_rounding_precision; + env->fp_status.float_rounding_mode = float_round_nearest_even; + env->fp_status.floatx80_rounding_precision = 80; + + /* Find the nearest multiple of 1/32 to the argument. */ + tmp = floatx80_scalbn(ST0, 5, &env->fp_status); + n = 32 + floatx80_to_int32(tmp, &env->fp_status); + y = floatx80_sub(ST0, f2xm1_table[n].t, &env->fp_status); + + if (floatx80_is_zero(y)) { + /* + * Use the value of 2^t - 1 from the table, to avoid + * needing to special-case zero as a result of + * multiplication below. + */ + ST0 = f2xm1_table[n].t; + set_float_exception_flags(float_flag_inexact, &env->fp_status); + env->fp_status.float_rounding_mode = save_mode; + } else { + /* + * Compute the lower parts of a polynomial expansion for + * (2^y - 1) / y. 
+ */ + accum = floatx80_mul(f2xm1_coeff_7, y, &env->fp_status); + accum = floatx80_add(f2xm1_coeff_6, accum, &env->fp_status); + accum = floatx80_mul(accum, y, &env->fp_status); + accum = floatx80_add(f2xm1_coeff_5, accum, &env->fp_status); + accum = floatx80_mul(accum, y, &env->fp_status); + accum = floatx80_add(f2xm1_coeff_4, accum, &env->fp_status); + accum = floatx80_mul(accum, y, &env->fp_status); + accum = floatx80_add(f2xm1_coeff_3, accum, &env->fp_status); + accum = floatx80_mul(accum, y, &env->fp_status); + accum = floatx80_add(f2xm1_coeff_2, accum, &env->fp_status); + accum = floatx80_mul(accum, y, &env->fp_status); + accum = floatx80_add(f2xm1_coeff_1, accum, &env->fp_status); + accum = floatx80_mul(accum, y, &env->fp_status); + accum = floatx80_add(f2xm1_coeff_0_low, accum, &env->fp_status); + + /* + * The full polynomial expansion is f2xm1_coeff_0 + accum + * (where accum has much lower magnitude, and so, in + * particular, carry out of the addition is not possible). + * (This expansion is only accurate to about 70 bits, not + * 128 bits.) + */ + aexp = extractFloatx80Exp(f2xm1_coeff_0); + asign = extractFloatx80Sign(f2xm1_coeff_0); + shift128RightJamming(extractFloatx80Frac(accum), 0, + aexp - extractFloatx80Exp(accum), + &asig0, &asig1); + bsig0 = extractFloatx80Frac(f2xm1_coeff_0); + bsig1 = 0; + if (asign == extractFloatx80Sign(accum)) { + add128(bsig0, bsig1, asig0, asig1, &asig0, &asig1); + } else { + sub128(bsig0, bsig1, asig0, asig1, &asig0, &asig1); + } + /* And thus compute an approximation to 2^y - 1. */ + mul128By64To192(asig0, asig1, extractFloatx80Frac(y), + &asig0, &asig1, &asig2); + aexp += extractFloatx80Exp(y) - 0x3ffe; + asign ^= extractFloatx80Sign(y); + if (n != 32) { + /* + * Multiply this by the precomputed value of 2^t and + * add that of 2^t - 1. + */ + mul128By64To192(asig0, asig1, + extractFloatx80Frac(f2xm1_table[n].exp2), + &asig0, &asig1, &asig2); + aexp += extractFloatx80Exp(f2xm1_table[n].exp2) - 0x3ffe; + bexp = extractFloatx80Exp(f2xm1_table[n].exp2m1); + bsig0 = extractFloatx80Frac(f2xm1_table[n].exp2m1); + bsig1 = 0; + if (bexp < aexp) { + shift128RightJamming(bsig0, bsig1, aexp - bexp, + &bsig0, &bsig1); + } else if (aexp < bexp) { + shift128RightJamming(asig0, asig1, bexp - aexp, + &asig0, &asig1); + aexp = bexp; + } + /* The sign of 2^t - 1 is always that of the result. */ + bsign = extractFloatx80Sign(f2xm1_table[n].exp2m1); + if (asign == bsign) { + /* Avoid possible carry out of the addition. */ + shift128RightJamming(asig0, asig1, 1, + &asig0, &asig1); + shift128RightJamming(bsig0, bsig1, 1, + &bsig0, &bsig1); + ++aexp; + add128(asig0, asig1, bsig0, bsig1, &asig0, &asig1); + } else { + sub128(bsig0, bsig1, asig0, asig1, &asig0, &asig1); + asign = bsign; + } + } + env->fp_status.float_rounding_mode = save_mode; + /* This result is inexact. */ + asig1 |= 1; + ST0 = normalizeRoundAndPackFloatx80(80, asign, aexp, asig0, asig1, + &env->fp_status); + } + + env->fp_status.floatx80_rounding_precision = save_prec; } + merge_exception_flags(env, old_flags); } void helper_fptan(CPUX86State *env) @@ -743,194 +1219,996 @@ void helper_fptan(CPUX86State *env) if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) { env->fpus |= 0x400; } else { - floatx80 one = { 0x8000000000000000LL, 0x3fff }; fptemp = tan(fptemp); ST0 = double_to_floatx80(env, fptemp); fpush(env); - ST0 = one; + ST0 = floatx80_one; env->fpus &= ~0x400; /* C2 <-- 0 */ /* the above code is for |arg| < 2**52 only */ } } +/* Values of pi/4, pi/2, 3pi/4 and pi, with 128-bit precision. 
*/ +#define pi_4_exp 0x3ffe +#define pi_4_sig_high 0xc90fdaa22168c234ULL +#define pi_4_sig_low 0xc4c6628b80dc1cd1ULL +#define pi_2_exp 0x3fff +#define pi_2_sig_high 0xc90fdaa22168c234ULL +#define pi_2_sig_low 0xc4c6628b80dc1cd1ULL +#define pi_34_exp 0x4000 +#define pi_34_sig_high 0x96cbe3f9990e91a7ULL +#define pi_34_sig_low 0x9394c9e8a0a5159dULL +#define pi_exp 0x4000 +#define pi_sig_high 0xc90fdaa22168c234ULL +#define pi_sig_low 0xc4c6628b80dc1cd1ULL + +/* + * Polynomial coefficients for an approximation to atan(x), with only + * odd powers of x used, for x in the interval [-1/16, 1/16]. (Unlike + * for some other approximations, no low part is needed for the first + * coefficient here to achieve a sufficiently accurate result, because + * the coefficient in this minimax approximation is very close to + * exactly 1.) + */ +#define fpatan_coeff_0 make_floatx80(0x3fff, 0x8000000000000000ULL) +#define fpatan_coeff_1 make_floatx80(0xbffd, 0xaaaaaaaaaaaaaa43ULL) +#define fpatan_coeff_2 make_floatx80(0x3ffc, 0xccccccccccbfe4f8ULL) +#define fpatan_coeff_3 make_floatx80(0xbffc, 0x92492491fbab2e66ULL) +#define fpatan_coeff_4 make_floatx80(0x3ffb, 0xe38e372881ea1e0bULL) +#define fpatan_coeff_5 make_floatx80(0xbffb, 0xba2c0104bbdd0615ULL) +#define fpatan_coeff_6 make_floatx80(0x3ffb, 0x9baf7ebf898b42efULL) + +struct fpatan_data { + /* High and low parts of atan(x). */ + floatx80 atan_high, atan_low; +}; + +static const struct fpatan_data fpatan_table[9] = { + { floatx80_zero_init, + floatx80_zero_init }, + { make_floatx80_init(0x3ffb, 0xfeadd4d5617b6e33ULL), + make_floatx80_init(0xbfb9, 0xdda19d8305ddc420ULL) }, + { make_floatx80_init(0x3ffc, 0xfadbafc96406eb15ULL), + make_floatx80_init(0x3fbb, 0xdb8f3debef442fccULL) }, + { make_floatx80_init(0x3ffd, 0xb7b0ca0f26f78474ULL), + make_floatx80_init(0xbfbc, 0xeab9bdba460376faULL) }, + { make_floatx80_init(0x3ffd, 0xed63382b0dda7b45ULL), + make_floatx80_init(0x3fbc, 0xdfc88bd978751a06ULL) }, + { make_floatx80_init(0x3ffe, 0x8f005d5ef7f59f9bULL), + make_floatx80_init(0x3fbd, 0xb906bc2ccb886e90ULL) }, + { make_floatx80_init(0x3ffe, 0xa4bc7d1934f70924ULL), + make_floatx80_init(0x3fbb, 0xcd43f9522bed64f8ULL) }, + { make_floatx80_init(0x3ffe, 0xb8053e2bc2319e74ULL), + make_floatx80_init(0xbfbc, 0xd3496ab7bd6eef0cULL) }, + { make_floatx80_init(0x3ffe, 0xc90fdaa22168c235ULL), + make_floatx80_init(0xbfbc, 0xece675d1fc8f8cbcULL) }, +}; + void helper_fpatan(CPUX86State *env) { - double fptemp, fpsrcop; + uint8_t old_flags = save_exception_flags(env); + uint64_t arg0_sig = extractFloatx80Frac(ST0); + int32_t arg0_exp = extractFloatx80Exp(ST0); + bool arg0_sign = extractFloatx80Sign(ST0); + uint64_t arg1_sig = extractFloatx80Frac(ST1); + int32_t arg1_exp = extractFloatx80Exp(ST1); + bool arg1_sign = extractFloatx80Sign(ST1); + + if (floatx80_is_signaling_nan(ST0, &env->fp_status)) { + float_raise(float_flag_invalid, &env->fp_status); + ST1 = floatx80_silence_nan(ST0, &env->fp_status); + } else if (floatx80_is_signaling_nan(ST1, &env->fp_status)) { + float_raise(float_flag_invalid, &env->fp_status); + ST1 = floatx80_silence_nan(ST1, &env->fp_status); + } else if (floatx80_invalid_encoding(ST0) || + floatx80_invalid_encoding(ST1)) { + float_raise(float_flag_invalid, &env->fp_status); + ST1 = floatx80_default_nan(&env->fp_status); + } else if (floatx80_is_any_nan(ST0)) { + ST1 = ST0; + } else if (floatx80_is_any_nan(ST1)) { + /* Pass this NaN through. */ + } else if (floatx80_is_zero(ST1) && !arg0_sign) { + /* Pass this zero through. 
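+ * (fpatan computes atan2(ST1, ST0) into ST1; for a zero ST1 and a non-negative ST0 that result is a zero with the sign of ST1, so ST1 is already correct.)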
*/ + } else if (((floatx80_is_infinity(ST0) && !floatx80_is_infinity(ST1)) || + arg0_exp - arg1_exp >= 80) && + !arg0_sign) { + /* + * Dividing ST1 by ST0 gives the correct result up to + * rounding, and avoids spurious underflow exceptions that + * might result from passing some small values through the + * polynomial approximation, but if a finite nonzero result of + * division is exact, the result of fpatan is still inexact + * (and underflowing where appropriate). + */ + signed char save_prec = env->fp_status.floatx80_rounding_precision; + env->fp_status.floatx80_rounding_precision = 80; + ST1 = floatx80_div(ST1, ST0, &env->fp_status); + env->fp_status.floatx80_rounding_precision = save_prec; + if (!floatx80_is_zero(ST1) && + !(get_float_exception_flags(&env->fp_status) & + float_flag_inexact)) { + /* + * The mathematical result is very slightly closer to zero + * than this exact result. Round a value with the + * significand adjusted accordingly to get the correct + * exceptions, and possibly an adjusted result depending + * on the rounding mode. + */ + uint64_t sig = extractFloatx80Frac(ST1); + int32_t exp = extractFloatx80Exp(ST1); + bool sign = extractFloatx80Sign(ST1); + if (exp == 0) { + normalizeFloatx80Subnormal(sig, &exp, &sig); + } + ST1 = normalizeRoundAndPackFloatx80(80, sign, exp, sig - 1, + -1, &env->fp_status); + } + } else { + /* The result is inexact. */ + bool rsign = arg1_sign; + int32_t rexp; + uint64_t rsig0, rsig1; + if (floatx80_is_zero(ST1)) { + /* + * ST0 is negative. The result is pi with the sign of + * ST1. + */ + rexp = pi_exp; + rsig0 = pi_sig_high; + rsig1 = pi_sig_low; + } else if (floatx80_is_infinity(ST1)) { + if (floatx80_is_infinity(ST0)) { + if (arg0_sign) { + rexp = pi_34_exp; + rsig0 = pi_34_sig_high; + rsig1 = pi_34_sig_low; + } else { + rexp = pi_4_exp; + rsig0 = pi_4_sig_high; + rsig1 = pi_4_sig_low; + } + } else { + rexp = pi_2_exp; + rsig0 = pi_2_sig_high; + rsig1 = pi_2_sig_low; + } + } else if (floatx80_is_zero(ST0) || arg1_exp - arg0_exp >= 80) { + rexp = pi_2_exp; + rsig0 = pi_2_sig_high; + rsig1 = pi_2_sig_low; + } else if (floatx80_is_infinity(ST0) || arg0_exp - arg1_exp >= 80) { + /* ST0 is negative. */ + rexp = pi_exp; + rsig0 = pi_sig_high; + rsig1 = pi_sig_low; + } else { + /* + * ST0 and ST1 are finite, nonzero and with exponents not + * too far apart. + */ + int32_t adj_exp, num_exp, den_exp, xexp, yexp, n, texp, zexp, aexp; + int32_t azexp, axexp; + bool adj_sub, ysign, zsign; + uint64_t adj_sig0, adj_sig1, num_sig, den_sig, xsig0, xsig1; + uint64_t msig0, msig1, msig2, remsig0, remsig1, remsig2; + uint64_t ysig0, ysig1, tsig, zsig0, zsig1, asig0, asig1; + uint64_t azsig0, azsig1; + uint64_t azsig2, azsig3, axsig0, axsig1; + floatx80 x8; + FloatRoundMode save_mode = env->fp_status.float_rounding_mode; + signed char save_prec = env->fp_status.floatx80_rounding_precision; + env->fp_status.float_rounding_mode = float_round_nearest_even; + env->fp_status.floatx80_rounding_precision = 80; + + if (arg0_exp == 0) { + normalizeFloatx80Subnormal(arg0_sig, &arg0_exp, &arg0_sig); + } + if (arg1_exp == 0) { + normalizeFloatx80Subnormal(arg1_sig, &arg1_exp, &arg1_sig); + } + if (arg0_exp > arg1_exp || + (arg0_exp == arg1_exp && arg0_sig >= arg1_sig)) { + /* Work with abs(ST1) / abs(ST0). */ + num_exp = arg1_exp; + num_sig = arg1_sig; + den_exp = arg0_exp; + den_sig = arg0_sig; + if (arg0_sign) { + /* The result is subtracted from pi. 
*/ + adj_exp = pi_exp; + adj_sig0 = pi_sig_high; + adj_sig1 = pi_sig_low; + adj_sub = true; + } else { + /* The result is used as-is. */ + adj_exp = 0; + adj_sig0 = 0; + adj_sig1 = 0; + adj_sub = false; + } + } else { + /* Work with abs(ST0) / abs(ST1). */ + num_exp = arg0_exp; + num_sig = arg0_sig; + den_exp = arg1_exp; + den_sig = arg1_sig; + /* The result is added to or subtracted from pi/2. */ + adj_exp = pi_2_exp; + adj_sig0 = pi_2_sig_high; + adj_sig1 = pi_2_sig_low; + adj_sub = !arg0_sign; + } + + /* + * Compute x = num/den, where 0 < x <= 1 and x is not too + * small. + */ + xexp = num_exp - den_exp + 0x3ffe; + remsig0 = num_sig; + remsig1 = 0; + if (den_sig <= remsig0) { + shift128Right(remsig0, remsig1, 1, &remsig0, &remsig1); + ++xexp; + } + xsig0 = estimateDiv128To64(remsig0, remsig1, den_sig); + mul64To128(den_sig, xsig0, &msig0, &msig1); + sub128(remsig0, remsig1, msig0, msig1, &remsig0, &remsig1); + while ((int64_t) remsig0 < 0) { + --xsig0; + add128(remsig0, remsig1, 0, den_sig, &remsig0, &remsig1); + } + xsig1 = estimateDiv128To64(remsig1, 0, den_sig); + /* + * No need to correct any estimation error in xsig1; even + * with such error, it is accurate enough. + */ + + /* + * Split x as x = t + y, where t = n/8 is the nearest + * multiple of 1/8 to x. + */ + x8 = normalizeRoundAndPackFloatx80(80, false, xexp + 3, xsig0, + xsig1, &env->fp_status); + n = floatx80_to_int32(x8, &env->fp_status); + if (n == 0) { + ysign = false; + yexp = xexp; + ysig0 = xsig0; + ysig1 = xsig1; + texp = 0; + tsig = 0; + } else { + int shift = clz32(n) + 32; + texp = 0x403b - shift; + tsig = n; + tsig <<= shift; + if (texp == xexp) { + sub128(xsig0, xsig1, tsig, 0, &ysig0, &ysig1); + if ((int64_t) ysig0 >= 0) { + ysign = false; + if (ysig0 == 0) { + if (ysig1 == 0) { + yexp = 0; + } else { + shift = clz64(ysig1) + 64; + yexp = xexp - shift; + shift128Left(ysig0, ysig1, shift, + &ysig0, &ysig1); + } + } else { + shift = clz64(ysig0); + yexp = xexp - shift; + shift128Left(ysig0, ysig1, shift, &ysig0, &ysig1); + } + } else { + ysign = true; + sub128(0, 0, ysig0, ysig1, &ysig0, &ysig1); + if (ysig0 == 0) { + shift = clz64(ysig1) + 64; + } else { + shift = clz64(ysig0); + } + yexp = xexp - shift; + shift128Left(ysig0, ysig1, shift, &ysig0, &ysig1); + } + } else { + /* + * t's exponent must be greater than x's because t + * is positive and the nearest multiple of 1/8 to + * x, and if x has a greater exponent, the power + * of 2 with that exponent is also a multiple of + * 1/8. + */ + uint64_t usig0, usig1; + shift128RightJamming(xsig0, xsig1, texp - xexp, + &usig0, &usig1); + ysign = true; + sub128(tsig, 0, usig0, usig1, &ysig0, &ysig1); + if (ysig0 == 0) { + shift = clz64(ysig1) + 64; + } else { + shift = clz64(ysig0); + } + yexp = texp - shift; + shift128Left(ysig0, ysig1, shift, &ysig0, &ysig1); + } + } + + /* + * Compute z = y/(1+tx), so arctan(x) = arctan(t) + + * arctan(z). + */ + zsign = ysign; + if (texp == 0 || yexp == 0) { + zexp = yexp; + zsig0 = ysig0; + zsig1 = ysig1; + } else { + /* + * t <= 1, x <= 1 and if both are 1 then y is 0, so tx < 1. + */ + int32_t dexp = texp + xexp - 0x3ffe; + uint64_t dsig0, dsig1, dsig2; + mul128By64To192(xsig0, xsig1, tsig, &dsig0, &dsig1, &dsig2); + /* + * dexp <= 0x3fff (and if equal, dsig0 has a leading 0 + * bit). Add 1 to produce the denominator 1+tx. 
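+ * (The shift below aligns the significand of tx to exponent 0x3fff; setting the top bit then supplies the implicit one, giving the significand of 1+tx at that exponent.)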
+ */ + shift128RightJamming(dsig0, dsig1, 0x3fff - dexp, + &dsig0, &dsig1); + dsig0 |= 0x8000000000000000ULL; + zexp = yexp - 1; + remsig0 = ysig0; + remsig1 = ysig1; + remsig2 = 0; + if (dsig0 <= remsig0) { + shift128Right(remsig0, remsig1, 1, &remsig0, &remsig1); + ++zexp; + } + zsig0 = estimateDiv128To64(remsig0, remsig1, dsig0); + mul128By64To192(dsig0, dsig1, zsig0, &msig0, &msig1, &msig2); + sub192(remsig0, remsig1, remsig2, msig0, msig1, msig2, + &remsig0, &remsig1, &remsig2); + while ((int64_t) remsig0 < 0) { + --zsig0; + add192(remsig0, remsig1, remsig2, 0, dsig0, dsig1, + &remsig0, &remsig1, &remsig2); + } + zsig1 = estimateDiv128To64(remsig1, remsig2, dsig0); + /* No need to correct any estimation error in zsig1. */ + } + + if (zexp == 0) { + azexp = 0; + azsig0 = 0; + azsig1 = 0; + } else { + floatx80 z2, accum; + uint64_t z2sig0, z2sig1, z2sig2, z2sig3; + /* Compute z^2. */ + mul128To256(zsig0, zsig1, zsig0, zsig1, + &z2sig0, &z2sig1, &z2sig2, &z2sig3); + z2 = normalizeRoundAndPackFloatx80(80, false, + zexp + zexp - 0x3ffe, + z2sig0, z2sig1, + &env->fp_status); + + /* Compute the lower parts of the polynomial expansion. */ + accum = floatx80_mul(fpatan_coeff_6, z2, &env->fp_status); + accum = floatx80_add(fpatan_coeff_5, accum, &env->fp_status); + accum = floatx80_mul(accum, z2, &env->fp_status); + accum = floatx80_add(fpatan_coeff_4, accum, &env->fp_status); + accum = floatx80_mul(accum, z2, &env->fp_status); + accum = floatx80_add(fpatan_coeff_3, accum, &env->fp_status); + accum = floatx80_mul(accum, z2, &env->fp_status); + accum = floatx80_add(fpatan_coeff_2, accum, &env->fp_status); + accum = floatx80_mul(accum, z2, &env->fp_status); + accum = floatx80_add(fpatan_coeff_1, accum, &env->fp_status); + accum = floatx80_mul(accum, z2, &env->fp_status); + + /* + * The full polynomial expansion is z*(fpatan_coeff_0 + accum). + * fpatan_coeff_0 is 1, and accum is negative and much smaller. + */ + aexp = extractFloatx80Exp(fpatan_coeff_0); + shift128RightJamming(extractFloatx80Frac(accum), 0, + aexp - extractFloatx80Exp(accum), + &asig0, &asig1); + sub128(extractFloatx80Frac(fpatan_coeff_0), 0, asig0, asig1, + &asig0, &asig1); + /* Multiply by z to compute arctan(z). */ + azexp = aexp + zexp - 0x3ffe; + mul128To256(asig0, asig1, zsig0, zsig1, &azsig0, &azsig1, + &azsig2, &azsig3); + } + + /* Add arctan(t) (positive or zero) and arctan(z) (sign zsign). */ + if (texp == 0) { + /* z is positive. 
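+ * (t == 0 here, so arctan(x) reduces to arctan(z) and the az values computed above are used directly.)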
*/ + axexp = azexp; + axsig0 = azsig0; + axsig1 = azsig1; + } else { + bool low_sign = extractFloatx80Sign(fpatan_table[n].atan_low); + int32_t low_exp = extractFloatx80Exp(fpatan_table[n].atan_low); + uint64_t low_sig0 = + extractFloatx80Frac(fpatan_table[n].atan_low); + uint64_t low_sig1 = 0; + axexp = extractFloatx80Exp(fpatan_table[n].atan_high); + axsig0 = extractFloatx80Frac(fpatan_table[n].atan_high); + axsig1 = 0; + shift128RightJamming(low_sig0, low_sig1, axexp - low_exp, + &low_sig0, &low_sig1); + if (low_sign) { + sub128(axsig0, axsig1, low_sig0, low_sig1, + &axsig0, &axsig1); + } else { + add128(axsig0, axsig1, low_sig0, low_sig1, + &axsig0, &axsig1); + } + if (azexp >= axexp) { + shift128RightJamming(axsig0, axsig1, azexp - axexp + 1, + &axsig0, &axsig1); + axexp = azexp + 1; + shift128RightJamming(azsig0, azsig1, 1, + &azsig0, &azsig1); + } else { + shift128RightJamming(axsig0, axsig1, 1, + &axsig0, &axsig1); + shift128RightJamming(azsig0, azsig1, axexp - azexp + 1, + &azsig0, &azsig1); + ++axexp; + } + if (zsign) { + sub128(axsig0, axsig1, azsig0, azsig1, + &axsig0, &axsig1); + } else { + add128(axsig0, axsig1, azsig0, azsig1, + &axsig0, &axsig1); + } + } + + if (adj_exp == 0) { + rexp = axexp; + rsig0 = axsig0; + rsig1 = axsig1; + } else { + /* + * Add or subtract arctan(x) (exponent axexp, + * significand axsig0 and axsig1, positive, not + * necessarily normalized) to the number given by + * adj_exp, adj_sig0 and adj_sig1, according to + * adj_sub. + */ + if (adj_exp >= axexp) { + shift128RightJamming(axsig0, axsig1, adj_exp - axexp + 1, + &axsig0, &axsig1); + rexp = adj_exp + 1; + shift128RightJamming(adj_sig0, adj_sig1, 1, + &adj_sig0, &adj_sig1); + } else { + shift128RightJamming(axsig0, axsig1, 1, + &axsig0, &axsig1); + shift128RightJamming(adj_sig0, adj_sig1, + axexp - adj_exp + 1, + &adj_sig0, &adj_sig1); + rexp = axexp + 1; + } + if (adj_sub) { + sub128(adj_sig0, adj_sig1, axsig0, axsig1, + &rsig0, &rsig1); + } else { + add128(adj_sig0, adj_sig1, axsig0, axsig1, + &rsig0, &rsig1); + } + } + + env->fp_status.float_rounding_mode = save_mode; + env->fp_status.floatx80_rounding_precision = save_prec; + } + /* This result is inexact. 
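+ * (Forcing the low significand bit on acts as a sticky bit: normalizeRoundAndPackFloatx80 then always sees discarded bits, raises the inexact flag and rounds accordingly.)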
*/ + rsig1 |= 1; + ST1 = normalizeRoundAndPackFloatx80(80, rsign, rexp, + rsig0, rsig1, &env->fp_status); + } - fpsrcop = floatx80_to_double(env, ST1); - fptemp = floatx80_to_double(env, ST0); - ST1 = double_to_floatx80(env, atan2(fpsrcop, fptemp)); fpop(env); + merge_exception_flags(env, old_flags); } void helper_fxtract(CPUX86State *env) { + uint8_t old_flags = save_exception_flags(env); CPU_LDoubleU temp; temp.d = ST0; if (floatx80_is_zero(ST0)) { /* Easy way to generate -inf and raising division by 0 exception */ - floatx80 zero = { 0x0000000000000000LL, 0x0000 }; - floatx80 one = { 0x8000000000000000LL, 0x3fff }; - ST0 = floatx80_div(floatx80_chs(one), zero, + ST0 = floatx80_div(floatx80_chs(floatx80_one), floatx80_zero, &env->fp_status); fpush(env); ST0 = temp.d; + } else if (floatx80_invalid_encoding(ST0)) { + float_raise(float_flag_invalid, &env->fp_status); + ST0 = floatx80_default_nan(&env->fp_status); + fpush(env); + ST0 = ST1; + } else if (floatx80_is_any_nan(ST0)) { + if (floatx80_is_signaling_nan(ST0, &env->fp_status)) { + float_raise(float_flag_invalid, &env->fp_status); + ST0 = floatx80_silence_nan(ST0, &env->fp_status); + } + fpush(env); + ST0 = ST1; + } else if (floatx80_is_infinity(ST0)) { + fpush(env); + ST0 = ST1; + ST1 = floatx80_infinity; } else { int expdif; - expdif = EXPD(temp) - EXPBIAS; + if (EXPD(temp) == 0) { + int shift = clz64(temp.l.lower); + temp.l.lower <<= shift; + expdif = 1 - EXPBIAS - shift; + float_raise(float_flag_input_denormal, &env->fp_status); + } else { + expdif = EXPD(temp) - EXPBIAS; + } /* DP exponent bias */ ST0 = int32_to_floatx80(expdif, &env->fp_status); fpush(env); BIASEXPONENT(temp); ST0 = temp.d; } + merge_exception_flags(env, old_flags); } -void helper_fprem1(CPUX86State *env) +static void helper_fprem_common(CPUX86State *env, bool mod) { - double st0, st1, dblq, fpsrcop, fptemp; - CPU_LDoubleU fpsrcop1, fptemp1; - int expdif; - signed long long int q; - - st0 = floatx80_to_double(env, ST0); - st1 = floatx80_to_double(env, ST1); - - if (isinf(st0) || isnan(st0) || isnan(st1) || (st1 == 0.0)) { - ST0 = double_to_floatx80(env, NAN); /* NaN */ - env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */ - return; - } - - fpsrcop = st0; - fptemp = st1; - fpsrcop1.d = ST0; - fptemp1.d = ST1; - expdif = EXPD(fpsrcop1) - EXPD(fptemp1); + uint8_t old_flags = save_exception_flags(env); + uint64_t quotient; + CPU_LDoubleU temp0, temp1; + int exp0, exp1, expdiff; - if (expdif < 0) { - /* optimisation? 
taken from the AMD docs */ - env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */ - /* ST0 is unchanged */ - return; - } + temp0.d = ST0; + temp1.d = ST1; + exp0 = EXPD(temp0); + exp1 = EXPD(temp1); - if (expdif < 53) { - dblq = fpsrcop / fptemp; - /* round dblq towards nearest integer */ - dblq = rint(dblq); - st0 = fpsrcop - fptemp * dblq; - - /* convert dblq to q by truncating towards zero */ - if (dblq < 0.0) { - q = (signed long long int)(-dblq); + env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */ + if (floatx80_is_zero(ST0) || floatx80_is_zero(ST1) || + exp0 == 0x7fff || exp1 == 0x7fff || + floatx80_invalid_encoding(ST0) || floatx80_invalid_encoding(ST1)) { + ST0 = floatx80_modrem(ST0, ST1, mod, &quotient, &env->fp_status); + } else { + if (exp0 == 0) { + exp0 = 1 - clz64(temp0.l.lower); + } + if (exp1 == 0) { + exp1 = 1 - clz64(temp1.l.lower); + } + expdiff = exp0 - exp1; + if (expdiff < 64) { + ST0 = floatx80_modrem(ST0, ST1, mod, &quotient, &env->fp_status); + env->fpus |= (quotient & 0x4) << (8 - 2); /* (C0) <-- q2 */ + env->fpus |= (quotient & 0x2) << (14 - 1); /* (C3) <-- q1 */ + env->fpus |= (quotient & 0x1) << (9 - 0); /* (C1) <-- q0 */ } else { - q = (signed long long int)dblq; + /* + * Partial remainder. This choice of how many bits to + * process at once is specified in AMD instruction set + * manuals, and empirically is followed by Intel + * processors as well; it ensures that the final remainder + * operation in a loop does produce the correct low three + * bits of the quotient. AMD manuals specify that the + * flags other than C2 are cleared, and empirically Intel + * processors clear them as well. + */ + int n = 32 + (expdiff % 32); + temp1.d = floatx80_scalbn(temp1.d, expdiff - n, &env->fp_status); + ST0 = floatx80_mod(ST0, temp1.d, &env->fp_status); + env->fpus |= 0x400; /* C2 <-- 1 */ } - - env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */ - /* (C0,C3,C1) <-- (q2,q1,q0) */ - env->fpus |= (q & 0x4) << (8 - 2); /* (C0) <-- q2 */ - env->fpus |= (q & 0x2) << (14 - 1); /* (C3) <-- q1 */ - env->fpus |= (q & 0x1) << (9 - 0); /* (C1) <-- q0 */ - } else { - env->fpus |= 0x400; /* C2 <-- 1 */ - fptemp = pow(2.0, expdif - 50); - fpsrcop = (st0 / st1) / fptemp; - /* fpsrcop = integer obtained by chopping */ - fpsrcop = (fpsrcop < 0.0) ? - -(floor(fabs(fpsrcop))) : floor(fpsrcop); - st0 -= (st1 * fpsrcop * fptemp); } - ST0 = double_to_floatx80(env, st0); + merge_exception_flags(env, old_flags); } -void helper_fprem(CPUX86State *env) +void helper_fprem1(CPUX86State *env) { - double st0, st1, dblq, fpsrcop, fptemp; - CPU_LDoubleU fpsrcop1, fptemp1; - int expdif; - signed long long int q; - - st0 = floatx80_to_double(env, ST0); - st1 = floatx80_to_double(env, ST1); - - if (isinf(st0) || isnan(st0) || isnan(st1) || (st1 == 0.0)) { - ST0 = double_to_floatx80(env, NAN); /* NaN */ - env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */ - return; - } + helper_fprem_common(env, false); +} - fpsrcop = st0; - fptemp = st1; - fpsrcop1.d = ST0; - fptemp1.d = ST1; - expdif = EXPD(fpsrcop1) - EXPD(fptemp1); +void helper_fprem(CPUX86State *env) +{ + helper_fprem_common(env, true); +} - if (expdif < 0) { - /* optimisation? taken from the AMD docs */ - env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */ - /* ST0 is unchanged */ - return; - } +/* 128-bit significand of log2(e). */ +#define log2_e_sig_high 0xb8aa3b295c17f0bbULL +#define log2_e_sig_low 0xbe87fed0691d3e89ULL - if (expdif < 53) { - dblq = fpsrcop / fptemp; /* ST0 / ST1 */ - /* round dblq towards zero */ - dblq = (dblq < 0.0) ?
ceil(dblq) : floor(dblq); - st0 = fpsrcop - fptemp * dblq; /* fpsrcop is ST0 */ +/* + * Polynomial coefficients for an approximation to log2((1+x)/(1-x)), + * with only odd powers of x used, for x in the interval [2*sqrt(2)-3, + * 3-2*sqrt(2)], which corresponds to logarithms of numbers in the + * interval [sqrt(2)/2, sqrt(2)]. + */ +#define fyl2x_coeff_0 make_floatx80(0x4000, 0xb8aa3b295c17f0bcULL) +#define fyl2x_coeff_0_low make_floatx80(0xbfbf, 0x834972fe2d7bab1bULL) +#define fyl2x_coeff_1 make_floatx80(0x3ffe, 0xf6384ee1d01febb8ULL) +#define fyl2x_coeff_2 make_floatx80(0x3ffe, 0x93bb62877cdfa2e3ULL) +#define fyl2x_coeff_3 make_floatx80(0x3ffd, 0xd30bb153d808f269ULL) +#define fyl2x_coeff_4 make_floatx80(0x3ffd, 0xa42589eaf451499eULL) +#define fyl2x_coeff_5 make_floatx80(0x3ffd, 0x864d42c0f8f17517ULL) +#define fyl2x_coeff_6 make_floatx80(0x3ffc, 0xe3476578adf26272ULL) +#define fyl2x_coeff_7 make_floatx80(0x3ffc, 0xc506c5f874e6d80fULL) +#define fyl2x_coeff_8 make_floatx80(0x3ffc, 0xac5cf50cc57d6372ULL) +#define fyl2x_coeff_9 make_floatx80(0x3ffc, 0xb1ed0066d971a103ULL) - /* convert dblq to q by truncating towards zero */ - if (dblq < 0.0) { - q = (signed long long int)(-dblq); - } else { - q = (signed long long int)dblq; - } +/* + * Compute an approximation of log2(1+arg), where 1+arg is in the + * interval [sqrt(2)/2, sqrt(2)]. It is assumed that when this + * function is called, rounding precision is set to 80 and the + * round-to-nearest mode is in effect. arg must not be exactly zero, + * and must not be so close to zero that underflow might occur. + */ +static void helper_fyl2x_common(CPUX86State *env, floatx80 arg, int32_t *exp, + uint64_t *sig0, uint64_t *sig1) +{ + uint64_t arg0_sig = extractFloatx80Frac(arg); + int32_t arg0_exp = extractFloatx80Exp(arg); + bool arg0_sign = extractFloatx80Sign(arg); + bool asign; + int32_t dexp, texp, aexp; + uint64_t dsig0, dsig1, tsig0, tsig1, rsig0, rsig1, rsig2; + uint64_t msig0, msig1, msig2, t2sig0, t2sig1, t2sig2, t2sig3; + uint64_t asig0, asig1, asig2, asig3, bsig0, bsig1; + floatx80 t2, accum; - env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */ - /* (C0,C3,C1) <-- (q2,q1,q0) */ - env->fpus |= (q & 0x4) << (8 - 2); /* (C0) <-- q2 */ - env->fpus |= (q & 0x2) << (14 - 1); /* (C3) <-- q1 */ - env->fpus |= (q & 0x1) << (9 - 0); /* (C1) <-- q0 */ + /* + * Compute an approximation of arg/(2+arg), with extra precision, + * as the argument to a polynomial approximation. The extra + * precision is only needed for the first term of the + * approximation, with subsequent terms being significantly + * smaller; the approximation only uses odd exponents, and the + * square of arg/(2+arg) is at most 17-12*sqrt(2) = 0.029.... 
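+ * (With x = arg/(2+arg), (1+x)/(1-x) equals 1+arg, so the polynomial for log2((1+x)/(1-x)) above yields log2(1+arg); the endpoints 1+arg = sqrt(2)/2 and sqrt(2) map to x = 2*sqrt(2)-3 and 3-2*sqrt(2) respectively.)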
+ */ + if (arg0_sign) { + dexp = 0x3fff; + shift128RightJamming(arg0_sig, 0, dexp - arg0_exp, &dsig0, &dsig1); + sub128(0, 0, dsig0, dsig1, &dsig0, &dsig1); } else { - int N = 32 + (expdif % 32); /* as per AMD docs */ + dexp = 0x4000; + shift128RightJamming(arg0_sig, 0, dexp - arg0_exp, &dsig0, &dsig1); + dsig0 |= 0x8000000000000000ULL; + } + texp = arg0_exp - dexp + 0x3ffe; + rsig0 = arg0_sig; + rsig1 = 0; + rsig2 = 0; + if (dsig0 <= rsig0) { + shift128Right(rsig0, rsig1, 1, &rsig0, &rsig1); + ++texp; + } + tsig0 = estimateDiv128To64(rsig0, rsig1, dsig0); + mul128By64To192(dsig0, dsig1, tsig0, &msig0, &msig1, &msig2); + sub192(rsig0, rsig1, rsig2, msig0, msig1, msig2, + &rsig0, &rsig1, &rsig2); + while ((int64_t) rsig0 < 0) { + --tsig0; + add192(rsig0, rsig1, rsig2, 0, dsig0, dsig1, + &rsig0, &rsig1, &rsig2); + } + tsig1 = estimateDiv128To64(rsig1, rsig2, dsig0); + /* + * No need to correct any estimation error in tsig1; even with + * such error, it is accurate enough. Now compute the square of + * that approximation. + */ + mul128To256(tsig0, tsig1, tsig0, tsig1, + &t2sig0, &t2sig1, &t2sig2, &t2sig3); + t2 = normalizeRoundAndPackFloatx80(80, false, texp + texp - 0x3ffe, + t2sig0, t2sig1, &env->fp_status); + + /* Compute the lower parts of the polynomial expansion. */ + accum = floatx80_mul(fyl2x_coeff_9, t2, &env->fp_status); + accum = floatx80_add(fyl2x_coeff_8, accum, &env->fp_status); + accum = floatx80_mul(accum, t2, &env->fp_status); + accum = floatx80_add(fyl2x_coeff_7, accum, &env->fp_status); + accum = floatx80_mul(accum, t2, &env->fp_status); + accum = floatx80_add(fyl2x_coeff_6, accum, &env->fp_status); + accum = floatx80_mul(accum, t2, &env->fp_status); + accum = floatx80_add(fyl2x_coeff_5, accum, &env->fp_status); + accum = floatx80_mul(accum, t2, &env->fp_status); + accum = floatx80_add(fyl2x_coeff_4, accum, &env->fp_status); + accum = floatx80_mul(accum, t2, &env->fp_status); + accum = floatx80_add(fyl2x_coeff_3, accum, &env->fp_status); + accum = floatx80_mul(accum, t2, &env->fp_status); + accum = floatx80_add(fyl2x_coeff_2, accum, &env->fp_status); + accum = floatx80_mul(accum, t2, &env->fp_status); + accum = floatx80_add(fyl2x_coeff_1, accum, &env->fp_status); + accum = floatx80_mul(accum, t2, &env->fp_status); + accum = floatx80_add(fyl2x_coeff_0_low, accum, &env->fp_status); - env->fpus |= 0x400; /* C2 <-- 1 */ - fptemp = pow(2.0, (double)(expdif - N)); - fpsrcop = (st0 / st1) / fptemp; - /* fpsrcop = integer obtained by chopping */ - fpsrcop = (fpsrcop < 0.0) ? - -(floor(fabs(fpsrcop))) : floor(fpsrcop); - st0 -= (st1 * fpsrcop * fptemp); + /* + * The full polynomial expansion is fyl2x_coeff_0 + accum (where + * accum has much lower magnitude, and so, in particular, carry + * out of the addition is not possible), multiplied by t. (This + * expansion is only accurate to about 70 bits, not 128 bits.) + */ + aexp = extractFloatx80Exp(fyl2x_coeff_0); + asign = extractFloatx80Sign(fyl2x_coeff_0); + shift128RightJamming(extractFloatx80Frac(accum), 0, + aexp - extractFloatx80Exp(accum), + &asig0, &asig1); + bsig0 = extractFloatx80Frac(fyl2x_coeff_0); + bsig1 = 0; + if (asign == extractFloatx80Sign(accum)) { + add128(bsig0, bsig1, asig0, asig1, &asig0, &asig1); + } else { + sub128(bsig0, bsig1, asig0, asig1, &asig0, &asig1); } - ST0 = double_to_floatx80(env, st0); + /* Multiply by t to compute the required result. 
*/ + mul128To256(asig0, asig1, tsig0, tsig1, + &asig0, &asig1, &asig2, &asig3); + aexp += texp - 0x3ffe; + *exp = aexp; + *sig0 = asig0; + *sig1 = asig1; } void helper_fyl2xp1(CPUX86State *env) { - double fptemp = floatx80_to_double(env, ST0); + uint8_t old_flags = save_exception_flags(env); + uint64_t arg0_sig = extractFloatx80Frac(ST0); + int32_t arg0_exp = extractFloatx80Exp(ST0); + bool arg0_sign = extractFloatx80Sign(ST0); + uint64_t arg1_sig = extractFloatx80Frac(ST1); + int32_t arg1_exp = extractFloatx80Exp(ST1); + bool arg1_sign = extractFloatx80Sign(ST1); + + if (floatx80_is_signaling_nan(ST0, &env->fp_status)) { + float_raise(float_flag_invalid, &env->fp_status); + ST1 = floatx80_silence_nan(ST0, &env->fp_status); + } else if (floatx80_is_signaling_nan(ST1, &env->fp_status)) { + float_raise(float_flag_invalid, &env->fp_status); + ST1 = floatx80_silence_nan(ST1, &env->fp_status); + } else if (floatx80_invalid_encoding(ST0) || + floatx80_invalid_encoding(ST1)) { + float_raise(float_flag_invalid, &env->fp_status); + ST1 = floatx80_default_nan(&env->fp_status); + } else if (floatx80_is_any_nan(ST0)) { + ST1 = ST0; + } else if (floatx80_is_any_nan(ST1)) { + /* Pass this NaN through. */ + } else if (arg0_exp > 0x3ffd || + (arg0_exp == 0x3ffd && arg0_sig > (arg0_sign ? + 0x95f619980c4336f7ULL : + 0xd413cccfe7799211ULL))) { + /* + * Out of range for the instruction (ST0 must have absolute + * value less than 1 - sqrt(2)/2 = 0.292..., according to + * Intel manuals; AMD manuals allow a range from sqrt(2)/2 - 1 + * to sqrt(2) - 1, which we allow here), treat as invalid. + */ + float_raise(float_flag_invalid, &env->fp_status); + ST1 = floatx80_default_nan(&env->fp_status); + } else if (floatx80_is_zero(ST0) || floatx80_is_zero(ST1) || + arg1_exp == 0x7fff) { + /* + * One argument is zero, or multiplying by infinity; correct + * result is exact and can be obtained by multiplying the + * arguments. + */ + ST1 = floatx80_mul(ST0, ST1, &env->fp_status); + } else if (arg0_exp < 0x3fb0) { + /* + * Multiplying both arguments and an extra-precision version + * of log2(e) is sufficiently precise. + */ + uint64_t sig0, sig1, sig2; + int32_t exp; + if (arg0_exp == 0) { + normalizeFloatx80Subnormal(arg0_sig, &arg0_exp, &arg0_sig); + } + if (arg1_exp == 0) { + normalizeFloatx80Subnormal(arg1_sig, &arg1_exp, &arg1_sig); + } + mul128By64To192(log2_e_sig_high, log2_e_sig_low, arg0_sig, + &sig0, &sig1, &sig2); + exp = arg0_exp + 1; + mul128By64To192(sig0, sig1, arg1_sig, &sig0, &sig1, &sig2); + exp += arg1_exp - 0x3ffe; + /* This result is inexact. */ + sig1 |= 1; + ST1 = normalizeRoundAndPackFloatx80(80, arg0_sign ^ arg1_sign, exp, + sig0, sig1, &env->fp_status); + } else { + int32_t aexp; + uint64_t asig0, asig1, asig2; + FloatRoundMode save_mode = env->fp_status.float_rounding_mode; + signed char save_prec = env->fp_status.floatx80_rounding_precision; + env->fp_status.float_rounding_mode = float_round_nearest_even; + env->fp_status.floatx80_rounding_precision = 80; + + helper_fyl2x_common(env, ST0, &aexp, &asig0, &asig1); + /* + * Multiply by the second argument to compute the required + * result. + */ + if (arg1_exp == 0) { + normalizeFloatx80Subnormal(arg1_sig, &arg1_exp, &arg1_sig); + } + mul128By64To192(asig0, asig1, arg1_sig, &asig0, &asig1, &asig2); + aexp += arg1_exp - 0x3ffe; + /* This result is inexact. 
*/ + asig1 |= 1; + env->fp_status.float_rounding_mode = save_mode; + ST1 = normalizeRoundAndPackFloatx80(80, arg0_sign ^ arg1_sign, aexp, + asig0, asig1, &env->fp_status); + env->fp_status.floatx80_rounding_precision = save_prec; + } + fpop(env); + merge_exception_flags(env, old_flags); +} - if ((fptemp + 1.0) > 0.0) { - fptemp = log(fptemp + 1.0) / log(2.0); /* log2(ST + 1.0) */ - fptemp *= floatx80_to_double(env, ST1); - ST1 = double_to_floatx80(env, fptemp); - fpop(env); +void helper_fyl2x(CPUX86State *env) +{ + uint8_t old_flags = save_exception_flags(env); + uint64_t arg0_sig = extractFloatx80Frac(ST0); + int32_t arg0_exp = extractFloatx80Exp(ST0); + bool arg0_sign = extractFloatx80Sign(ST0); + uint64_t arg1_sig = extractFloatx80Frac(ST1); + int32_t arg1_exp = extractFloatx80Exp(ST1); + bool arg1_sign = extractFloatx80Sign(ST1); + + if (floatx80_is_signaling_nan(ST0, &env->fp_status)) { + float_raise(float_flag_invalid, &env->fp_status); + ST1 = floatx80_silence_nan(ST0, &env->fp_status); + } else if (floatx80_is_signaling_nan(ST1, &env->fp_status)) { + float_raise(float_flag_invalid, &env->fp_status); + ST1 = floatx80_silence_nan(ST1, &env->fp_status); + } else if (floatx80_invalid_encoding(ST0) || + floatx80_invalid_encoding(ST1)) { + float_raise(float_flag_invalid, &env->fp_status); + ST1 = floatx80_default_nan(&env->fp_status); + } else if (floatx80_is_any_nan(ST0)) { + ST1 = ST0; + } else if (floatx80_is_any_nan(ST1)) { + /* Pass this NaN through. */ + } else if (arg0_sign && !floatx80_is_zero(ST0)) { + float_raise(float_flag_invalid, &env->fp_status); + ST1 = floatx80_default_nan(&env->fp_status); + } else if (floatx80_is_infinity(ST1)) { + FloatRelation cmp = floatx80_compare(ST0, floatx80_one, + &env->fp_status); + switch (cmp) { + case float_relation_less: + ST1 = floatx80_chs(ST1); + break; + case float_relation_greater: + /* Result is infinity of the same sign as ST1. */ + break; + default: + float_raise(float_flag_invalid, &env->fp_status); + ST1 = floatx80_default_nan(&env->fp_status); + break; + } + } else if (floatx80_is_infinity(ST0)) { + if (floatx80_is_zero(ST1)) { + float_raise(float_flag_invalid, &env->fp_status); + ST1 = floatx80_default_nan(&env->fp_status); + } else if (arg1_sign) { + ST1 = floatx80_chs(ST0); + } else { + ST1 = ST0; + } + } else if (floatx80_is_zero(ST0)) { + if (floatx80_is_zero(ST1)) { + float_raise(float_flag_invalid, &env->fp_status); + ST1 = floatx80_default_nan(&env->fp_status); + } else { + /* Result is infinity with opposite sign to ST1. */ + float_raise(float_flag_divbyzero, &env->fp_status); + ST1 = make_floatx80(arg1_sign ? 0x7fff : 0xffff, + 0x8000000000000000ULL); + } + } else if (floatx80_is_zero(ST1)) { + if (floatx80_lt(ST0, floatx80_one, &env->fp_status)) { + ST1 = floatx80_chs(ST1); + } + /* Otherwise, ST1 is already the correct result. 
*/ + } else if (floatx80_eq(ST0, floatx80_one, &env->fp_status)) { + if (arg1_sign) { + ST1 = floatx80_chs(floatx80_zero); + } else { + ST1 = floatx80_zero; + } } else { - env->fpus &= ~0x4700; - env->fpus |= 0x400; + int32_t int_exp; + floatx80 arg0_m1; + FloatRoundMode save_mode = env->fp_status.float_rounding_mode; + signed char save_prec = env->fp_status.floatx80_rounding_precision; + env->fp_status.float_rounding_mode = float_round_nearest_even; + env->fp_status.floatx80_rounding_precision = 80; + + if (arg0_exp == 0) { + normalizeFloatx80Subnormal(arg0_sig, &arg0_exp, &arg0_sig); + } + if (arg1_exp == 0) { + normalizeFloatx80Subnormal(arg1_sig, &arg1_exp, &arg1_sig); + } + int_exp = arg0_exp - 0x3fff; + if (arg0_sig > 0xb504f333f9de6484ULL) { + ++int_exp; + } + arg0_m1 = floatx80_sub(floatx80_scalbn(ST0, -int_exp, + &env->fp_status), + floatx80_one, &env->fp_status); + if (floatx80_is_zero(arg0_m1)) { + /* Exact power of 2; multiply by ST1. */ + env->fp_status.float_rounding_mode = save_mode; + ST1 = floatx80_mul(int32_to_floatx80(int_exp, &env->fp_status), + ST1, &env->fp_status); + } else { + bool asign = extractFloatx80Sign(arg0_m1); + int32_t aexp; + uint64_t asig0, asig1, asig2; + helper_fyl2x_common(env, arg0_m1, &aexp, &asig0, &asig1); + if (int_exp != 0) { + bool isign = (int_exp < 0); + int32_t iexp; + uint64_t isig; + int shift; + int_exp = isign ? -int_exp : int_exp; + shift = clz32(int_exp) + 32; + isig = int_exp; + isig <<= shift; + iexp = 0x403e - shift; + shift128RightJamming(asig0, asig1, iexp - aexp, + &asig0, &asig1); + if (asign == isign) { + add128(isig, 0, asig0, asig1, &asig0, &asig1); + } else { + sub128(isig, 0, asig0, asig1, &asig0, &asig1); + } + aexp = iexp; + asign = isign; + } + /* + * Multiply by the second argument to compute the required + * result. + */ + if (arg1_exp == 0) { + normalizeFloatx80Subnormal(arg1_sig, &arg1_exp, &arg1_sig); + } + mul128By64To192(asig0, asig1, arg1_sig, &asig0, &asig1, &asig2); + aexp += arg1_exp - 0x3ffe; + /* This result is inexact. 
*/ + asig1 |= 1; + env->fp_status.float_rounding_mode = save_mode; + ST1 = normalizeRoundAndPackFloatx80(80, asign ^ arg1_sign, aexp, + asig0, asig1, &env->fp_status); + } + + env->fp_status.floatx80_rounding_precision = save_prec; } + fpop(env); + merge_exception_flags(env, old_flags); } void helper_fsqrt(CPUX86State *env) { + uint8_t old_flags = save_exception_flags(env); if (floatx80_is_neg(ST0)) { env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */ env->fpus |= 0x400; } ST0 = floatx80_sqrt(ST0, &env->fp_status); + merge_exception_flags(env, old_flags); } void helper_fsincos(CPUX86State *env) @@ -950,17 +2228,60 @@ void helper_fsincos(CPUX86State *env) void helper_frndint(CPUX86State *env) { + uint8_t old_flags = save_exception_flags(env); ST0 = floatx80_round_to_int(ST0, &env->fp_status); + merge_exception_flags(env, old_flags); } void helper_fscale(CPUX86State *env) { - if (floatx80_is_any_nan(ST1)) { + uint8_t old_flags = save_exception_flags(env); + if (floatx80_invalid_encoding(ST1) || floatx80_invalid_encoding(ST0)) { + float_raise(float_flag_invalid, &env->fp_status); + ST0 = floatx80_default_nan(&env->fp_status); + } else if (floatx80_is_any_nan(ST1)) { + if (floatx80_is_signaling_nan(ST0, &env->fp_status)) { + float_raise(float_flag_invalid, &env->fp_status); + } ST0 = ST1; + if (floatx80_is_signaling_nan(ST0, &env->fp_status)) { + float_raise(float_flag_invalid, &env->fp_status); + ST0 = floatx80_silence_nan(ST0, &env->fp_status); + } + } else if (floatx80_is_infinity(ST1) && + !floatx80_invalid_encoding(ST0) && + !floatx80_is_any_nan(ST0)) { + if (floatx80_is_neg(ST1)) { + if (floatx80_is_infinity(ST0)) { + float_raise(float_flag_invalid, &env->fp_status); + ST0 = floatx80_default_nan(&env->fp_status); + } else { + ST0 = (floatx80_is_neg(ST0) ? + floatx80_chs(floatx80_zero) : + floatx80_zero); + } + } else { + if (floatx80_is_zero(ST0)) { + float_raise(float_flag_invalid, &env->fp_status); + ST0 = floatx80_default_nan(&env->fp_status); + } else { + ST0 = (floatx80_is_neg(ST0) ? + floatx80_chs(floatx80_infinity) : + floatx80_infinity); + } + } } else { - int n = floatx80_to_int32_round_to_zero(ST1, &env->fp_status); + int n; + signed char save = env->fp_status.floatx80_rounding_precision; + uint8_t save_flags = get_float_exception_flags(&env->fp_status); + set_float_exception_flags(0, &env->fp_status); + n = floatx80_to_int32_round_to_zero(ST1, &env->fp_status); + set_float_exception_flags(save_flags, &env->fp_status); + env->fp_status.floatx80_rounding_precision = 80; ST0 = floatx80_scalbn(ST0, n, &env->fp_status); + env->fp_status.floatx80_rounding_precision = save; } + merge_exception_flags(env, old_flags); } void helper_fsin(CPUX86State *env) @@ -1010,7 +2331,7 @@ void helper_fxam_ST0(CPUX86State *env) if (expdif == MAXEXPD) { if (MANTD(temp) == 0x8000000000000000ULL) { env->fpus |= 0x500; /* Infinity */ - } else { + } else if (MANTD(temp) & 0x8000000000000000ULL) { env->fpus |= 0x100; /* NaN */ } } else if (expdif == 0) { @@ -1019,7 +2340,7 @@ void helper_fxam_ST0(CPUX86State *env) } else { env->fpus |= 0x4400; /* Denormal */ } - } else { + } else if (MANTD(temp) & 0x8000000000000000ULL) { env->fpus |= 0x400; } } @@ -1171,7 +2492,7 @@ static void do_xsave_fpu(CPUX86State *env, target_ulong ptr, uintptr_t ra) In 64-bit mode this is rip, rdp. But in either case we don't write actual data, just zeros. 
*/ cpu_stq_data_ra(env, ptr + XO(legacy.fpip), env->fpip, ra); /* eip+sel; rip */ - cpu_stq_data_ra(env, ptr + XO(legacy.fpdp), 0, ra); /* edp+sel; rdp */ + cpu_stq_data_ra(env, ptr + XO(legacy.fpdp), env->fpdp, ra); /* edp+sel; rdp */ addr = ptr + XO(legacy.fpregs); for (i = 0; i < 8; i++) { @@ -1183,6 +2504,7 @@ static void do_xsave_fpu(CPUX86State *env, target_ulong ptr, uintptr_t ra) static void do_xsave_mxcsr(CPUX86State *env, target_ulong ptr, uintptr_t ra) { + update_mxcsr_from_sse_status(env); cpu_stl_data_ra(env, ptr + XO(legacy.mxcsr), env->mxcsr, ra); cpu_stl_data_ra(env, ptr + XO(legacy.mxcsr_mask), 0x0000ffff, ra); } @@ -1600,11 +2922,43 @@ void update_mxcsr_status(CPUX86State *env) } set_float_rounding_mode(rnd_type, &env->sse_status); + /* Set exception flags. */ + set_float_exception_flags((mxcsr & FPUS_IE ? float_flag_invalid : 0) | + (mxcsr & FPUS_ZE ? float_flag_divbyzero : 0) | + (mxcsr & FPUS_OE ? float_flag_overflow : 0) | + (mxcsr & FPUS_UE ? float_flag_underflow : 0) | + (mxcsr & FPUS_PE ? float_flag_inexact : 0), + &env->sse_status); + /* set denormals are zero */ set_flush_inputs_to_zero((mxcsr & SSE_DAZ) ? 1 : 0, &env->sse_status); /* set flush to zero */ - set_flush_to_zero((mxcsr & SSE_FZ) ? 1 : 0, &env->fp_status); + set_flush_to_zero((mxcsr & SSE_FZ) ? 1 : 0, &env->sse_status); +} + +void update_mxcsr_from_sse_status(CPUX86State *env) +{ + uint8_t flags = get_float_exception_flags(&env->sse_status); + /* + * The MXCSR denormal flag has opposite semantics to + * float_flag_input_denormal (the softfloat code sets that flag + * only when flushing input denormals to zero, but SSE sets it + * only when not flushing them to zero), so is not converted + * here. + */ + env->mxcsr |= ((flags & float_flag_invalid ? FPUS_IE : 0) | + (flags & float_flag_divbyzero ? FPUS_ZE : 0) | + (flags & float_flag_overflow ? FPUS_OE : 0) | + (flags & float_flag_underflow ? FPUS_UE : 0) | + (flags & float_flag_inexact ? FPUS_PE : 0) | + (flags & float_flag_output_denormal ? 
FPUS_UE | FPUS_PE : + 0)); +} + +void helper_update_mxcsr(CPUX86State *env) +{ + update_mxcsr_from_sse_status(env); } void helper_ldmxcsr(CPUX86State *env, uint32_t val) diff --git a/qemu/target/i386/helper.h b/qemu/target/i386/helper.h index 399cc0df99..ca55ded21a 100644 --- a/qemu/target/i386/helper.h +++ b/qemu/target/i386/helper.h @@ -210,6 +210,7 @@ DEF_HELPER_FLAGS_2(pext, TCG_CALL_NO_RWG_SE, tl, tl, tl) /* MMX/SSE */ DEF_HELPER_2(ldmxcsr, void, env, i32) +DEF_HELPER_1(update_mxcsr, void, env) DEF_HELPER_1(enter_mmx, void, env) DEF_HELPER_1(emms, void, env) DEF_HELPER_3(movq, void, env, ptr, ptr) diff --git a/qemu/target/i386/ops_sse.h b/qemu/target/i386/ops_sse.h index ec1ec745d0..027ff59300 100644 --- a/qemu/target/i386/ops_sse.h +++ b/qemu/target/i386/ops_sse.h @@ -843,6 +843,7 @@ int64_t helper_cvttsd2sq(CPUX86State *env, ZMMReg *s) void helper_rsqrtps(CPUX86State *env, ZMMReg *d, ZMMReg *s) { + uint8_t old_flags = get_float_exception_flags(&env->sse_status); d->ZMM_S(0) = float32_div(float32_one, float32_sqrt(s->ZMM_S(0), &env->sse_status), &env->sse_status); @@ -855,26 +856,33 @@ void helper_rsqrtps(CPUX86State *env, ZMMReg *d, ZMMReg *s) d->ZMM_S(3) = float32_div(float32_one, float32_sqrt(s->ZMM_S(3), &env->sse_status), &env->sse_status); + set_float_exception_flags(old_flags, &env->sse_status); } void helper_rsqrtss(CPUX86State *env, ZMMReg *d, ZMMReg *s) { + uint8_t old_flags = get_float_exception_flags(&env->sse_status); d->ZMM_S(0) = float32_div(float32_one, float32_sqrt(s->ZMM_S(0), &env->sse_status), &env->sse_status); + set_float_exception_flags(old_flags, &env->sse_status); } void helper_rcpps(CPUX86State *env, ZMMReg *d, ZMMReg *s) { + uint8_t old_flags = get_float_exception_flags(&env->sse_status); d->ZMM_S(0) = float32_div(float32_one, s->ZMM_S(0), &env->sse_status); d->ZMM_S(1) = float32_div(float32_one, s->ZMM_S(1), &env->sse_status); d->ZMM_S(2) = float32_div(float32_one, s->ZMM_S(2), &env->sse_status); d->ZMM_S(3) = float32_div(float32_one, s->ZMM_S(3), &env->sse_status); + set_float_exception_flags(old_flags, &env->sse_status); } void helper_rcpss(CPUX86State *env, ZMMReg *d, ZMMReg *s) { + uint8_t old_flags = get_float_exception_flags(&env->sse_status); d->ZMM_S(0) = float32_div(float32_one, s->ZMM_S(0), &env->sse_status); + set_float_exception_flags(old_flags, &env->sse_status); } static inline uint64_t helper_extrq(uint64_t src, int shift, int len) @@ -1031,7 +1039,7 @@ static const int comis_eflags[4] = {CC_C, CC_Z, 0, CC_Z | CC_P | CC_C}; void helper_ucomiss(CPUX86State *env, Reg *d, Reg *s) { - int ret; + FloatRelation ret; float32 s0, s1; s0 = d->ZMM_S(0); @@ -1042,7 +1050,7 @@ void helper_ucomiss(CPUX86State *env, Reg *d, Reg *s) void helper_comiss(CPUX86State *env, Reg *d, Reg *s) { - int ret; + FloatRelation ret; float32 s0, s1; s0 = d->ZMM_S(0); @@ -1053,7 +1061,7 @@ void helper_comiss(CPUX86State *env, Reg *d, Reg *s) void helper_ucomisd(CPUX86State *env, Reg *d, Reg *s) { - int ret; + FloatRelation ret; float64 d0, d1; d0 = d->ZMM_D(0); @@ -1435,34 +1443,46 @@ void glue(helper_pshufb, SUFFIX)(CPUX86State *env, Reg *d, Reg *s) void glue(helper_phaddw, SUFFIX)(CPUX86State *env, Reg *d, Reg *s) { - d->W(0) = (int16_t)d->W(0) + (int16_t)d->W(1); - d->W(1) = (int16_t)d->W(2) + (int16_t)d->W(3); - XMM_ONLY(d->W(2) = (int16_t)d->W(4) + (int16_t)d->W(5)); - XMM_ONLY(d->W(3) = (int16_t)d->W(6) + (int16_t)d->W(7)); - d->W((2 << SHIFT) + 0) = (int16_t)s->W(0) + (int16_t)s->W(1); - d->W((2 << SHIFT) + 1) = (int16_t)s->W(2) + (int16_t)s->W(3); - 
XMM_ONLY(d->W(6) = (int16_t)s->W(4) + (int16_t)s->W(5)); - XMM_ONLY(d->W(7) = (int16_t)s->W(6) + (int16_t)s->W(7)); + Reg r; + + r.W(0) = (int16_t)d->W(0) + (int16_t)d->W(1); + r.W(1) = (int16_t)d->W(2) + (int16_t)d->W(3); + XMM_ONLY(r.W(2) = (int16_t)d->W(4) + (int16_t)d->W(5)); + XMM_ONLY(r.W(3) = (int16_t)d->W(6) + (int16_t)d->W(7)); + r.W((2 << SHIFT) + 0) = (int16_t)s->W(0) + (int16_t)s->W(1); + r.W((2 << SHIFT) + 1) = (int16_t)s->W(2) + (int16_t)s->W(3); + XMM_ONLY(r.W(6) = (int16_t)s->W(4) + (int16_t)s->W(5)); + XMM_ONLY(r.W(7) = (int16_t)s->W(6) + (int16_t)s->W(7)); + + *d = r; } void glue(helper_phaddd, SUFFIX)(CPUX86State *env, Reg *d, Reg *s) { - d->L(0) = (int32_t)d->L(0) + (int32_t)d->L(1); - XMM_ONLY(d->L(1) = (int32_t)d->L(2) + (int32_t)d->L(3)); - d->L((1 << SHIFT) + 0) = (int32_t)s->L(0) + (int32_t)s->L(1); - XMM_ONLY(d->L(3) = (int32_t)s->L(2) + (int32_t)s->L(3)); + Reg r; + + r.L(0) = (int32_t)d->L(0) + (int32_t)d->L(1); + XMM_ONLY(r.L(1) = (int32_t)d->L(2) + (int32_t)d->L(3)); + r.L((1 << SHIFT) + 0) = (int32_t)s->L(0) + (int32_t)s->L(1); + XMM_ONLY(r.L(3) = (int32_t)s->L(2) + (int32_t)s->L(3)); + + *d = r; } void glue(helper_phaddsw, SUFFIX)(CPUX86State *env, Reg *d, Reg *s) { - d->W(0) = satsw((int16_t)d->W(0) + (int16_t)d->W(1)); - d->W(1) = satsw((int16_t)d->W(2) + (int16_t)d->W(3)); - XMM_ONLY(d->W(2) = satsw((int16_t)d->W(4) + (int16_t)d->W(5))); - XMM_ONLY(d->W(3) = satsw((int16_t)d->W(6) + (int16_t)d->W(7))); - d->W((2 << SHIFT) + 0) = satsw((int16_t)s->W(0) + (int16_t)s->W(1)); - d->W((2 << SHIFT) + 1) = satsw((int16_t)s->W(2) + (int16_t)s->W(3)); - XMM_ONLY(d->W(6) = satsw((int16_t)s->W(4) + (int16_t)s->W(5))); - XMM_ONLY(d->W(7) = satsw((int16_t)s->W(6) + (int16_t)s->W(7))); + Reg r; + + r.W(0) = satsw((int16_t)d->W(0) + (int16_t)d->W(1)); + r.W(1) = satsw((int16_t)d->W(2) + (int16_t)d->W(3)); + XMM_ONLY(r.W(2) = satsw((int16_t)d->W(4) + (int16_t)d->W(5))); + XMM_ONLY(r.W(3) = satsw((int16_t)d->W(6) + (int16_t)d->W(7))); + r.W((2 << SHIFT) + 0) = satsw((int16_t)s->W(0) + (int16_t)s->W(1)); + r.W((2 << SHIFT) + 1) = satsw((int16_t)s->W(2) + (int16_t)s->W(3)); + XMM_ONLY(r.W(6) = satsw((int16_t)s->W(4) + (int16_t)s->W(5))); + XMM_ONLY(r.W(7) = satsw((int16_t)s->W(6) + (int16_t)s->W(7))); + + *d = r; } void glue(helper_pmaddubsw, SUFFIX)(CPUX86State *env, Reg *d, Reg *s) @@ -1751,6 +1771,7 @@ void glue(helper_phminposuw, SUFFIX)(CPUX86State *env, Reg *d, Reg *s) void glue(helper_roundps, SUFFIX)(CPUX86State *env, Reg *d, Reg *s, uint32_t mode) { + uint8_t old_flags = get_float_exception_flags(&env->sse_status); signed char prev_rounding_mode; prev_rounding_mode = env->sse_status.float_rounding_mode; @@ -1776,19 +1797,18 @@ void glue(helper_roundps, SUFFIX)(CPUX86State *env, Reg *d, Reg *s, d->ZMM_S(2) = float32_round_to_int(s->ZMM_S(2), &env->sse_status); d->ZMM_S(3) = float32_round_to_int(s->ZMM_S(3), &env->sse_status); -#if 0 /* TODO */ - if (mode & (1 << 3)) { + if (mode & (1 << 3) && !(old_flags & float_flag_inexact)) { set_float_exception_flags(get_float_exception_flags(&env->sse_status) & ~float_flag_inexact, &env->sse_status); } -#endif env->sse_status.float_rounding_mode = prev_rounding_mode; } void glue(helper_roundpd, SUFFIX)(CPUX86State *env, Reg *d, Reg *s, uint32_t mode) { + uint8_t old_flags = get_float_exception_flags(&env->sse_status); signed char prev_rounding_mode; prev_rounding_mode = env->sse_status.float_rounding_mode; @@ -1812,19 +1832,18 @@ void glue(helper_roundpd, SUFFIX)(CPUX86State *env, Reg *d, Reg *s, d->ZMM_D(0) = 
float64_round_to_int(s->ZMM_D(0), &env->sse_status); d->ZMM_D(1) = float64_round_to_int(s->ZMM_D(1), &env->sse_status); -#if 0 /* TODO */ - if (mode & (1 << 3)) { + if (mode & (1 << 3) && !(old_flags & float_flag_inexact)) { set_float_exception_flags(get_float_exception_flags(&env->sse_status) & ~float_flag_inexact, &env->sse_status); } -#endif env->sse_status.float_rounding_mode = prev_rounding_mode; } void glue(helper_roundss, SUFFIX)(CPUX86State *env, Reg *d, Reg *s, uint32_t mode) { + uint8_t old_flags = get_float_exception_flags(&env->sse_status); signed char prev_rounding_mode; prev_rounding_mode = env->sse_status.float_rounding_mode; @@ -1847,19 +1866,18 @@ void glue(helper_roundss, SUFFIX)(CPUX86State *env, Reg *d, Reg *s, d->ZMM_S(0) = float32_round_to_int(s->ZMM_S(0), &env->sse_status); -#if 0 /* TODO */ - if (mode & (1 << 3)) { + if (mode & (1 << 3) && !(old_flags & float_flag_inexact)) { set_float_exception_flags(get_float_exception_flags(&env->sse_status) & ~float_flag_inexact, &env->sse_status); } -#endif env->sse_status.float_rounding_mode = prev_rounding_mode; } void glue(helper_roundsd, SUFFIX)(CPUX86State *env, Reg *d, Reg *s, uint32_t mode) { + uint8_t old_flags = get_float_exception_flags(&env->sse_status); signed char prev_rounding_mode; prev_rounding_mode = env->sse_status.float_rounding_mode; @@ -1882,13 +1900,11 @@ void glue(helper_roundsd, SUFFIX)(CPUX86State *env, Reg *d, Reg *s, d->ZMM_D(0) = float64_round_to_int(s->ZMM_D(0), &env->sse_status); -#if 0 /* TODO */ - if (mode & (1 << 3)) { + if (mode & (1 << 3) && !(old_flags & float_flag_inexact)) { set_float_exception_flags(get_float_exception_flags(&env->sse_status) & ~float_flag_inexact, &env->sse_status); } -#endif env->sse_status.float_rounding_mode = prev_rounding_mode; } @@ -2076,10 +2092,10 @@ static inline unsigned pcmpxstrx(CPUX86State *env, Reg *d, Reg *s, res = (2 << upper) - 1; break; } - for (j = valids - validd; j >= 0; j--) { + for (j = valids == upper ? 
valids : valids - validd; j >= 0; j--) { res <<= 1; v = 1; - for (i = validd; i >= 0; i--) { + for (i = MIN(valids - j, validd); i >= 0; i--) { v &= (pcmp_val(s, ctrl, i + j) == pcmp_val(d, ctrl, i)); } res |= v; diff --git a/qemu/target/i386/svm.h b/qemu/target/i386/svm.h index 30649ee9cb..35d0b8a0a8 100644 --- a/qemu/target/i386/svm.h +++ b/qemu/target/i386/svm.h @@ -137,6 +137,7 @@ #define SVM_NPT_PAE (1 << 0) #define SVM_NPT_LMA (1 << 1) #define SVM_NPT_NXE (1 << 2) +#define SVM_NPT_PSE (1 << 3) #define SVM_NPTEXIT_P (1ULL << 0) #define SVM_NPTEXIT_RW (1ULL << 1) diff --git a/qemu/target/i386/svm_helper.c b/qemu/target/i386/svm_helper.c index ade26593a3..4553dc6810 100644 --- a/qemu/target/i386/svm_helper.c +++ b/qemu/target/i386/svm_helper.c @@ -152,16 +152,21 @@ void helper_vmrun(CPUX86State *env, int aflag, int next_eip_addend) nested_ctl = x86_ldq_phys(cs, env->vm_vmcb + offsetof(struct vmcb, control.nested_ctl)); + + env->nested_pg_mode = 0; + if (nested_ctl & SVM_NPT_ENABLED) { env->nested_cr3 = x86_ldq_phys(cs, env->vm_vmcb + offsetof(struct vmcb, control.nested_cr3)); env->hflags2 |= HF2_NPT_MASK; - env->nested_pg_mode = 0; if (env->cr[4] & CR4_PAE_MASK) { env->nested_pg_mode |= SVM_NPT_PAE; } + if (env->cr[4] & CR4_PSE_MASK) { + env->nested_pg_mode |= SVM_NPT_PSE; + } if (env->hflags & HF_LMA_MASK) { env->nested_pg_mode |= SVM_NPT_LMA; } diff --git a/qemu/target/i386/translate.c b/qemu/target/i386/translate.c index 741102bed4..b92f3f373f 100644 --- a/qemu/target/i386/translate.c +++ b/qemu/target/i386/translate.c @@ -1325,9 +1325,6 @@ static inline void gen_ins(DisasContext *s, MemOp ot) { TCGContext *tcg_ctx = s->uc->tcg_ctx; - if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) { - gen_io_start(tcg_ctx); - } gen_string_movl_A0_EDI(s); /* Note: we must do this dummy write first to be restartable in case of page fault. 
*/ @@ -1340,18 +1337,12 @@ static inline void gen_ins(DisasContext *s, MemOp ot) gen_op_movl_T0_Dshift(s, ot); gen_op_add_reg_T0(s, s->aflag, R_EDI); gen_bpt_io(s, s->tmp2_i32, ot); - if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) { - gen_io_end(tcg_ctx); - } } static inline void gen_outs(DisasContext *s, MemOp ot) { TCGContext *tcg_ctx = s->uc->tcg_ctx; - if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) { - gen_io_start(tcg_ctx); - } gen_string_movl_A0_ESI(s); gen_op_ld_v(s, ot, s->T0, s->A0); @@ -1362,9 +1353,6 @@ static inline void gen_outs(DisasContext *s, MemOp ot) gen_op_movl_T0_Dshift(s, ot); gen_op_add_reg_T0(s, s->aflag, R_ESI); gen_bpt_io(s, s->tmp2_i32, ot); - if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) { - gen_io_end(tcg_ctx); - } } /* same method as Valgrind : we generate jumps to current or next @@ -7017,6 +7005,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) SVM_IOIO_TYPE_MASK | svm_is_rep(prefixes) | 4); if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) { gen_repz_ins(s, ot, pc_start - s->cs_base, s->pc - s->cs_base); + /* jump generated by gen_repz_ins */ } else { gen_ins(s, ot); if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) { @@ -7030,8 +7019,12 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) tcg_gen_ext16u_tl(tcg_ctx, s->T0, tcg_ctx->cpu_regs[R_EDX]); gen_check_io(s, ot, pc_start - s->cs_base, svm_is_rep(prefixes) | 4); + if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) { + gen_io_start(tcg_ctx); + } if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) { gen_repz_outs(s, ot, pc_start - s->cs_base, s->pc - s->cs_base); + /* jump generated by gen_repz_outs */ } else { gen_outs(s, ot); if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) { @@ -7881,6 +7874,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) l1 = gen_new_label(tcg_ctx); l2 = gen_new_label(tcg_ctx); l3 = gen_new_label(tcg_ctx); + gen_update_cc_op(s); b &= 3; switch(b) { case 0: /* loopnz */ @@ -8314,12 +8308,13 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) CASE_MODRM_OP(4): /* smsw */ gen_svm_check_intercept(s, pc_start, SVM_EXIT_READ_CR0); tcg_gen_ld_tl(tcg_ctx, s->T0, tcg_ctx->cpu_env, offsetof(CPUX86State, cr[0])); - if (CODE64(s)) { - mod = (modrm >> 6) & 3; - ot = (mod != 3 ? MO_16 : s->dflag); - } else { - ot = MO_16; - } + /* + * In 32-bit mode, the higher 16 bits of the destination + * register are undefined. In practice CR0[31:0] is stored + * just like in 64-bit mode. + */ + mod = (modrm >> 6) & 3; + ot = (mod != 3 ? 
MO_16 : s->dflag); gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1); break; case 0xee: /* rdpkru */ @@ -8774,7 +8769,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) gen_helper_read_crN(tcg_ctx, s->T0, tcg_ctx->cpu_env, tcg_const_i32(tcg_ctx, reg)); gen_op_mov_reg_v(s, ot, rm, s->T0); if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) { - gen_io_end(tcg_ctx); + gen_jmp(s, s->pc - s->cs_base); } } break; @@ -8892,6 +8887,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) gen_exception(s, EXCP07_PREX, pc_start - s->cs_base); break; } + gen_helper_update_mxcsr(tcg_ctx, tcg_ctx->cpu_env); gen_lea_modrm(env, s, modrm); tcg_gen_ld32u_tl(tcg_ctx, s->T0, tcg_ctx->cpu_env, offsetof(CPUX86State, mxcsr)); gen_op_st_v(s, MO_32, s->T0, s->A0); diff --git a/qemu/target/m68k/cpu.c b/qemu/target/m68k/cpu.c index 6b636b80eb..5802dbdf24 100644 --- a/qemu/target/m68k/cpu.c +++ b/qemu/target/m68k/cpu.c @@ -236,12 +236,6 @@ static void m68k_cpu_class_init(CPUClass *c) cc->tcg_initialize = m68k_tcg_init; } -#define DEFINE_M68K_CPU_TYPE(cpu_model, initfn) \ - { \ - .name = cpu_model, \ - .initfn = initfn, \ - } - struct M68kCPUInfo { const char *name; void (*initfn)(CPUState *obj); diff --git a/qemu/target/m68k/fpu_helper.c b/qemu/target/m68k/fpu_helper.c index 3f544a0572..7f67fa10db 100644 --- a/qemu/target/m68k/fpu_helper.c +++ b/qemu/target/m68k/fpu_helper.c @@ -149,7 +149,7 @@ void cpu_m68k_set_fpcr(CPUM68KState *env, uint32_t val) void HELPER(fitrunc)(CPUM68KState *env, FPReg *res, FPReg *val) { - int rounding_mode = get_float_rounding_mode(&env->fp_status); + FloatRoundMode rounding_mode = get_float_rounding_mode(&env->fp_status); set_float_rounding_mode(float_round_to_zero, &env->fp_status); res->d = floatx80_round_to_int(val->d, &env->fp_status); set_float_rounding_mode(rounding_mode, &env->fp_status); @@ -300,7 +300,7 @@ void HELPER(fdmul)(CPUM68KState *env, FPReg *res, FPReg *val0, FPReg *val1) void HELPER(fsglmul)(CPUM68KState *env, FPReg *res, FPReg *val0, FPReg *val1) { - int rounding_mode = get_float_rounding_mode(&env->fp_status); + FloatRoundMode rounding_mode = get_float_rounding_mode(&env->fp_status); floatx80 a, b; PREC_BEGIN(32); @@ -333,7 +333,7 @@ void HELPER(fddiv)(CPUM68KState *env, FPReg *res, FPReg *val0, FPReg *val1) void HELPER(fsgldiv)(CPUM68KState *env, FPReg *res, FPReg *val0, FPReg *val1) { - int rounding_mode = get_float_rounding_mode(&env->fp_status); + FloatRoundMode rounding_mode = get_float_rounding_mode(&env->fp_status); floatx80 a, b; PREC_BEGIN(32); @@ -642,6 +642,11 @@ void HELPER(fatanh)(CPUM68KState *env, FPReg *res, FPReg *val) res->d = floatx80_atanh(val->d, &env->fp_status); } +void HELPER(fetoxm1)(CPUM68KState *env, FPReg *res, FPReg *val) +{ + res->d = floatx80_etoxm1(val->d, &env->fp_status); +} + void HELPER(ftanh)(CPUM68KState *env, FPReg *res, FPReg *val) { res->d = floatx80_tanh(val->d, &env->fp_status); diff --git a/qemu/target/m68k/helper.c b/qemu/target/m68k/helper.c index b0f2e298e7..fb441ee935 100644 --- a/qemu/target/m68k/helper.c +++ b/qemu/target/m68k/helper.c @@ -284,7 +284,7 @@ static int get_physical_address(CPUM68KState *env, hwaddr *physical, /* Transparent Translation Register bit */ env->mmu.mmusr = M68K_MMU_T_040 | M68K_MMU_R_040; } - *physical = address & TARGET_PAGE_MASK; + *physical = address; *page_size = TARGET_PAGE_SIZE; return 0; } @@ -412,7 +412,7 @@ static int get_physical_address(CPUM68KState *env, hwaddr *physical, } *page_size = 1 << page_bits; page_mask = ~(*page_size - 1); - *physical = next & page_mask; + 
*physical = (next & page_mask) + (address & (*page_size - 1)); if (access_type & ACCESS_PTEST) { env->mmu.mmusr |= next & M68K_MMU_SR_MASK_040; @@ -461,6 +461,7 @@ hwaddr m68k_cpu_get_phys_page_debug(CPUState *cs, vaddr addr) if (env->sr & SR_S) { access_type |= ACCESS_SUPER; } + if (get_physical_address(env, &phys_addr, &prot, addr, access_type, &page_size) != 0) { return -1; @@ -525,10 +526,8 @@ bool m68k_cpu_tlb_fill(CPUState *cs, vaddr address, int size, ret = get_physical_address(&cpu->env, &physical, &prot, address, access_type, &page_size); if (likely(ret == 0)) { - address &= TARGET_PAGE_MASK; - physical += address & (page_size - 1); - tlb_set_page(cs, address, physical, - prot, mmu_idx, TARGET_PAGE_SIZE); + tlb_set_page(cs, address & TARGET_PAGE_MASK, + physical & TARGET_PAGE_MASK, prot, mmu_idx, page_size); return true; } @@ -1015,9 +1014,8 @@ void HELPER(ptest)(CPUM68KState *env, uint32_t addr, uint32_t is_read) ret = get_physical_address(env, &physical, &prot, addr, access_type, &page_size); if (ret == 0) { - addr &= TARGET_PAGE_MASK; - physical += addr & (page_size - 1); - tlb_set_page(env_cpu(env), addr, physical, + tlb_set_page(env_cpu(env), addr & TARGET_PAGE_MASK, + physical & TARGET_PAGE_MASK, prot, access_type & ACCESS_SUPER ? MMU_KERNEL_IDX : MMU_USER_IDX, page_size); } diff --git a/qemu/target/m68k/helper.h b/qemu/target/m68k/helper.h index 413f88dc65..f9978b0e20 100644 --- a/qemu/target/m68k/helper.h +++ b/qemu/target/m68k/helper.h @@ -86,6 +86,7 @@ DEF_HELPER_3(fatan, void, env, fp, fp) DEF_HELPER_3(fasin, void, env, fp, fp) DEF_HELPER_3(facos, void, env, fp, fp) DEF_HELPER_3(fatanh, void, env, fp, fp) +DEF_HELPER_3(fetoxm1, void, env, fp, fp) DEF_HELPER_3(ftanh, void, env, fp, fp) DEF_HELPER_3(fsinh, void, env, fp, fp) DEF_HELPER_3(fcosh, void, env, fp, fp) diff --git a/qemu/target/m68k/softfloat.c b/qemu/target/m68k/softfloat.c index 24c313ed69..b6d0ed7acf 100644 --- a/qemu/target/m68k/softfloat.c +++ b/qemu/target/m68k/softfloat.c @@ -42,89 +42,6 @@ static floatx80 propagateFloatx80NaNOneArg(floatx80 a, float_status *status) return a; } -/* - * Returns the modulo remainder of the extended double-precision floating-point - * value `a' with respect to the corresponding value `b'. - */ - -floatx80 floatx80_mod(floatx80 a, floatx80 b, float_status *status) -{ - flag aSign, zSign; - int32_t aExp, bExp, expDiff; - uint64_t aSig0, aSig1, bSig; - uint64_t qTemp, term0, term1; - - aSig0 = extractFloatx80Frac(a); - aExp = extractFloatx80Exp(a); - aSign = extractFloatx80Sign(a); - bSig = extractFloatx80Frac(b); - bExp = extractFloatx80Exp(b); - - if (aExp == 0x7FFF) { - if ((uint64_t) (aSig0 << 1) - || ((bExp == 0x7FFF) && (uint64_t) (bSig << 1))) { - return propagateFloatx80NaN(a, b, status); - } - goto invalid; - } - if (bExp == 0x7FFF) { - if ((uint64_t) (bSig << 1)) { - return propagateFloatx80NaN(a, b, status); - } - return a; - } - if (bExp == 0) { - if (bSig == 0) { - invalid: - float_raise(float_flag_invalid, status); - return floatx80_default_nan(status); - } - normalizeFloatx80Subnormal(bSig, &bExp, &bSig); - } - if (aExp == 0) { - if ((uint64_t) (aSig0 << 1) == 0) { - return a; - } - normalizeFloatx80Subnormal(aSig0, &aExp, &aSig0); - } - bSig |= UINT64_C(0x8000000000000000); - zSign = aSign; - expDiff = aExp - bExp; - aSig1 = 0; - if (expDiff < 0) { - return a; - } - qTemp = (bSig <= aSig0); - if (qTemp) { - aSig0 -= bSig; - } - expDiff -= 64; - while (0 < expDiff) { - qTemp = estimateDiv128To64(aSig0, aSig1, bSig); - qTemp = (2 < qTemp) ? 
qTemp - 2 : 0; - mul64To128(bSig, qTemp, &term0, &term1); - sub128(aSig0, aSig1, term0, term1, &aSig0, &aSig1); - shortShift128Left(aSig0, aSig1, 62, &aSig0, &aSig1); - expDiff -= 62; - } - expDiff += 64; - if (0 < expDiff) { - qTemp = estimateDiv128To64(aSig0, aSig1, bSig); - qTemp = (2 < qTemp) ? qTemp - 2 : 0; - qTemp >>= 64 - expDiff; - mul64To128(bSig, qTemp << (64 - expDiff), &term0, &term1); - sub128(aSig0, aSig1, term0, term1, &aSig0, &aSig1); - shortShift128Left(0, bSig, 64 - expDiff, &term0, &term1); - while (le128(term0, term1, aSig0, aSig1)) { - ++qTemp; - sub128(aSig0, aSig1, term0, term1, &aSig0, &aSig1); - } - } - return - normalizeRoundAndPackFloatx80( - 80, zSign, bExp + expDiff, aSig0, aSig1, status); -} - /* * Returns the mantissa of the extended double-precision floating-point * value `a'. @@ -132,7 +49,7 @@ floatx80 floatx80_mod(floatx80 a, floatx80 b, float_status *status) floatx80 floatx80_getman(floatx80 a, float_status *status) { - flag aSign; + bool aSign; int32_t aExp; uint64_t aSig; @@ -166,7 +83,7 @@ floatx80 floatx80_getman(floatx80 a, float_status *status) floatx80 floatx80_getexp(floatx80 a, float_status *status) { - flag aSign; + bool aSign; int32_t aExp; uint64_t aSig; @@ -202,7 +119,7 @@ floatx80 floatx80_getexp(floatx80 a, float_status *status) floatx80 floatx80_scale(floatx80 a, floatx80 b, float_status *status) { - flag aSign, bSign; + bool aSign, bSign; int32_t aExp, bExp, shiftCount; uint64_t aSig, bSig; @@ -258,7 +175,7 @@ floatx80 floatx80_scale(floatx80 a, floatx80 b, float_status *status) floatx80 floatx80_move(floatx80 a, float_status *status) { - flag aSign; + bool aSign; int32_t aExp; uint64_t aSig; @@ -306,7 +223,7 @@ static int32_t floatx80_make_compact(int32_t aExp, uint64_t aSig) floatx80 floatx80_lognp1(floatx80 a, float_status *status) { - flag aSign; + bool aSign; int32_t aExp; uint64_t aSig, fSig; @@ -505,7 +422,7 @@ floatx80 floatx80_lognp1(floatx80 a, float_status *status) floatx80 floatx80_logn(floatx80 a, float_status *status) { - flag aSign; + bool aSign; int32_t aExp; uint64_t aSig, fSig; @@ -673,7 +590,7 @@ floatx80 floatx80_logn(floatx80 a, float_status *status) floatx80 floatx80_log10(floatx80 a, float_status *status) { - flag aSign; + bool aSign; int32_t aExp; uint64_t aSig; @@ -730,7 +647,7 @@ floatx80 floatx80_log10(floatx80 a, float_status *status) floatx80 floatx80_log2(floatx80 a, float_status *status) { - flag aSign; + bool aSign; int32_t aExp; uint64_t aSig; @@ -797,7 +714,7 @@ floatx80 floatx80_log2(floatx80 a, float_status *status) floatx80 floatx80_etox(floatx80 a, float_status *status) { - flag aSign; + bool aSign; int32_t aExp; uint64_t aSig; @@ -805,7 +722,7 @@ floatx80 floatx80_etox(floatx80 a, float_status *status) int32_t compact, n, j, k, m, m1; floatx80 fp0, fp1, fp2, fp3, l2, scale, adjscale; - flag adjflag; + bool adjflag; aSig = extractFloatx80Frac(a); aExp = extractFloatx80Exp(a); @@ -981,7 +898,7 @@ floatx80 floatx80_etox(floatx80 a, float_status *status) floatx80 floatx80_twotox(floatx80 a, float_status *status) { - flag aSign; + bool aSign; int32_t aExp; uint64_t aSig; @@ -1131,7 +1048,7 @@ floatx80 floatx80_twotox(floatx80 a, float_status *status) floatx80 floatx80_tentox(floatx80 a, float_status *status) { - flag aSign; + bool aSign; int32_t aExp; uint64_t aSig; @@ -1286,7 +1203,7 @@ floatx80 floatx80_tentox(floatx80 a, float_status *status) floatx80 floatx80_tan(floatx80 a, float_status *status) { - flag aSign, xSign; + bool aSign, xSign; int32_t aExp, xExp; uint64_t aSig, xSig; @@ -1295,7 +1212,7 
@@ floatx80 floatx80_tan(floatx80 a, float_status *status) int32_t compact, l, n, j; floatx80 fp0, fp1, fp2, fp3, fp4, fp5, invtwopi, twopi1, twopi2; float32 twoto63; - flag endflag; + bool endflag; aSig = extractFloatx80Frac(a); aExp = extractFloatx80Exp(a); @@ -1344,10 +1261,10 @@ floatx80 floatx80_tan(floatx80 a, float_status *status) xExp -= 0x3FFF; if (xExp <= 28) { l = 0; - endflag = 1; + endflag = true; } else { l = xExp - 27; - endflag = 0; + endflag = false; } invtwopi = packFloatx80(0, 0x3FFE - l, UINT64_C(0xA2F9836E4E44152A)); /* INVTWOPI */ @@ -1372,7 +1289,7 @@ floatx80 floatx80_tan(floatx80 a, float_status *status) fp1 = floatx80_sub(fp1, fp4, status); /* FP1 is a := r - p */ fp0 = floatx80_add(fp0, fp1, status); /* FP0 is R := A+a */ - if (endflag > 0) { + if (endflag) { n = floatx80_to_int32(fp2, status); goto tancont; } @@ -1496,7 +1413,7 @@ floatx80 floatx80_tan(floatx80 a, float_status *status) floatx80 floatx80_sin(floatx80 a, float_status *status) { - flag aSign, xSign; + bool aSign, xSign; int32_t aExp, xExp; uint64_t aSig, xSig; @@ -1505,7 +1422,7 @@ floatx80 floatx80_sin(floatx80 a, float_status *status) int32_t compact, l, n, j; floatx80 fp0, fp1, fp2, fp3, fp4, fp5, x, invtwopi, twopi1, twopi2; float32 posneg1, twoto63; - flag endflag; + bool endflag; aSig = extractFloatx80Frac(a); aExp = extractFloatx80Exp(a); @@ -1554,10 +1471,10 @@ floatx80 floatx80_sin(floatx80 a, float_status *status) xExp -= 0x3FFF; if (xExp <= 28) { l = 0; - endflag = 1; + endflag = true; } else { l = xExp - 27; - endflag = 0; + endflag = false; } invtwopi = packFloatx80(0, 0x3FFE - l, UINT64_C(0xA2F9836E4E44152A)); /* INVTWOPI */ @@ -1582,7 +1499,7 @@ floatx80 floatx80_sin(floatx80 a, float_status *status) fp1 = floatx80_sub(fp1, fp4, status); /* FP1 is a := r - p */ fp0 = floatx80_add(fp0, fp1, status); /* FP0 is R := A+a */ - if (endflag > 0) { + if (endflag) { n = floatx80_to_int32(fp2, status); goto sincont; } @@ -1735,7 +1652,7 @@ floatx80 floatx80_sin(floatx80 a, float_status *status) floatx80 floatx80_cos(floatx80 a, float_status *status) { - flag aSign, xSign; + bool aSign, xSign; int32_t aExp, xExp; uint64_t aSig, xSig; @@ -1744,7 +1661,7 @@ floatx80 floatx80_cos(floatx80 a, float_status *status) int32_t compact, l, n, j; floatx80 fp0, fp1, fp2, fp3, fp4, fp5, x, invtwopi, twopi1, twopi2; float32 posneg1, twoto63; - flag endflag; + bool endflag; aSig = extractFloatx80Frac(a); aExp = extractFloatx80Exp(a); @@ -1793,10 +1710,10 @@ floatx80 floatx80_cos(floatx80 a, float_status *status) xExp -= 0x3FFF; if (xExp <= 28) { l = 0; - endflag = 1; + endflag = true; } else { l = xExp - 27; - endflag = 0; + endflag = false; } invtwopi = packFloatx80(0, 0x3FFE - l, UINT64_C(0xA2F9836E4E44152A)); /* INVTWOPI */ @@ -1821,7 +1738,7 @@ floatx80 floatx80_cos(floatx80 a, float_status *status) fp1 = floatx80_sub(fp1, fp4, status); /* FP1 is a := r - p */ fp0 = floatx80_add(fp0, fp1, status); /* FP0 is R := A+a */ - if (endflag > 0) { + if (endflag) { n = floatx80_to_int32(fp2, status); goto sincont; } @@ -1972,7 +1889,7 @@ floatx80 floatx80_cos(floatx80 a, float_status *status) floatx80 floatx80_atan(floatx80 a, float_status *status) { - flag aSign; + bool aSign; int32_t aExp; uint64_t aSig; @@ -2169,7 +2086,7 @@ floatx80 floatx80_atan(floatx80 a, float_status *status) floatx80 floatx80_asin(floatx80 a, float_status *status) { - flag aSign; + bool aSign; int32_t aExp; uint64_t aSig; @@ -2234,7 +2151,7 @@ floatx80 floatx80_asin(floatx80 a, float_status *status) floatx80 floatx80_acos(floatx80 a, 
float_status *status) { - flag aSign; + bool aSign; int32_t aExp; uint64_t aSig; @@ -2303,7 +2220,7 @@ floatx80 floatx80_acos(floatx80 a, float_status *status) floatx80 floatx80_atanh(floatx80 a, float_status *status) { - flag aSign; + bool aSign; int32_t aExp; uint64_t aSig; @@ -2368,7 +2285,7 @@ floatx80 floatx80_atanh(floatx80 a, float_status *status) floatx80 floatx80_etoxm1(floatx80 a, float_status *status) { - flag aSign; + bool aSign; int32_t aExp; uint64_t aSig; @@ -2620,7 +2537,7 @@ floatx80 floatx80_etoxm1(floatx80 a, float_status *status) floatx80 floatx80_tanh(floatx80 a, float_status *status) { - flag aSign, vSign; + bool aSign, vSign; int32_t aExp, vExp; uint64_t aSig, vSig; @@ -2735,7 +2652,7 @@ floatx80 floatx80_tanh(floatx80 a, float_status *status) floatx80 floatx80_sinh(floatx80 a, float_status *status) { - flag aSign; + bool aSign; int32_t aExp; uint64_t aSig; diff --git a/qemu/target/m68k/softfloat.h b/qemu/target/m68k/softfloat.h index 365ef6ac7a..4bb9567134 100644 --- a/qemu/target/m68k/softfloat.h +++ b/qemu/target/m68k/softfloat.h @@ -23,7 +23,6 @@ #define TARGET_M68K_SOFTFLOAT_H #include "fpu/softfloat.h" -floatx80 floatx80_mod(floatx80 a, floatx80 b, float_status *status); floatx80 floatx80_getman(floatx80 a, float_status *status); floatx80 floatx80_getexp(floatx80 a, float_status *status); floatx80 floatx80_scale(floatx80 a, floatx80 b, float_status *status); diff --git a/qemu/target/m68k/translate.c b/qemu/target/m68k/translate.c index 5d0fa7497a..fdb4ccb977 100644 --- a/qemu/target/m68k/translate.c +++ b/qemu/target/m68k/translate.c @@ -5077,6 +5077,20 @@ static void gen_op_fmove_fcr(CPUM68KState *env, DisasContext *s, gen_store_fcr(s, AREG(insn, 0), mask); } return; + case 7: /* Immediate */ + if (REG(insn, 0) == 4) { + if (is_write || + (mask != M68K_FPIAR && mask != M68K_FPSR && + mask != M68K_FPCR)) { + gen_exception(s, s->base.pc_next, EXCP_ILLEGAL); + return; + } + tmp = tcg_const_i32(tcg_ctx, read_im32(env, s)); + gen_store_fcr(s, tmp, mask); + tcg_temp_free(tcg_ctx, tmp); + return; + } + break; default: break; } @@ -5289,6 +5303,9 @@ DISAS_INSN(fpu) case 0x06: /* flognp1 */ gen_helper_flognp1(tcg_ctx, tcg_ctx->cpu_env, cpu_dest, cpu_src); break; + case 0x08: /* fetoxm1 */ + gen_helper_fetoxm1(tcg_ctx, tcg_ctx->cpu_env, cpu_dest, cpu_src); + break; case 0x09: /* ftanh */ gen_helper_ftanh(tcg_ctx, tcg_ctx->cpu_env, cpu_dest, cpu_src); break; diff --git a/qemu/target/mips/cp0_helper.c b/qemu/target/mips/cp0_helper.c index e3600c26d7..9665c0beb5 100644 --- a/qemu/target/mips/cp0_helper.c +++ b/qemu/target/mips/cp0_helper.c @@ -378,16 +378,9 @@ target_ulong helper_mftc0_entryhi(CPUMIPSState *env) target_ulong helper_mftc0_cause(CPUMIPSState *env) { int other_tc = env->CP0_VPEControl & (0xff << CP0VPECo_TargTC); - int32_t tccause; CPUMIPSState *other = mips_cpu_map_tc(env, &other_tc); - if (other_tc == other->current_tc) { - tccause = other->CP0_Cause; - } else { - tccause = other->CP0_Cause; - } - - return tccause; + return other->CP0_Cause; } target_ulong helper_mftc0_status(CPUMIPSState *env) @@ -877,6 +870,7 @@ void helper_mtc0_memorymapid(CPUMIPSState *env, target_ulong arg1) void update_pagemask(CPUMIPSState *env, target_ulong arg1, int32_t *pagemask) { + struct uc_struct *uc = env->uc; uint64_t mask = arg1 >> (TARGET_PAGE_BITS + 1); if (!(env->insn_flags & ISA_MIPS32R6) || (arg1 == ~0) || (mask == 0x0000 || mask == 0x0003 || mask == 0x000F || @@ -1113,6 +1107,7 @@ void helper_mthc0_saar(CPUMIPSState *env, target_ulong arg1) void 
helper_mtc0_entryhi(CPUMIPSState *env, target_ulong arg1) { + struct uc_struct *uc = env->uc; target_ulong old, val, mask; mask = (TARGET_PAGE_MASK << 1) | env->CP0_EntryHi_ASID_mask; if (((env->CP0_Config4 >> CP0C4_IE) & 0x3) >= 2) { diff --git a/qemu/target/mips/cpu-param.h b/qemu/target/mips/cpu-param.h index f073f379fc..57caf5d588 100644 --- a/qemu/target/mips/cpu-param.h +++ b/qemu/target/mips/cpu-param.h @@ -19,7 +19,8 @@ #define TARGET_PHYS_ADDR_SPACE_BITS 40 #define TARGET_VIRT_ADDR_SPACE_BITS 32 #endif -#define TARGET_PAGE_BITS 12 +#define TARGET_PAGE_BITS_VARY +#define TARGET_PAGE_BITS_MIN 12 #define NB_MMU_MODES 4 #endif diff --git a/qemu/target/mips/cpu.h b/qemu/target/mips/cpu.h index 95f6bf5077..4ddd4321d3 100644 --- a/qemu/target/mips/cpu.h +++ b/qemu/target/mips/cpu.h @@ -940,7 +940,35 @@ struct CPUMIPSState { #define CP0C5_UFR 2 #define CP0C5_NFExists 0 int32_t CP0_Config6; + int32_t CP0_Config6_rw_bitmask; +#define CP0C6_BPPASS 31 +#define CP0C6_KPOS 24 +#define CP0C6_KE 23 +#define CP0C6_VTLBONLY 22 +#define CP0C6_LASX 21 +#define CP0C6_SSEN 20 +#define CP0C6_DISDRTIME 19 +#define CP0C6_PIXNUEN 18 +#define CP0C6_SCRAND 17 +#define CP0C6_LLEXCEN 16 +#define CP0C6_DISVC 15 +#define CP0C6_VCLRU 14 +#define CP0C6_DCLRU 13 +#define CP0C6_PIXUEN 12 +#define CP0C6_DISBLKLYEN 11 +#define CP0C6_UMEMUALEN 10 +#define CP0C6_SFBEN 8 +#define CP0C6_FLTINT 7 +#define CP0C6_VLTINT 6 +#define CP0C6_DISBTB 5 +#define CP0C6_STPREFCTL 2 +#define CP0C6_INSTPREF 1 +#define CP0C6_DATAPREF 0 int32_t CP0_Config7; + int64_t CP0_Config7_rw_bitmask; +#define CP0C7_NAPCGEN 2 +#define CP0C7_UNIMUEN 1 +#define CP0C7_VFPUCGEN 0 uint64_t CP0_LLAddr; uint64_t CP0_MAAR[MIPS_MAAR_MAX]; int32_t CP0_MAARI; diff --git a/qemu/target/mips/fpu_helper.c b/qemu/target/mips/fpu_helper.c index 34431468af..027d8c0fa9 100644 --- a/qemu/target/mips/fpu_helper.c +++ b/qemu/target/mips/fpu_helper.c @@ -28,7 +28,6 @@ #include "exec/exec-all.h" #include "exec/cpu_ldst.h" #include "exec/memop.h" -//#include "sysemu/kvm.h" #include "fpu/softfloat.h" @@ -188,43 +187,48 @@ void helper_ctc1(CPUMIPSState *env, target_ulong arg1, uint32_t fs, uint32_t rt) } } -int ieee_ex_to_mips(int xcpt) +static inline int ieee_to_mips_xcpt(int ieee_xcpt) { - int ret = 0; - if (xcpt) { - if (xcpt & float_flag_invalid) { - ret |= FP_INVALID; - } - if (xcpt & float_flag_overflow) { - ret |= FP_OVERFLOW; - } - if (xcpt & float_flag_underflow) { - ret |= FP_UNDERFLOW; - } - if (xcpt & float_flag_divbyzero) { - ret |= FP_DIV0; - } - if (xcpt & float_flag_inexact) { - ret |= FP_INEXACT; - } + int mips_xcpt = 0; + + if (ieee_xcpt & float_flag_invalid) { + mips_xcpt |= FP_INVALID; + } + if (ieee_xcpt & float_flag_overflow) { + mips_xcpt |= FP_OVERFLOW; + } + if (ieee_xcpt & float_flag_underflow) { + mips_xcpt |= FP_UNDERFLOW; } - return ret; + if (ieee_xcpt & float_flag_divbyzero) { + mips_xcpt |= FP_DIV0; + } + if (ieee_xcpt & float_flag_inexact) { + mips_xcpt |= FP_INEXACT; + } + + return mips_xcpt; } static inline void update_fcr31(CPUMIPSState *env, uintptr_t pc) { - int tmp = ieee_ex_to_mips(get_float_exception_flags( - &env->active_fpu.fp_status)); + int ieee_exception_flags = get_float_exception_flags( + &env->active_fpu.fp_status); + int mips_exception_flags = 0; - SET_FP_CAUSE(env->active_fpu.fcr31, tmp); + if (ieee_exception_flags) { + mips_exception_flags = ieee_to_mips_xcpt(ieee_exception_flags); + } - if (tmp) { + SET_FP_CAUSE(env->active_fpu.fcr31, mips_exception_flags); + + if (mips_exception_flags) { set_float_exception_flags(0, 
&env->active_fpu.fp_status); - if (GET_FP_ENABLE(env->active_fpu.fcr31) & tmp) { + if (GET_FP_ENABLE(env->active_fpu.fcr31) & mips_exception_flags) { do_raise_exception(env, EXCP_FPE, pc); } else { - UPDATE_FP_FLAGS(env->active_fpu.fcr31, tmp); + UPDATE_FP_FLAGS(env->active_fpu.fcr31, mips_exception_flags); } } } @@ -1058,14 +1062,14 @@ uint32_t helper_float_recip1_s(CPUMIPSState *env, uint32_t fst0) uint64_t helper_float_recip1_ps(CPUMIPSState *env, uint64_t fdt0) { - uint32_t fst2; + uint32_t fstl2; uint32_t fsth2; - fst2 = float32_div(float32_one, fdt0 & 0XFFFFFFFF, - &env->active_fpu.fp_status); + fstl2 = float32_div(float32_one, fdt0 & 0XFFFFFFFF, + &env->active_fpu.fp_status); fsth2 = float32_div(float32_one, fdt0 >> 32, &env->active_fpu.fp_status); update_fcr31(env, GETPC()); - return ((uint64_t)fsth2 << 32) | fst2; + return ((uint64_t)fsth2 << 32) | fstl2; } uint64_t helper_float_rsqrt1_d(CPUMIPSState *env, uint64_t fdt0) @@ -1090,31 +1094,34 @@ uint32_t helper_float_rsqrt1_s(CPUMIPSState *env, uint32_t fst0) uint64_t helper_float_rsqrt1_ps(CPUMIPSState *env, uint64_t fdt0) { - uint32_t fst2; + uint32_t fstl2; uint32_t fsth2; - fst2 = float32_sqrt(fdt0 & 0XFFFFFFFF, &env->active_fpu.fp_status); + fstl2 = float32_sqrt(fdt0 & 0XFFFFFFFF, &env->active_fpu.fp_status); fsth2 = float32_sqrt(fdt0 >> 32, &env->active_fpu.fp_status); - fst2 = float32_div(float32_one, fst2, &env->active_fpu.fp_status); + fstl2 = float32_div(float32_one, fstl2, &env->active_fpu.fp_status); fsth2 = float32_div(float32_one, fsth2, &env->active_fpu.fp_status); update_fcr31(env, GETPC()); - return ((uint64_t)fsth2 << 32) | fst2; + return ((uint64_t)fsth2 << 32) | fstl2; } -#define FLOAT_RINT(name, bits) \ -uint ## bits ## _t helper_float_ ## name(CPUMIPSState *env, \ - uint ## bits ## _t fs) \ -{ \ - uint ## bits ## _t fdret; \ - \ - fdret = float ## bits ## _round_to_int(fs, &env->active_fpu.fp_status); \ - update_fcr31(env, GETPC()); \ - return fdret; \ +uint64_t helper_float_rint_d(CPUMIPSState *env, uint64_t fs) +{ + uint64_t fdret; + + fdret = float64_round_to_int(fs, &env->active_fpu.fp_status); + update_fcr31(env, GETPC()); + return fdret; } -FLOAT_RINT(rint_s, 32) -FLOAT_RINT(rint_d, 64) -#undef FLOAT_RINT +uint32_t helper_float_rint_s(CPUMIPSState *env, uint32_t fs) +{ + uint32_t fdret; + + fdret = float32_round_to_int(fs, &env->active_fpu.fp_status); + update_fcr31(env, GETPC()); + return fdret; +} #define FLOAT_CLASS_SIGNALING_NAN 0x001 #define FLOAT_CLASS_QUIET_NAN 0x002 @@ -1127,91 +1134,220 @@ FLOAT_RINT(rint_d, 64) #define FLOAT_CLASS_POSITIVE_SUBNORMAL 0x100 #define FLOAT_CLASS_POSITIVE_ZERO 0x200 -#define FLOAT_CLASS(name, bits) \ -uint ## bits ## _t float_ ## name(uint ## bits ## _t arg, \ - float_status *status) \ -{ \ - if (float ## bits ## _is_signaling_nan(arg, status)) { \ - return FLOAT_CLASS_SIGNALING_NAN; \ - } else if (float ## bits ## _is_quiet_nan(arg, status)) { \ - return FLOAT_CLASS_QUIET_NAN; \ - } else if (float ## bits ## _is_neg(arg)) { \ - if (float ## bits ## _is_infinity(arg)) { \ - return FLOAT_CLASS_NEGATIVE_INFINITY; \ - } else if (float ## bits ## _is_zero(arg)) { \ - return FLOAT_CLASS_NEGATIVE_ZERO; \ - } else if (float ## bits ## _is_zero_or_denormal(arg)) { \ - return FLOAT_CLASS_NEGATIVE_SUBNORMAL; \ - } else { \ - return FLOAT_CLASS_NEGATIVE_NORMAL; \ - } \ - } else { \ - if (float ## bits ## _is_infinity(arg)) { \ - return FLOAT_CLASS_POSITIVE_INFINITY; \ - } else if (float ## bits ## _is_zero(arg)) { \ - return FLOAT_CLASS_POSITIVE_ZERO; \ - } else if (float ## 
bits ## _is_zero_or_denormal(arg)) { \ - return FLOAT_CLASS_POSITIVE_SUBNORMAL; \ - } else { \ - return FLOAT_CLASS_POSITIVE_NORMAL; \ - } \ - } \ -} \ - \ -uint ## bits ## _t helper_float_ ## name(CPUMIPSState *env, \ - uint ## bits ## _t arg) \ -{ \ - return float_ ## name(arg, &env->active_fpu.fp_status); \ -} - -FLOAT_CLASS(class_s, 32) -FLOAT_CLASS(class_d, 64) -#undef FLOAT_CLASS +uint64_t float_class_d(uint64_t arg, float_status *status) +{ + if (float64_is_signaling_nan(arg, status)) { + return FLOAT_CLASS_SIGNALING_NAN; + } else if (float64_is_quiet_nan(arg, status)) { + return FLOAT_CLASS_QUIET_NAN; + } else if (float64_is_neg(arg)) { + if (float64_is_infinity(arg)) { + return FLOAT_CLASS_NEGATIVE_INFINITY; + } else if (float64_is_zero(arg)) { + return FLOAT_CLASS_NEGATIVE_ZERO; + } else if (float64_is_zero_or_denormal(arg)) { + return FLOAT_CLASS_NEGATIVE_SUBNORMAL; + } else { + return FLOAT_CLASS_NEGATIVE_NORMAL; + } + } else { + if (float64_is_infinity(arg)) { + return FLOAT_CLASS_POSITIVE_INFINITY; + } else if (float64_is_zero(arg)) { + return FLOAT_CLASS_POSITIVE_ZERO; + } else if (float64_is_zero_or_denormal(arg)) { + return FLOAT_CLASS_POSITIVE_SUBNORMAL; + } else { + return FLOAT_CLASS_POSITIVE_NORMAL; + } + } +} + +uint64_t helper_float_class_d(CPUMIPSState *env, uint64_t arg) +{ + return float_class_d(arg, &env->active_fpu.fp_status); +} + +uint32_t float_class_s(uint32_t arg, float_status *status) +{ + if (float32_is_signaling_nan(arg, status)) { + return FLOAT_CLASS_SIGNALING_NAN; + } else if (float32_is_quiet_nan(arg, status)) { + return FLOAT_CLASS_QUIET_NAN; + } else if (float32_is_neg(arg)) { + if (float32_is_infinity(arg)) { + return FLOAT_CLASS_NEGATIVE_INFINITY; + } else if (float32_is_zero(arg)) { + return FLOAT_CLASS_NEGATIVE_ZERO; + } else if (float32_is_zero_or_denormal(arg)) { + return FLOAT_CLASS_NEGATIVE_SUBNORMAL; + } else { + return FLOAT_CLASS_NEGATIVE_NORMAL; + } + } else { + if (float32_is_infinity(arg)) { + return FLOAT_CLASS_POSITIVE_INFINITY; + } else if (float32_is_zero(arg)) { + return FLOAT_CLASS_POSITIVE_ZERO; + } else if (float32_is_zero_or_denormal(arg)) { + return FLOAT_CLASS_POSITIVE_SUBNORMAL; + } else { + return FLOAT_CLASS_POSITIVE_NORMAL; + } + } +} + +uint32_t helper_float_class_s(CPUMIPSState *env, uint32_t arg) +{ + return float_class_s(arg, &env->active_fpu.fp_status); +} /* binary operations */ -#define FLOAT_BINOP(name) \ -uint64_t helper_float_ ## name ## _d(CPUMIPSState *env, \ - uint64_t fdt0, uint64_t fdt1) \ -{ \ - uint64_t dt2; \ - \ - dt2 = float64_ ## name(fdt0, fdt1, &env->active_fpu.fp_status);\ - update_fcr31(env, GETPC()); \ - return dt2; \ -} \ - \ -uint32_t helper_float_ ## name ## _s(CPUMIPSState *env, \ - uint32_t fst0, uint32_t fst1) \ -{ \ - uint32_t wt2; \ - \ - wt2 = float32_ ## name(fst0, fst1, &env->active_fpu.fp_status);\ - update_fcr31(env, GETPC()); \ - return wt2; \ -} \ - \ -uint64_t helper_float_ ## name ## _ps(CPUMIPSState *env, \ - uint64_t fdt0, \ - uint64_t fdt1) \ -{ \ - uint32_t fst0 = fdt0 & 0XFFFFFFFF; \ - uint32_t fsth0 = fdt0 >> 32; \ - uint32_t fst1 = fdt1 & 0XFFFFFFFF; \ - uint32_t fsth1 = fdt1 >> 32; \ - uint32_t wt2; \ - uint32_t wth2; \ - \ - wt2 = float32_ ## name(fst0, fst1, &env->active_fpu.fp_status); \ - wth2 = float32_ ## name(fsth0, fsth1, &env->active_fpu.fp_status); \ - update_fcr31(env, GETPC()); \ - return ((uint64_t)wth2 << 32) | wt2; \ -} - -FLOAT_BINOP(add) -FLOAT_BINOP(sub) -FLOAT_BINOP(mul) -FLOAT_BINOP(div) -#undef FLOAT_BINOP + +uint64_t helper_float_add_d(CPUMIPSState 
*env, + uint64_t fdt0, uint64_t fdt1) +{ + uint64_t dt2; + + dt2 = float64_add(fdt0, fdt1, &env->active_fpu.fp_status); + update_fcr31(env, GETPC()); + return dt2; +} + +uint32_t helper_float_add_s(CPUMIPSState *env, + uint32_t fst0, uint32_t fst1) +{ + uint32_t wt2; + + wt2 = float32_add(fst0, fst1, &env->active_fpu.fp_status); + update_fcr31(env, GETPC()); + return wt2; +} + +uint64_t helper_float_add_ps(CPUMIPSState *env, + uint64_t fdt0, uint64_t fdt1) +{ + uint32_t fstl0 = fdt0 & 0XFFFFFFFF; + uint32_t fsth0 = fdt0 >> 32; + uint32_t fstl1 = fdt1 & 0XFFFFFFFF; + uint32_t fsth1 = fdt1 >> 32; + uint32_t wtl2; + uint32_t wth2; + + wtl2 = float32_add(fstl0, fstl1, &env->active_fpu.fp_status); + wth2 = float32_add(fsth0, fsth1, &env->active_fpu.fp_status); + update_fcr31(env, GETPC()); + return ((uint64_t)wth2 << 32) | wtl2; +} + +uint64_t helper_float_sub_d(CPUMIPSState *env, + uint64_t fdt0, uint64_t fdt1) +{ + uint64_t dt2; + + dt2 = float64_sub(fdt0, fdt1, &env->active_fpu.fp_status); + update_fcr31(env, GETPC()); + return dt2; +} + +uint32_t helper_float_sub_s(CPUMIPSState *env, + uint32_t fst0, uint32_t fst1) +{ + uint32_t wt2; + + wt2 = float32_sub(fst0, fst1, &env->active_fpu.fp_status); + update_fcr31(env, GETPC()); + return wt2; +} + +uint64_t helper_float_sub_ps(CPUMIPSState *env, + uint64_t fdt0, uint64_t fdt1) +{ + uint32_t fstl0 = fdt0 & 0XFFFFFFFF; + uint32_t fsth0 = fdt0 >> 32; + uint32_t fstl1 = fdt1 & 0XFFFFFFFF; + uint32_t fsth1 = fdt1 >> 32; + uint32_t wtl2; + uint32_t wth2; + + wtl2 = float32_sub(fstl0, fstl1, &env->active_fpu.fp_status); + wth2 = float32_sub(fsth0, fsth1, &env->active_fpu.fp_status); + update_fcr31(env, GETPC()); + return ((uint64_t)wth2 << 32) | wtl2; +} + +uint64_t helper_float_mul_d(CPUMIPSState *env, + uint64_t fdt0, uint64_t fdt1) +{ + uint64_t dt2; + + dt2 = float64_mul(fdt0, fdt1, &env->active_fpu.fp_status); + update_fcr31(env, GETPC()); + return dt2; +} + +uint32_t helper_float_mul_s(CPUMIPSState *env, + uint32_t fst0, uint32_t fst1) +{ + uint32_t wt2; + + wt2 = float32_mul(fst0, fst1, &env->active_fpu.fp_status); + update_fcr31(env, GETPC()); + return wt2; +} + +uint64_t helper_float_mul_ps(CPUMIPSState *env, + uint64_t fdt0, uint64_t fdt1) +{ + uint32_t fstl0 = fdt0 & 0XFFFFFFFF; + uint32_t fsth0 = fdt0 >> 32; + uint32_t fstl1 = fdt1 & 0XFFFFFFFF; + uint32_t fsth1 = fdt1 >> 32; + uint32_t wtl2; + uint32_t wth2; + + wtl2 = float32_mul(fstl0, fstl1, &env->active_fpu.fp_status); + wth2 = float32_mul(fsth0, fsth1, &env->active_fpu.fp_status); + update_fcr31(env, GETPC()); + return ((uint64_t)wth2 << 32) | wtl2; +} + +uint64_t helper_float_div_d(CPUMIPSState *env, + uint64_t fdt0, uint64_t fdt1) +{ + uint64_t dt2; + + dt2 = float64_div(fdt0, fdt1, &env->active_fpu.fp_status); + update_fcr31(env, GETPC()); + return dt2; +} + +uint32_t helper_float_div_s(CPUMIPSState *env, + uint32_t fst0, uint32_t fst1) +{ + uint32_t wt2; + + wt2 = float32_div(fst0, fst1, &env->active_fpu.fp_status); + update_fcr31(env, GETPC()); + return wt2; +} + +uint64_t helper_float_div_ps(CPUMIPSState *env, + uint64_t fdt0, uint64_t fdt1) +{ + uint32_t fstl0 = fdt0 & 0XFFFFFFFF; + uint32_t fsth0 = fdt0 >> 32; + uint32_t fstl1 = fdt1 & 0XFFFFFFFF; + uint32_t fsth1 = fdt1 >> 32; + uint32_t wtl2; + uint32_t wth2; + + wtl2 = float32_div(fstl0, fstl1, &env->active_fpu.fp_status); + wth2 = float32_div(fsth0, fsth1, &env->active_fpu.fp_status); + update_fcr31(env, GETPC()); + return ((uint64_t)wth2 << 32) | wtl2; +} + /* MIPS specific binary operations */ uint64_t 
helper_float_recip2_d(CPUMIPSState *env, uint64_t fdt0, uint64_t fdt2) @@ -1234,19 +1370,19 @@ uint32_t helper_float_recip2_s(CPUMIPSState *env, uint32_t fst0, uint32_t fst2) uint64_t helper_float_recip2_ps(CPUMIPSState *env, uint64_t fdt0, uint64_t fdt2) { - uint32_t fst0 = fdt0 & 0XFFFFFFFF; + uint32_t fstl0 = fdt0 & 0XFFFFFFFF; uint32_t fsth0 = fdt0 >> 32; - uint32_t fst2 = fdt2 & 0XFFFFFFFF; + uint32_t fstl2 = fdt2 & 0XFFFFFFFF; uint32_t fsth2 = fdt2 >> 32; - fst2 = float32_mul(fst0, fst2, &env->active_fpu.fp_status); + fstl2 = float32_mul(fstl0, fstl2, &env->active_fpu.fp_status); fsth2 = float32_mul(fsth0, fsth2, &env->active_fpu.fp_status); - fst2 = float32_chs(float32_sub(fst2, float32_one, + fstl2 = float32_chs(float32_sub(fstl2, float32_one, &env->active_fpu.fp_status)); fsth2 = float32_chs(float32_sub(fsth2, float32_one, &env->active_fpu.fp_status)); update_fcr31(env, GETPC()); - return ((uint64_t)fsth2 << 32) | fst2; + return ((uint64_t)fsth2 << 32) | fstl2; } uint64_t helper_float_rsqrt2_d(CPUMIPSState *env, uint64_t fdt0, uint64_t fdt2) @@ -1271,51 +1407,51 @@ uint32_t helper_float_rsqrt2_s(CPUMIPSState *env, uint32_t fst0, uint32_t fst2) uint64_t helper_float_rsqrt2_ps(CPUMIPSState *env, uint64_t fdt0, uint64_t fdt2) { - uint32_t fst0 = fdt0 & 0XFFFFFFFF; + uint32_t fstl0 = fdt0 & 0XFFFFFFFF; uint32_t fsth0 = fdt0 >> 32; - uint32_t fst2 = fdt2 & 0XFFFFFFFF; + uint32_t fstl2 = fdt2 & 0XFFFFFFFF; uint32_t fsth2 = fdt2 >> 32; - fst2 = float32_mul(fst0, fst2, &env->active_fpu.fp_status); + fstl2 = float32_mul(fstl0, fstl2, &env->active_fpu.fp_status); fsth2 = float32_mul(fsth0, fsth2, &env->active_fpu.fp_status); - fst2 = float32_sub(fst2, float32_one, &env->active_fpu.fp_status); + fstl2 = float32_sub(fstl2, float32_one, &env->active_fpu.fp_status); fsth2 = float32_sub(fsth2, float32_one, &env->active_fpu.fp_status); - fst2 = float32_chs(float32_div(fst2, FLOAT_TWO32, + fstl2 = float32_chs(float32_div(fstl2, FLOAT_TWO32, &env->active_fpu.fp_status)); fsth2 = float32_chs(float32_div(fsth2, FLOAT_TWO32, &env->active_fpu.fp_status)); update_fcr31(env, GETPC()); - return ((uint64_t)fsth2 << 32) | fst2; + return ((uint64_t)fsth2 << 32) | fstl2; } uint64_t helper_float_addr_ps(CPUMIPSState *env, uint64_t fdt0, uint64_t fdt1) { - uint32_t fst0 = fdt0 & 0XFFFFFFFF; + uint32_t fstl0 = fdt0 & 0XFFFFFFFF; uint32_t fsth0 = fdt0 >> 32; - uint32_t fst1 = fdt1 & 0XFFFFFFFF; + uint32_t fstl1 = fdt1 & 0XFFFFFFFF; uint32_t fsth1 = fdt1 >> 32; - uint32_t fst2; + uint32_t fstl2; uint32_t fsth2; - fst2 = float32_add(fst0, fsth0, &env->active_fpu.fp_status); - fsth2 = float32_add(fst1, fsth1, &env->active_fpu.fp_status); + fstl2 = float32_add(fstl0, fsth0, &env->active_fpu.fp_status); + fsth2 = float32_add(fstl1, fsth1, &env->active_fpu.fp_status); update_fcr31(env, GETPC()); - return ((uint64_t)fsth2 << 32) | fst2; + return ((uint64_t)fsth2 << 32) | fstl2; } uint64_t helper_float_mulr_ps(CPUMIPSState *env, uint64_t fdt0, uint64_t fdt1) { - uint32_t fst0 = fdt0 & 0XFFFFFFFF; + uint32_t fstl0 = fdt0 & 0XFFFFFFFF; uint32_t fsth0 = fdt0 >> 32; - uint32_t fst1 = fdt1 & 0XFFFFFFFF; + uint32_t fstl1 = fdt1 & 0XFFFFFFFF; uint32_t fsth1 = fdt1 >> 32; - uint32_t fst2; + uint32_t fstl2; uint32_t fsth2; - fst2 = float32_mul(fst0, fsth0, &env->active_fpu.fp_status); - fsth2 = float32_mul(fst1, fsth1, &env->active_fpu.fp_status); + fstl2 = float32_mul(fstl0, fsth0, &env->active_fpu.fp_status); + fsth2 = float32_mul(fstl1, fsth1, &env->active_fpu.fp_status); update_fcr31(env, GETPC()); - return ((uint64_t)fsth2 
<< 32) | fst2; + return ((uint64_t)fsth2 << 32) | fstl2; } #define FLOAT_MINMAX(name, bits, minmaxfunc) \ @@ -1343,60 +1479,171 @@ FLOAT_MINMAX(mina_d, 64, minnummag) #undef FLOAT_MINMAX /* ternary operations */ -#define UNFUSED_FMA(prefix, a, b, c, flags) \ -{ \ - a = prefix##_mul(a, b, &env->active_fpu.fp_status); \ - if ((flags) & float_muladd_negate_c) { \ - a = prefix##_sub(a, c, &env->active_fpu.fp_status); \ - } else { \ - a = prefix##_add(a, c, &env->active_fpu.fp_status); \ - } \ - if ((flags) & float_muladd_negate_result) { \ - a = prefix##_chs(a); \ - } \ -} - -/* FMA based operations */ -#define FLOAT_FMA(name, type) \ -uint64_t helper_float_ ## name ## _d(CPUMIPSState *env, \ - uint64_t fdt0, uint64_t fdt1, \ - uint64_t fdt2) \ -{ \ - UNFUSED_FMA(float64, fdt0, fdt1, fdt2, type); \ - update_fcr31(env, GETPC()); \ - return fdt0; \ -} \ - \ -uint32_t helper_float_ ## name ## _s(CPUMIPSState *env, \ - uint32_t fst0, uint32_t fst1, \ - uint32_t fst2) \ -{ \ - UNFUSED_FMA(float32, fst0, fst1, fst2, type); \ - update_fcr31(env, GETPC()); \ - return fst0; \ -} \ - \ -uint64_t helper_float_ ## name ## _ps(CPUMIPSState *env, \ - uint64_t fdt0, uint64_t fdt1, \ - uint64_t fdt2) \ -{ \ - uint32_t fst0 = fdt0 & 0XFFFFFFFF; \ - uint32_t fsth0 = fdt0 >> 32; \ - uint32_t fst1 = fdt1 & 0XFFFFFFFF; \ - uint32_t fsth1 = fdt1 >> 32; \ - uint32_t fst2 = fdt2 & 0XFFFFFFFF; \ - uint32_t fsth2 = fdt2 >> 32; \ - \ - UNFUSED_FMA(float32, fst0, fst1, fst2, type); \ - UNFUSED_FMA(float32, fsth0, fsth1, fsth2, type); \ - update_fcr31(env, GETPC()); \ - return ((uint64_t)fsth0 << 32) | fst0; \ -} -FLOAT_FMA(madd, 0) -FLOAT_FMA(msub, float_muladd_negate_c) -FLOAT_FMA(nmadd, float_muladd_negate_result) -FLOAT_FMA(nmsub, float_muladd_negate_result | float_muladd_negate_c) -#undef FLOAT_FMA + +uint64_t helper_float_madd_d(CPUMIPSState *env, uint64_t fst0, + uint64_t fst1, uint64_t fst2) +{ + fst0 = float64_mul(fst0, fst1, &env->active_fpu.fp_status); + fst0 = float64_add(fst0, fst2, &env->active_fpu.fp_status); + + update_fcr31(env, GETPC()); + return fst0; +} + +uint32_t helper_float_madd_s(CPUMIPSState *env, uint32_t fst0, + uint32_t fst1, uint32_t fst2) +{ + fst0 = float32_mul(fst0, fst1, &env->active_fpu.fp_status); + fst0 = float32_add(fst0, fst2, &env->active_fpu.fp_status); + + update_fcr31(env, GETPC()); + return fst0; +} + +uint64_t helper_float_madd_ps(CPUMIPSState *env, uint64_t fdt0, + uint64_t fdt1, uint64_t fdt2) +{ + uint32_t fstl0 = fdt0 & 0XFFFFFFFF; + uint32_t fsth0 = fdt0 >> 32; + uint32_t fstl1 = fdt1 & 0XFFFFFFFF; + uint32_t fsth1 = fdt1 >> 32; + uint32_t fstl2 = fdt2 & 0XFFFFFFFF; + uint32_t fsth2 = fdt2 >> 32; + + fstl0 = float32_mul(fstl0, fstl1, &env->active_fpu.fp_status); + fstl0 = float32_add(fstl0, fstl2, &env->active_fpu.fp_status); + fsth0 = float32_mul(fsth0, fsth1, &env->active_fpu.fp_status); + fsth0 = float32_add(fsth0, fsth2, &env->active_fpu.fp_status); + + update_fcr31(env, GETPC()); + return ((uint64_t)fsth0 << 32) | fstl0; +} + +uint64_t helper_float_msub_d(CPUMIPSState *env, uint64_t fst0, + uint64_t fst1, uint64_t fst2) +{ + fst0 = float64_mul(fst0, fst1, &env->active_fpu.fp_status); + fst0 = float64_sub(fst0, fst2, &env->active_fpu.fp_status); + + update_fcr31(env, GETPC()); + return fst0; +} + +uint32_t helper_float_msub_s(CPUMIPSState *env, uint32_t fst0, + uint32_t fst1, uint32_t fst2) +{ + fst0 = float32_mul(fst0, fst1, &env->active_fpu.fp_status); + fst0 = float32_sub(fst0, fst2, &env->active_fpu.fp_status); + + update_fcr31(env, GETPC()); + return fst0; +} + 
+uint64_t helper_float_msub_ps(CPUMIPSState *env, uint64_t fdt0, + uint64_t fdt1, uint64_t fdt2) +{ + uint32_t fstl0 = fdt0 & 0XFFFFFFFF; + uint32_t fsth0 = fdt0 >> 32; + uint32_t fstl1 = fdt1 & 0XFFFFFFFF; + uint32_t fsth1 = fdt1 >> 32; + uint32_t fstl2 = fdt2 & 0XFFFFFFFF; + uint32_t fsth2 = fdt2 >> 32; + + fstl0 = float32_mul(fstl0, fstl1, &env->active_fpu.fp_status); + fstl0 = float32_sub(fstl0, fstl2, &env->active_fpu.fp_status); + fsth0 = float32_mul(fsth0, fsth1, &env->active_fpu.fp_status); + fsth0 = float32_sub(fsth0, fsth2, &env->active_fpu.fp_status); + + update_fcr31(env, GETPC()); + return ((uint64_t)fsth0 << 32) | fstl0; +} + +uint64_t helper_float_nmadd_d(CPUMIPSState *env, uint64_t fst0, + uint64_t fst1, uint64_t fst2) +{ + fst0 = float64_mul(fst0, fst1, &env->active_fpu.fp_status); + fst0 = float64_add(fst0, fst2, &env->active_fpu.fp_status); + fst0 = float64_chs(fst0); + + update_fcr31(env, GETPC()); + return fst0; +} + +uint32_t helper_float_nmadd_s(CPUMIPSState *env, uint32_t fst0, + uint32_t fst1, uint32_t fst2) +{ + fst0 = float32_mul(fst0, fst1, &env->active_fpu.fp_status); + fst0 = float32_add(fst0, fst2, &env->active_fpu.fp_status); + fst0 = float32_chs(fst0); + + update_fcr31(env, GETPC()); + return fst0; +} + +uint64_t helper_float_nmadd_ps(CPUMIPSState *env, uint64_t fdt0, + uint64_t fdt1, uint64_t fdt2) +{ + uint32_t fstl0 = fdt0 & 0XFFFFFFFF; + uint32_t fsth0 = fdt0 >> 32; + uint32_t fstl1 = fdt1 & 0XFFFFFFFF; + uint32_t fsth1 = fdt1 >> 32; + uint32_t fstl2 = fdt2 & 0XFFFFFFFF; + uint32_t fsth2 = fdt2 >> 32; + + fstl0 = float32_mul(fstl0, fstl1, &env->active_fpu.fp_status); + fstl0 = float32_add(fstl0, fstl2, &env->active_fpu.fp_status); + fstl0 = float32_chs(fstl0); + fsth0 = float32_mul(fsth0, fsth1, &env->active_fpu.fp_status); + fsth0 = float32_add(fsth0, fsth2, &env->active_fpu.fp_status); + fsth0 = float32_chs(fsth0); + + update_fcr31(env, GETPC()); + return ((uint64_t)fsth0 << 32) | fstl0; +} + +uint64_t helper_float_nmsub_d(CPUMIPSState *env, uint64_t fst0, + uint64_t fst1, uint64_t fst2) +{ + fst0 = float64_mul(fst0, fst1, &env->active_fpu.fp_status); + fst0 = float64_sub(fst0, fst2, &env->active_fpu.fp_status); + fst0 = float64_chs(fst0); + + update_fcr31(env, GETPC()); + return fst0; +} + +uint32_t helper_float_nmsub_s(CPUMIPSState *env, uint32_t fst0, + uint32_t fst1, uint32_t fst2) +{ + fst0 = float32_mul(fst0, fst1, &env->active_fpu.fp_status); + fst0 = float32_sub(fst0, fst2, &env->active_fpu.fp_status); + fst0 = float32_chs(fst0); + + update_fcr31(env, GETPC()); + return fst0; +} + +uint64_t helper_float_nmsub_ps(CPUMIPSState *env, uint64_t fdt0, + uint64_t fdt1, uint64_t fdt2) +{ + uint32_t fstl0 = fdt0 & 0XFFFFFFFF; + uint32_t fsth0 = fdt0 >> 32; + uint32_t fstl1 = fdt1 & 0XFFFFFFFF; + uint32_t fsth1 = fdt1 >> 32; + uint32_t fstl2 = fdt2 & 0XFFFFFFFF; + uint32_t fsth2 = fdt2 >> 32; + + fstl0 = float32_mul(fstl0, fstl1, &env->active_fpu.fp_status); + fstl0 = float32_sub(fstl0, fstl2, &env->active_fpu.fp_status); + fstl0 = float32_chs(fstl0); + fsth0 = float32_mul(fsth0, fsth1, &env->active_fpu.fp_status); + fsth0 = float32_sub(fsth0, fsth2, &env->active_fpu.fp_status); + fsth0 = float32_chs(fsth0); + + update_fcr31(env, GETPC()); + return ((uint64_t)fsth0 << 32) | fstl0; +} + #define FLOAT_FMADDSUB(name, bits, muladd_arg) \ uint ## bits ## _t helper_float_ ## name(CPUMIPSState *env, \ diff --git a/qemu/target/mips/helper.c b/qemu/target/mips/helper.c index 3c2ba8cec0..f407873180 100644 --- a/qemu/target/mips/helper.c +++ 
b/qemu/target/mips/helper.c @@ -68,6 +68,7 @@ int fixed_mmu_map_address(CPUMIPSState *env, hwaddr *physical, int *prot, int r4k_map_address(CPUMIPSState *env, hwaddr *physical, int *prot, target_ulong address, int rw, int access_type) { + struct uc_struct *uc = env->uc; uint16_t ASID = env->CP0_EntryHi & env->CP0_EntryHi_ASID_mask; uint32_t MMID = env->CP0_MemoryMapID; bool mi = !!((env->CP0_Config5 >> CP0C5_MI) & 1); @@ -461,6 +462,7 @@ void cpu_mips_store_cause(CPUMIPSState *env, target_ulong val) static void raise_mmu_exception(CPUMIPSState *env, target_ulong address, int rw, int tlb_error) { + struct uc_struct *uc = env->uc; CPUState *cs = env_cpu(env); int exception = 0, error_code = 0; @@ -903,6 +905,7 @@ bool mips_cpu_tlb_fill(CPUState *cs, vaddr address, int size, { MIPSCPU *cpu = MIPS_CPU(cs); CPUMIPSState *env = &cpu->env; + struct uc_struct *uc = env->uc; hwaddr physical; int prot; int mips_access_type; @@ -1424,6 +1427,7 @@ bool mips_cpu_exec_interrupt(CPUState *cs, int interrupt_request) void r4k_invalidate_tlb(CPUMIPSState *env, int idx, int use_extra) { + struct uc_struct *uc = env->uc; CPUState *cs = env_cpu(env); r4k_tlb_t *tlb; target_ulong addr; diff --git a/qemu/target/mips/helper.h b/qemu/target/mips/helper.h index 221e78257b..012f867e59 100644 --- a/qemu/target/mips/helper.h +++ b/qemu/target/mips/helper.h @@ -945,6 +945,21 @@ DEF_HELPER_4(msa_mod_s_h, void, env, i32, i32, i32) DEF_HELPER_4(msa_mod_s_w, void, env, i32, i32, i32) DEF_HELPER_4(msa_mod_s_d, void, env, i32, i32, i32) +DEF_HELPER_4(msa_maddv_b, void, env, i32, i32, i32) +DEF_HELPER_4(msa_maddv_h, void, env, i32, i32, i32) +DEF_HELPER_4(msa_maddv_w, void, env, i32, i32, i32) +DEF_HELPER_4(msa_maddv_d, void, env, i32, i32, i32) + +DEF_HELPER_4(msa_msubv_b, void, env, i32, i32, i32) +DEF_HELPER_4(msa_msubv_h, void, env, i32, i32, i32) +DEF_HELPER_4(msa_msubv_w, void, env, i32, i32, i32) +DEF_HELPER_4(msa_msubv_d, void, env, i32, i32, i32) + +DEF_HELPER_4(msa_mulv_b, void, env, i32, i32, i32) +DEF_HELPER_4(msa_mulv_h, void, env, i32, i32, i32) +DEF_HELPER_4(msa_mulv_w, void, env, i32, i32, i32) +DEF_HELPER_4(msa_mulv_d, void, env, i32, i32, i32) + DEF_HELPER_4(msa_asub_s_b, void, env, i32, i32, i32) DEF_HELPER_4(msa_asub_s_h, void, env, i32, i32, i32) DEF_HELPER_4(msa_asub_s_w, void, env, i32, i32, i32) @@ -963,6 +978,31 @@ DEF_HELPER_4(msa_hsub_u_h, void, env, i32, i32, i32) DEF_HELPER_4(msa_hsub_u_w, void, env, i32, i32, i32) DEF_HELPER_4(msa_hsub_u_d, void, env, i32, i32, i32) +DEF_HELPER_4(msa_subs_s_b, void, env, i32, i32, i32) +DEF_HELPER_4(msa_subs_s_h, void, env, i32, i32, i32) +DEF_HELPER_4(msa_subs_s_w, void, env, i32, i32, i32) +DEF_HELPER_4(msa_subs_s_d, void, env, i32, i32, i32) + +DEF_HELPER_4(msa_subs_u_b, void, env, i32, i32, i32) +DEF_HELPER_4(msa_subs_u_h, void, env, i32, i32, i32) +DEF_HELPER_4(msa_subs_u_w, void, env, i32, i32, i32) +DEF_HELPER_4(msa_subs_u_d, void, env, i32, i32, i32) + +DEF_HELPER_4(msa_subsus_u_b, void, env, i32, i32, i32) +DEF_HELPER_4(msa_subsus_u_h, void, env, i32, i32, i32) +DEF_HELPER_4(msa_subsus_u_w, void, env, i32, i32, i32) +DEF_HELPER_4(msa_subsus_u_d, void, env, i32, i32, i32) + +DEF_HELPER_4(msa_subsuu_s_b, void, env, i32, i32, i32) +DEF_HELPER_4(msa_subsuu_s_h, void, env, i32, i32, i32) +DEF_HELPER_4(msa_subsuu_s_w, void, env, i32, i32, i32) +DEF_HELPER_4(msa_subsuu_s_d, void, env, i32, i32, i32) + +DEF_HELPER_4(msa_subv_b, void, env, i32, i32, i32) +DEF_HELPER_4(msa_subv_h, void, env, i32, i32, i32) +DEF_HELPER_4(msa_subv_w, void, env, i32, i32, i32) 
+DEF_HELPER_4(msa_subv_d, void, env, i32, i32, i32) + DEF_HELPER_4(msa_ilvev_b, void, env, i32, i32, i32) DEF_HELPER_4(msa_ilvev_h, void, env, i32, i32, i32) DEF_HELPER_4(msa_ilvev_w, void, env, i32, i32, i32) @@ -1058,20 +1098,25 @@ DEF_HELPER_5(msa_srlri_df, void, env, i32, i32, i32, i32) DEF_HELPER_5(msa_binsl_df, void, env, i32, i32, i32, i32) DEF_HELPER_5(msa_binsr_df, void, env, i32, i32, i32, i32) -DEF_HELPER_5(msa_subv_df, void, env, i32, i32, i32, i32) -DEF_HELPER_5(msa_subs_s_df, void, env, i32, i32, i32, i32) -DEF_HELPER_5(msa_subs_u_df, void, env, i32, i32, i32, i32) -DEF_HELPER_5(msa_subsus_u_df, void, env, i32, i32, i32, i32) -DEF_HELPER_5(msa_subsuu_s_df, void, env, i32, i32, i32, i32) -DEF_HELPER_5(msa_mulv_df, void, env, i32, i32, i32, i32) -DEF_HELPER_5(msa_maddv_df, void, env, i32, i32, i32, i32) -DEF_HELPER_5(msa_msubv_df, void, env, i32, i32, i32, i32) -DEF_HELPER_5(msa_dotp_s_df, void, env, i32, i32, i32, i32) -DEF_HELPER_5(msa_dotp_u_df, void, env, i32, i32, i32, i32) -DEF_HELPER_5(msa_dpadd_s_df, void, env, i32, i32, i32, i32) -DEF_HELPER_5(msa_dpadd_u_df, void, env, i32, i32, i32, i32) -DEF_HELPER_5(msa_dpsub_s_df, void, env, i32, i32, i32, i32) -DEF_HELPER_5(msa_dpsub_u_df, void, env, i32, i32, i32, i32) + +DEF_HELPER_4(msa_dotp_s_h, void, env, i32, i32, i32) +DEF_HELPER_4(msa_dotp_s_w, void, env, i32, i32, i32) +DEF_HELPER_4(msa_dotp_s_d, void, env, i32, i32, i32) +DEF_HELPER_4(msa_dotp_u_h, void, env, i32, i32, i32) +DEF_HELPER_4(msa_dotp_u_w, void, env, i32, i32, i32) +DEF_HELPER_4(msa_dotp_u_d, void, env, i32, i32, i32) +DEF_HELPER_4(msa_dpadd_s_h, void, env, i32, i32, i32) +DEF_HELPER_4(msa_dpadd_s_w, void, env, i32, i32, i32) +DEF_HELPER_4(msa_dpadd_s_d, void, env, i32, i32, i32) +DEF_HELPER_4(msa_dpadd_u_h, void, env, i32, i32, i32) +DEF_HELPER_4(msa_dpadd_u_w, void, env, i32, i32, i32) +DEF_HELPER_4(msa_dpadd_u_d, void, env, i32, i32, i32) +DEF_HELPER_4(msa_dpsub_s_h, void, env, i32, i32, i32) +DEF_HELPER_4(msa_dpsub_s_w, void, env, i32, i32, i32) +DEF_HELPER_4(msa_dpsub_s_d, void, env, i32, i32, i32) +DEF_HELPER_4(msa_dpsub_u_h, void, env, i32, i32, i32) +DEF_HELPER_4(msa_dpsub_u_w, void, env, i32, i32, i32) +DEF_HELPER_4(msa_dpsub_u_d, void, env, i32, i32, i32) DEF_HELPER_5(msa_sld_df, void, env, i32, i32, i32, i32) DEF_HELPER_5(msa_splat_df, void, env, i32, i32, i32, i32) DEF_HELPER_5(msa_vshf_df, void, env, i32, i32, i32, i32) diff --git a/qemu/target/mips/internal.h b/qemu/target/mips/internal.h index 6978801d9e..2d6032ceff 100644 --- a/qemu/target/mips/internal.h +++ b/qemu/target/mips/internal.h @@ -39,7 +39,9 @@ struct mips_def_t { int32_t CP0_Config5; int32_t CP0_Config5_rw_bitmask; int32_t CP0_Config6; + int32_t CP0_Config6_rw_bitmask; int32_t CP0_Config7; + int32_t CP0_Config7_rw_bitmask; target_ulong CP0_LLAddr_rw_bitmask; int CP0_LLAddr_shift; int32_t SYNCI_Step; @@ -217,7 +219,6 @@ uint32_t float_class_s(uint32_t arg, float_status *fst); uint64_t float_class_d(uint64_t arg, float_status *fst); extern unsigned int ieee_rm[]; -int ieee_ex_to_mips(int xcpt); void update_pagemask(CPUMIPSState *env, target_ulong arg1, int32_t *pagemask); static inline void restore_rounding_mode(CPUMIPSState *env) diff --git a/qemu/target/mips/mips-defs.h b/qemu/target/mips/mips-defs.h index a831bb4384..ed6a7a9e54 100644 --- a/qemu/target/mips/mips-defs.h +++ b/qemu/target/mips/mips-defs.h @@ -15,7 +15,7 @@ * ------------------------------------------------ */ /* - * bits 0-31: MIPS base instruction sets + * bits 0-23: MIPS base instruction sets */ #define 
ISA_MIPS1 0x0000000000000001ULL #define ISA_MIPS2 0x0000000000000002ULL @@ -34,30 +34,37 @@ #define ISA_MIPS64R6 0x0000000000004000ULL #define ISA_NANOMIPS32 0x0000000000008000ULL /* - * bits 32-47: MIPS ASEs + * bits 24-39: MIPS ASEs */ -#define ASE_MIPS16 0x0000000100000000ULL -#define ASE_MIPS3D 0x0000000200000000ULL -#define ASE_MDMX 0x0000000400000000ULL -#define ASE_DSP 0x0000000800000000ULL -#define ASE_DSP_R2 0x0000001000000000ULL -#define ASE_DSP_R3 0x0000002000000000ULL -#define ASE_MT 0x0000004000000000ULL -#define ASE_SMARTMIPS 0x0000008000000000ULL -#define ASE_MICROMIPS 0x0000010000000000ULL -#define ASE_MSA 0x0000020000000000ULL +#define ASE_MIPS16 0x0000000001000000ULL +#define ASE_MIPS3D 0x0000000002000000ULL +#define ASE_MDMX 0x0000000004000000ULL +#define ASE_DSP 0x0000000008000000ULL +#define ASE_DSP_R2 0x0000000010000000ULL +#define ASE_DSP_R3 0x0000000020000000ULL +#define ASE_MT 0x0000000040000000ULL +#define ASE_SMARTMIPS 0x0000000080000000ULL +#define ASE_MICROMIPS 0x0000000100000000ULL +#define ASE_MSA 0x0000000200000000ULL /* - * bits 48-55: vendor-specific base instruction sets + * bits 40-51: vendor-specific base instruction sets */ -#define INSN_LOONGSON2E 0x0001000000000000ULL -#define INSN_LOONGSON2F 0x0002000000000000ULL -#define INSN_VR54XX 0x0004000000000000ULL -#define INSN_R5900 0x0008000000000000ULL +#define INSN_VR54XX 0x0000010000000000ULL +#define INSN_R5900 0x0000020000000000ULL +#define INSN_LOONGSON2E 0x0000040000000000ULL +#define INSN_LOONGSON2F 0x0000080000000000ULL +#define INSN_LOONGSON3A 0x0000100000000000ULL /* - * bits 56-63: vendor-specific ASEs + * bits 52-63: vendor-specific ASEs */ -#define ASE_MMI 0x0100000000000000ULL -#define ASE_MXU 0x0200000000000000ULL +/* MultiMedia Instructions defined by R5900 */ +#define ASE_MMI 0x0010000000000000ULL +/* MIPS eXtension/enhanced Unit defined by Ingenic */ +#define ASE_MXU 0x0020000000000000ULL +/* Loongson MultiMedia Instructions */ +#define ASE_LMMI 0x0040000000000000ULL +/* Loongson EXTensions */ +#define ASE_LEXT 0x0080000000000000ULL /* MIPS CPU defines. */ #define CPU_MIPS1 (ISA_MIPS1) @@ -67,7 +74,7 @@ #define CPU_VR54XX (CPU_MIPS4 | INSN_VR54XX) #define CPU_R5900 (CPU_MIPS3 | INSN_R5900) #define CPU_LOONGSON2E (CPU_MIPS3 | INSN_LOONGSON2E) -#define CPU_LOONGSON2F (CPU_MIPS3 | INSN_LOONGSON2F) +#define CPU_LOONGSON2F (CPU_MIPS3 | INSN_LOONGSON2F | ASE_LMMI) #define CPU_MIPS5 (CPU_MIPS4 | ISA_MIPS5) @@ -94,6 +101,8 @@ /* Wave Computing: "nanoMIPS" */ #define CPU_NANOMIPS32 (CPU_MIPS32R6 | ISA_NANOMIPS32) +#define CPU_LOONGSON3A (CPU_MIPS64R2 | INSN_LOONGSON3A | ASE_LMMI | ASE_LEXT) + /* * Strictly follow the architecture standard: * - Disallow "special" instruction handling for PMON/SPIM. 
diff --git a/qemu/target/mips/msa_helper.c b/qemu/target/mips/msa_helper.c index c3501927ce..e83c899a93 100644 --- a/qemu/target/mips/msa_helper.c +++ b/qemu/target/mips/msa_helper.c @@ -2232,7 +2232,339 @@ void helper_msa_div_u_d(CPUMIPSState *env, * +---------------+----------------------------------------------------------+ */ -/* TODO: insert Int Dot Product group helpers here */ +#define SIGNED_EXTRACT(e, o, a, df) \ + do { \ + e = SIGNED_EVEN(a, df); \ + o = SIGNED_ODD(a, df); \ + } while (0) + +#define UNSIGNED_EXTRACT(e, o, a, df) \ + do { \ + e = UNSIGNED_EVEN(a, df); \ + o = UNSIGNED_ODD(a, df); \ + } while (0) + + +static inline int64_t msa_dotp_s_df(uint32_t df, int64_t arg1, int64_t arg2) +{ + int64_t even_arg1; + int64_t even_arg2; + int64_t odd_arg1; + int64_t odd_arg2; + SIGNED_EXTRACT(even_arg1, odd_arg1, arg1, df); + SIGNED_EXTRACT(even_arg2, odd_arg2, arg2, df); + return (even_arg1 * even_arg2) + (odd_arg1 * odd_arg2); +} + +void helper_msa_dotp_s_h(CPUMIPSState *env, + uint32_t wd, uint32_t ws, uint32_t wt) +{ + wr_t *pwd = &(env->active_fpu.fpr[wd].wr); + wr_t *pws = &(env->active_fpu.fpr[ws].wr); + wr_t *pwt = &(env->active_fpu.fpr[wt].wr); + + pwd->h[0] = msa_dotp_s_df(DF_HALF, pws->h[0], pwt->h[0]); + pwd->h[1] = msa_dotp_s_df(DF_HALF, pws->h[1], pwt->h[1]); + pwd->h[2] = msa_dotp_s_df(DF_HALF, pws->h[2], pwt->h[2]); + pwd->h[3] = msa_dotp_s_df(DF_HALF, pws->h[3], pwt->h[3]); + pwd->h[4] = msa_dotp_s_df(DF_HALF, pws->h[4], pwt->h[4]); + pwd->h[5] = msa_dotp_s_df(DF_HALF, pws->h[5], pwt->h[5]); + pwd->h[6] = msa_dotp_s_df(DF_HALF, pws->h[6], pwt->h[6]); + pwd->h[7] = msa_dotp_s_df(DF_HALF, pws->h[7], pwt->h[7]); +} + +void helper_msa_dotp_s_w(CPUMIPSState *env, + uint32_t wd, uint32_t ws, uint32_t wt) +{ + wr_t *pwd = &(env->active_fpu.fpr[wd].wr); + wr_t *pws = &(env->active_fpu.fpr[ws].wr); + wr_t *pwt = &(env->active_fpu.fpr[wt].wr); + + pwd->w[0] = msa_dotp_s_df(DF_WORD, pws->w[0], pwt->w[0]); + pwd->w[1] = msa_dotp_s_df(DF_WORD, pws->w[1], pwt->w[1]); + pwd->w[2] = msa_dotp_s_df(DF_WORD, pws->w[2], pwt->w[2]); + pwd->w[3] = msa_dotp_s_df(DF_WORD, pws->w[3], pwt->w[3]); +} + +void helper_msa_dotp_s_d(CPUMIPSState *env, + uint32_t wd, uint32_t ws, uint32_t wt) +{ + wr_t *pwd = &(env->active_fpu.fpr[wd].wr); + wr_t *pws = &(env->active_fpu.fpr[ws].wr); + wr_t *pwt = &(env->active_fpu.fpr[wt].wr); + + pwd->d[0] = msa_dotp_s_df(DF_DOUBLE, pws->d[0], pwt->d[0]); + pwd->d[1] = msa_dotp_s_df(DF_DOUBLE, pws->d[1], pwt->d[1]); +} + + +static inline int64_t msa_dotp_u_df(uint32_t df, int64_t arg1, int64_t arg2) +{ + int64_t even_arg1; + int64_t even_arg2; + int64_t odd_arg1; + int64_t odd_arg2; + UNSIGNED_EXTRACT(even_arg1, odd_arg1, arg1, df); + UNSIGNED_EXTRACT(even_arg2, odd_arg2, arg2, df); + return (even_arg1 * even_arg2) + (odd_arg1 * odd_arg2); +} + +void helper_msa_dotp_u_h(CPUMIPSState *env, + uint32_t wd, uint32_t ws, uint32_t wt) +{ + wr_t *pwd = &(env->active_fpu.fpr[wd].wr); + wr_t *pws = &(env->active_fpu.fpr[ws].wr); + wr_t *pwt = &(env->active_fpu.fpr[wt].wr); + + pwd->h[0] = msa_dotp_u_df(DF_HALF, pws->h[0], pwt->h[0]); + pwd->h[1] = msa_dotp_u_df(DF_HALF, pws->h[1], pwt->h[1]); + pwd->h[2] = msa_dotp_u_df(DF_HALF, pws->h[2], pwt->h[2]); + pwd->h[3] = msa_dotp_u_df(DF_HALF, pws->h[3], pwt->h[3]); + pwd->h[4] = msa_dotp_u_df(DF_HALF, pws->h[4], pwt->h[4]); + pwd->h[5] = msa_dotp_u_df(DF_HALF, pws->h[5], pwt->h[5]); + pwd->h[6] = msa_dotp_u_df(DF_HALF, pws->h[6], pwt->h[6]); + pwd->h[7] = msa_dotp_u_df(DF_HALF, pws->h[7], pwt->h[7]); +} + +void 
helper_msa_dotp_u_w(CPUMIPSState *env, + uint32_t wd, uint32_t ws, uint32_t wt) +{ + wr_t *pwd = &(env->active_fpu.fpr[wd].wr); + wr_t *pws = &(env->active_fpu.fpr[ws].wr); + wr_t *pwt = &(env->active_fpu.fpr[wt].wr); + + pwd->w[0] = msa_dotp_u_df(DF_WORD, pws->w[0], pwt->w[0]); + pwd->w[1] = msa_dotp_u_df(DF_WORD, pws->w[1], pwt->w[1]); + pwd->w[2] = msa_dotp_u_df(DF_WORD, pws->w[2], pwt->w[2]); + pwd->w[3] = msa_dotp_u_df(DF_WORD, pws->w[3], pwt->w[3]); +} + +void helper_msa_dotp_u_d(CPUMIPSState *env, + uint32_t wd, uint32_t ws, uint32_t wt) +{ + wr_t *pwd = &(env->active_fpu.fpr[wd].wr); + wr_t *pws = &(env->active_fpu.fpr[ws].wr); + wr_t *pwt = &(env->active_fpu.fpr[wt].wr); + + pwd->d[0] = msa_dotp_u_df(DF_DOUBLE, pws->d[0], pwt->d[0]); + pwd->d[1] = msa_dotp_u_df(DF_DOUBLE, pws->d[1], pwt->d[1]); +} + + +static inline int64_t msa_dpadd_s_df(uint32_t df, int64_t dest, int64_t arg1, + int64_t arg2) +{ + int64_t even_arg1; + int64_t even_arg2; + int64_t odd_arg1; + int64_t odd_arg2; + SIGNED_EXTRACT(even_arg1, odd_arg1, arg1, df); + SIGNED_EXTRACT(even_arg2, odd_arg2, arg2, df); + return dest + (even_arg1 * even_arg2) + (odd_arg1 * odd_arg2); +} + +void helper_msa_dpadd_s_h(CPUMIPSState *env, + uint32_t wd, uint32_t ws, uint32_t wt) +{ + wr_t *pwd = &(env->active_fpu.fpr[wd].wr); + wr_t *pws = &(env->active_fpu.fpr[ws].wr); + wr_t *pwt = &(env->active_fpu.fpr[wt].wr); + + pwd->h[0] = msa_dpadd_s_df(DF_HALF, pwd->h[0], pws->h[0], pwt->h[0]); + pwd->h[1] = msa_dpadd_s_df(DF_HALF, pwd->h[1], pws->h[1], pwt->h[1]); + pwd->h[2] = msa_dpadd_s_df(DF_HALF, pwd->h[2], pws->h[2], pwt->h[2]); + pwd->h[3] = msa_dpadd_s_df(DF_HALF, pwd->h[3], pws->h[3], pwt->h[3]); + pwd->h[4] = msa_dpadd_s_df(DF_HALF, pwd->h[4], pws->h[4], pwt->h[4]); + pwd->h[5] = msa_dpadd_s_df(DF_HALF, pwd->h[5], pws->h[5], pwt->h[5]); + pwd->h[6] = msa_dpadd_s_df(DF_HALF, pwd->h[6], pws->h[6], pwt->h[6]); + pwd->h[7] = msa_dpadd_s_df(DF_HALF, pwd->h[7], pws->h[7], pwt->h[7]); +} + +void helper_msa_dpadd_s_w(CPUMIPSState *env, + uint32_t wd, uint32_t ws, uint32_t wt) +{ + wr_t *pwd = &(env->active_fpu.fpr[wd].wr); + wr_t *pws = &(env->active_fpu.fpr[ws].wr); + wr_t *pwt = &(env->active_fpu.fpr[wt].wr); + + pwd->w[0] = msa_dpadd_s_df(DF_WORD, pwd->w[0], pws->w[0], pwt->w[0]); + pwd->w[1] = msa_dpadd_s_df(DF_WORD, pwd->w[1], pws->w[1], pwt->w[1]); + pwd->w[2] = msa_dpadd_s_df(DF_WORD, pwd->w[2], pws->w[2], pwt->w[2]); + pwd->w[3] = msa_dpadd_s_df(DF_WORD, pwd->w[3], pws->w[3], pwt->w[3]); +} + +void helper_msa_dpadd_s_d(CPUMIPSState *env, + uint32_t wd, uint32_t ws, uint32_t wt) +{ + wr_t *pwd = &(env->active_fpu.fpr[wd].wr); + wr_t *pws = &(env->active_fpu.fpr[ws].wr); + wr_t *pwt = &(env->active_fpu.fpr[wt].wr); + + pwd->d[0] = msa_dpadd_s_df(DF_DOUBLE, pwd->d[0], pws->d[0], pwt->d[0]); + pwd->d[1] = msa_dpadd_s_df(DF_DOUBLE, pwd->d[1], pws->d[1], pwt->d[1]); +} + + +static inline int64_t msa_dpadd_u_df(uint32_t df, int64_t dest, int64_t arg1, + int64_t arg2) +{ + int64_t even_arg1; + int64_t even_arg2; + int64_t odd_arg1; + int64_t odd_arg2; + UNSIGNED_EXTRACT(even_arg1, odd_arg1, arg1, df); + UNSIGNED_EXTRACT(even_arg2, odd_arg2, arg2, df); + return dest + (even_arg1 * even_arg2) + (odd_arg1 * odd_arg2); +} + +void helper_msa_dpadd_u_h(CPUMIPSState *env, + uint32_t wd, uint32_t ws, uint32_t wt) +{ + wr_t *pwd = &(env->active_fpu.fpr[wd].wr); + wr_t *pws = &(env->active_fpu.fpr[ws].wr); + wr_t *pwt = &(env->active_fpu.fpr[wt].wr); + + pwd->h[0] = msa_dpadd_u_df(DF_HALF, pwd->h[0], pws->h[0], pwt->h[0]); + pwd->h[1] = 
msa_dpadd_u_df(DF_HALF, pwd->h[1], pws->h[1], pwt->h[1]); + pwd->h[2] = msa_dpadd_u_df(DF_HALF, pwd->h[2], pws->h[2], pwt->h[2]); + pwd->h[3] = msa_dpadd_u_df(DF_HALF, pwd->h[3], pws->h[3], pwt->h[3]); + pwd->h[4] = msa_dpadd_u_df(DF_HALF, pwd->h[4], pws->h[4], pwt->h[4]); + pwd->h[5] = msa_dpadd_u_df(DF_HALF, pwd->h[5], pws->h[5], pwt->h[5]); + pwd->h[6] = msa_dpadd_u_df(DF_HALF, pwd->h[6], pws->h[6], pwt->h[6]); + pwd->h[7] = msa_dpadd_u_df(DF_HALF, pwd->h[7], pws->h[7], pwt->h[7]); +} + +void helper_msa_dpadd_u_w(CPUMIPSState *env, + uint32_t wd, uint32_t ws, uint32_t wt) +{ + wr_t *pwd = &(env->active_fpu.fpr[wd].wr); + wr_t *pws = &(env->active_fpu.fpr[ws].wr); + wr_t *pwt = &(env->active_fpu.fpr[wt].wr); + + pwd->w[0] = msa_dpadd_u_df(DF_WORD, pwd->w[0], pws->w[0], pwt->w[0]); + pwd->w[1] = msa_dpadd_u_df(DF_WORD, pwd->w[1], pws->w[1], pwt->w[1]); + pwd->w[2] = msa_dpadd_u_df(DF_WORD, pwd->w[2], pws->w[2], pwt->w[2]); + pwd->w[3] = msa_dpadd_u_df(DF_WORD, pwd->w[3], pws->w[3], pwt->w[3]); +} + +void helper_msa_dpadd_u_d(CPUMIPSState *env, + uint32_t wd, uint32_t ws, uint32_t wt) +{ + wr_t *pwd = &(env->active_fpu.fpr[wd].wr); + wr_t *pws = &(env->active_fpu.fpr[ws].wr); + wr_t *pwt = &(env->active_fpu.fpr[wt].wr); + + pwd->d[0] = msa_dpadd_u_df(DF_DOUBLE, pwd->d[0], pws->d[0], pwt->d[0]); + pwd->d[1] = msa_dpadd_u_df(DF_DOUBLE, pwd->d[1], pws->d[1], pwt->d[1]); +} + + +static inline int64_t msa_dpsub_s_df(uint32_t df, int64_t dest, int64_t arg1, + int64_t arg2) +{ + int64_t even_arg1; + int64_t even_arg2; + int64_t odd_arg1; + int64_t odd_arg2; + SIGNED_EXTRACT(even_arg1, odd_arg1, arg1, df); + SIGNED_EXTRACT(even_arg2, odd_arg2, arg2, df); + return dest - ((even_arg1 * even_arg2) + (odd_arg1 * odd_arg2)); +} + +void helper_msa_dpsub_s_h(CPUMIPSState *env, + uint32_t wd, uint32_t ws, uint32_t wt) +{ + wr_t *pwd = &(env->active_fpu.fpr[wd].wr); + wr_t *pws = &(env->active_fpu.fpr[ws].wr); + wr_t *pwt = &(env->active_fpu.fpr[wt].wr); + + pwd->h[0] = msa_dpsub_s_df(DF_HALF, pwd->h[0], pws->h[0], pwt->h[0]); + pwd->h[1] = msa_dpsub_s_df(DF_HALF, pwd->h[1], pws->h[1], pwt->h[1]); + pwd->h[2] = msa_dpsub_s_df(DF_HALF, pwd->h[2], pws->h[2], pwt->h[2]); + pwd->h[3] = msa_dpsub_s_df(DF_HALF, pwd->h[3], pws->h[3], pwt->h[3]); + pwd->h[4] = msa_dpsub_s_df(DF_HALF, pwd->h[4], pws->h[4], pwt->h[4]); + pwd->h[5] = msa_dpsub_s_df(DF_HALF, pwd->h[5], pws->h[5], pwt->h[5]); + pwd->h[6] = msa_dpsub_s_df(DF_HALF, pwd->h[6], pws->h[6], pwt->h[6]); + pwd->h[7] = msa_dpsub_s_df(DF_HALF, pwd->h[7], pws->h[7], pwt->h[7]); +} + +void helper_msa_dpsub_s_w(CPUMIPSState *env, + uint32_t wd, uint32_t ws, uint32_t wt) +{ + wr_t *pwd = &(env->active_fpu.fpr[wd].wr); + wr_t *pws = &(env->active_fpu.fpr[ws].wr); + wr_t *pwt = &(env->active_fpu.fpr[wt].wr); + + pwd->w[0] = msa_dpsub_s_df(DF_WORD, pwd->w[0], pws->w[0], pwt->w[0]); + pwd->w[1] = msa_dpsub_s_df(DF_WORD, pwd->w[1], pws->w[1], pwt->w[1]); + pwd->w[2] = msa_dpsub_s_df(DF_WORD, pwd->w[2], pws->w[2], pwt->w[2]); + pwd->w[3] = msa_dpsub_s_df(DF_WORD, pwd->w[3], pws->w[3], pwt->w[3]); +} + +void helper_msa_dpsub_s_d(CPUMIPSState *env, + uint32_t wd, uint32_t ws, uint32_t wt) +{ + wr_t *pwd = &(env->active_fpu.fpr[wd].wr); + wr_t *pws = &(env->active_fpu.fpr[ws].wr); + wr_t *pwt = &(env->active_fpu.fpr[wt].wr); + + pwd->d[0] = msa_dpsub_s_df(DF_DOUBLE, pwd->d[0], pws->d[0], pwt->d[0]); + pwd->d[1] = msa_dpsub_s_df(DF_DOUBLE, pwd->d[1], pws->d[1], pwt->d[1]); +} + + +static inline int64_t msa_dpsub_u_df(uint32_t df, int64_t dest, int64_t arg1, + int64_t arg2) +{ 
+ int64_t even_arg1; + int64_t even_arg2; + int64_t odd_arg1; + int64_t odd_arg2; + UNSIGNED_EXTRACT(even_arg1, odd_arg1, arg1, df); + UNSIGNED_EXTRACT(even_arg2, odd_arg2, arg2, df); + return dest - ((even_arg1 * even_arg2) + (odd_arg1 * odd_arg2)); +} + +void helper_msa_dpsub_u_h(CPUMIPSState *env, + uint32_t wd, uint32_t ws, uint32_t wt) +{ + wr_t *pwd = &(env->active_fpu.fpr[wd].wr); + wr_t *pws = &(env->active_fpu.fpr[ws].wr); + wr_t *pwt = &(env->active_fpu.fpr[wt].wr); + + pwd->h[0] = msa_dpsub_u_df(DF_HALF, pwd->h[0], pws->h[0], pwt->h[0]); + pwd->h[1] = msa_dpsub_u_df(DF_HALF, pwd->h[1], pws->h[1], pwt->h[1]); + pwd->h[2] = msa_dpsub_u_df(DF_HALF, pwd->h[2], pws->h[2], pwt->h[2]); + pwd->h[3] = msa_dpsub_u_df(DF_HALF, pwd->h[3], pws->h[3], pwt->h[3]); + pwd->h[4] = msa_dpsub_u_df(DF_HALF, pwd->h[4], pws->h[4], pwt->h[4]); + pwd->h[5] = msa_dpsub_u_df(DF_HALF, pwd->h[5], pws->h[5], pwt->h[5]); + pwd->h[6] = msa_dpsub_u_df(DF_HALF, pwd->h[6], pws->h[6], pwt->h[6]); + pwd->h[7] = msa_dpsub_u_df(DF_HALF, pwd->h[7], pws->h[7], pwt->h[7]); +} + +void helper_msa_dpsub_u_w(CPUMIPSState *env, + uint32_t wd, uint32_t ws, uint32_t wt) +{ + wr_t *pwd = &(env->active_fpu.fpr[wd].wr); + wr_t *pws = &(env->active_fpu.fpr[ws].wr); + wr_t *pwt = &(env->active_fpu.fpr[wt].wr); + + pwd->w[0] = msa_dpsub_u_df(DF_WORD, pwd->w[0], pws->w[0], pwt->w[0]); + pwd->w[1] = msa_dpsub_u_df(DF_WORD, pwd->w[1], pws->w[1], pwt->w[1]); + pwd->w[2] = msa_dpsub_u_df(DF_WORD, pwd->w[2], pws->w[2], pwt->w[2]); + pwd->w[3] = msa_dpsub_u_df(DF_WORD, pwd->w[3], pws->w[3], pwt->w[3]); +} + +void helper_msa_dpsub_u_d(CPUMIPSState *env, + uint32_t wd, uint32_t ws, uint32_t wt) +{ + wr_t *pwd = &(env->active_fpu.fpr[wd].wr); + wr_t *pws = &(env->active_fpu.fpr[ws].wr); + wr_t *pwt = &(env->active_fpu.fpr[wt].wr); + + pwd->d[0] = msa_dpsub_u_df(DF_DOUBLE, pwd->d[0], pws->d[0], pwt->d[0]); + pwd->d[1] = msa_dpsub_u_df(DF_DOUBLE, pwd->d[1], pws->d[1], pwt->d[1]); +} /* @@ -2891,36 +3223,250 @@ void helper_msa_mod_u_d(CPUMIPSState *env, * +---------------+----------------------------------------------------------+ */ -/* TODO: insert Int Multiply group helpers here */ +static inline int64_t msa_maddv_df(uint32_t df, int64_t dest, int64_t arg1, + int64_t arg2) +{ + return dest + arg1 * arg2; +} +void helper_msa_maddv_b(CPUMIPSState *env, + uint32_t wd, uint32_t ws, uint32_t wt) +{ + wr_t *pwd = &(env->active_fpu.fpr[wd].wr); + wr_t *pws = &(env->active_fpu.fpr[ws].wr); + wr_t *pwt = &(env->active_fpu.fpr[wt].wr); -/* - * Int Subtract - * ------------ - * - * +---------------+----------------------------------------------------------+ - * | ASUB_S.B | Vector Absolute Values of Signed Subtract (byte) | - * | ASUB_S.H | Vector Absolute Values of Signed Subtract (halfword) | - * | ASUB_S.W | Vector Absolute Values of Signed Subtract (word) | - * | ASUB_S.D | Vector Absolute Values of Signed Subtract (doubleword) | - * | ASUB_U.B | Vector Absolute Values of Unsigned Subtract (byte) | - * | ASUB_U.H | Vector Absolute Values of Unsigned Subtract (halfword) | - * | ASUB_U.W | Vector Absolute Values of Unsigned Subtract (word) | - * | ASUB_U.D | Vector Absolute Values of Unsigned Subtract (doubleword) | - * | HSUB_S.H | Vector Signed Horizontal Subtract (halfword) | - * | HSUB_S.W | Vector Signed Horizontal Subtract (word) | - * | HSUB_S.D | Vector Signed Horizontal Subtract (doubleword) | - * | HSUB_U.H | Vector Unigned Horizontal Subtract (halfword) | - * | HSUB_U.W | Vector Unigned Horizontal Subtract (word) | - * | HSUB_U.D | Vector 
Unigned Horizontal Subtract (doubleword) | - * | SUBS_S.B | Vector Signed Saturated Subtract (of Signed) (byte) | - * | SUBS_S.H | Vector Signed Saturated Subtract (of Signed) (halfword) | - * | SUBS_S.W | Vector Signed Saturated Subtract (of Signed) (word) | - * | SUBS_S.D | Vector Signed Saturated Subtract (of Signed) (doubleword)| - * | SUBS_U.B | Vector Unsigned Saturated Subtract (of Uns.) (byte) | - * | SUBS_U.H | Vector Unsigned Saturated Subtract (of Uns.) (halfword) | - * | SUBS_U.W | Vector Unsigned Saturated Subtract (of Uns.) (word) | - * | SUBS_U.D | Vector Unsigned Saturated Subtract (of Uns.) (doubleword)| + pwd->b[0] = msa_maddv_df(DF_BYTE, pwd->b[0], pws->b[0], pwt->b[0]); + pwd->b[1] = msa_maddv_df(DF_BYTE, pwd->b[1], pws->b[1], pwt->b[1]); + pwd->b[2] = msa_maddv_df(DF_BYTE, pwd->b[2], pws->b[2], pwt->b[2]); + pwd->b[3] = msa_maddv_df(DF_BYTE, pwd->b[3], pws->b[3], pwt->b[3]); + pwd->b[4] = msa_maddv_df(DF_BYTE, pwd->b[4], pws->b[4], pwt->b[4]); + pwd->b[5] = msa_maddv_df(DF_BYTE, pwd->b[5], pws->b[5], pwt->b[5]); + pwd->b[6] = msa_maddv_df(DF_BYTE, pwd->b[6], pws->b[6], pwt->b[6]); + pwd->b[7] = msa_maddv_df(DF_BYTE, pwd->b[7], pws->b[7], pwt->b[7]); + pwd->b[8] = msa_maddv_df(DF_BYTE, pwd->b[8], pws->b[8], pwt->b[8]); + pwd->b[9] = msa_maddv_df(DF_BYTE, pwd->b[9], pws->b[9], pwt->b[9]); + pwd->b[10] = msa_maddv_df(DF_BYTE, pwd->b[10], pws->b[10], pwt->b[10]); + pwd->b[11] = msa_maddv_df(DF_BYTE, pwd->b[11], pws->b[11], pwt->b[11]); + pwd->b[12] = msa_maddv_df(DF_BYTE, pwd->b[12], pws->b[12], pwt->b[12]); + pwd->b[13] = msa_maddv_df(DF_BYTE, pwd->b[13], pws->b[13], pwt->b[13]); + pwd->b[14] = msa_maddv_df(DF_BYTE, pwd->b[14], pws->b[14], pwt->b[14]); + pwd->b[15] = msa_maddv_df(DF_BYTE, pwd->b[15], pws->b[15], pwt->b[15]); +} + +void helper_msa_maddv_h(CPUMIPSState *env, + uint32_t wd, uint32_t ws, uint32_t wt) +{ + wr_t *pwd = &(env->active_fpu.fpr[wd].wr); + wr_t *pws = &(env->active_fpu.fpr[ws].wr); + wr_t *pwt = &(env->active_fpu.fpr[wt].wr); + + pwd->h[0] = msa_maddv_df(DF_HALF, pwd->h[0], pws->h[0], pwt->h[0]); + pwd->h[1] = msa_maddv_df(DF_HALF, pwd->h[1], pws->h[1], pwt->h[1]); + pwd->h[2] = msa_maddv_df(DF_HALF, pwd->h[2], pws->h[2], pwt->h[2]); + pwd->h[3] = msa_maddv_df(DF_HALF, pwd->h[3], pws->h[3], pwt->h[3]); + pwd->h[4] = msa_maddv_df(DF_HALF, pwd->h[4], pws->h[4], pwt->h[4]); + pwd->h[5] = msa_maddv_df(DF_HALF, pwd->h[5], pws->h[5], pwt->h[5]); + pwd->h[6] = msa_maddv_df(DF_HALF, pwd->h[6], pws->h[6], pwt->h[6]); + pwd->h[7] = msa_maddv_df(DF_HALF, pwd->h[7], pws->h[7], pwt->h[7]); +} + +void helper_msa_maddv_w(CPUMIPSState *env, + uint32_t wd, uint32_t ws, uint32_t wt) +{ + wr_t *pwd = &(env->active_fpu.fpr[wd].wr); + wr_t *pws = &(env->active_fpu.fpr[ws].wr); + wr_t *pwt = &(env->active_fpu.fpr[wt].wr); + + pwd->w[0] = msa_maddv_df(DF_WORD, pwd->w[0], pws->w[0], pwt->w[0]); + pwd->w[1] = msa_maddv_df(DF_WORD, pwd->w[1], pws->w[1], pwt->w[1]); + pwd->w[2] = msa_maddv_df(DF_WORD, pwd->w[2], pws->w[2], pwt->w[2]); + pwd->w[3] = msa_maddv_df(DF_WORD, pwd->w[3], pws->w[3], pwt->w[3]); +} + +void helper_msa_maddv_d(CPUMIPSState *env, + uint32_t wd, uint32_t ws, uint32_t wt) +{ + wr_t *pwd = &(env->active_fpu.fpr[wd].wr); + wr_t *pws = &(env->active_fpu.fpr[ws].wr); + wr_t *pwt = &(env->active_fpu.fpr[wt].wr); + + pwd->d[0] = msa_maddv_df(DF_DOUBLE, pwd->d[0], pws->d[0], pwt->d[0]); + pwd->d[1] = msa_maddv_df(DF_DOUBLE, pwd->d[1], pws->d[1], pwt->d[1]); +} + +static inline int64_t msa_msubv_df(uint32_t df, int64_t dest, int64_t arg1, + int64_t arg2) +{ +
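    /*
     * MSUBV: 'dest' carries the current wd element; the result is that value
     * minus the product of the corresponding ws and wt elements, the
     * subtractive counterpart of msa_maddv_df above.
     */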
return dest - arg1 * arg2; +} + +void helper_msa_msubv_b(CPUMIPSState *env, + uint32_t wd, uint32_t ws, uint32_t wt) +{ + wr_t *pwd = &(env->active_fpu.fpr[wd].wr); + wr_t *pws = &(env->active_fpu.fpr[ws].wr); + wr_t *pwt = &(env->active_fpu.fpr[wt].wr); + + pwd->b[0] = msa_msubv_df(DF_BYTE, pwd->b[0], pws->b[0], pwt->b[0]); + pwd->b[1] = msa_msubv_df(DF_BYTE, pwd->b[1], pws->b[1], pwt->b[1]); + pwd->b[2] = msa_msubv_df(DF_BYTE, pwd->b[2], pws->b[2], pwt->b[2]); + pwd->b[3] = msa_msubv_df(DF_BYTE, pwd->b[3], pws->b[3], pwt->b[3]); + pwd->b[4] = msa_msubv_df(DF_BYTE, pwd->b[4], pws->b[4], pwt->b[4]); + pwd->b[5] = msa_msubv_df(DF_BYTE, pwd->b[5], pws->b[5], pwt->b[5]); + pwd->b[6] = msa_msubv_df(DF_BYTE, pwd->b[6], pws->b[6], pwt->b[6]); + pwd->b[7] = msa_msubv_df(DF_BYTE, pwd->b[7], pws->b[7], pwt->b[7]); + pwd->b[8] = msa_msubv_df(DF_BYTE, pwd->b[8], pws->b[8], pwt->b[8]); + pwd->b[9] = msa_msubv_df(DF_BYTE, pwd->b[9], pws->b[9], pwt->b[9]); + pwd->b[10] = msa_msubv_df(DF_BYTE, pwd->b[10], pws->b[10], pwt->b[10]); + pwd->b[11] = msa_msubv_df(DF_BYTE, pwd->b[11], pws->b[11], pwt->b[11]); + pwd->b[12] = msa_msubv_df(DF_BYTE, pwd->b[12], pws->b[12], pwt->b[12]); + pwd->b[13] = msa_msubv_df(DF_BYTE, pwd->b[13], pws->b[13], pwt->b[13]); + pwd->b[14] = msa_msubv_df(DF_BYTE, pwd->b[14], pws->b[14], pwt->b[14]); + pwd->b[15] = msa_msubv_df(DF_BYTE, pwd->b[15], pws->b[15], pwt->b[15]); +} + +void helper_msa_msubv_h(CPUMIPSState *env, + uint32_t wd, uint32_t ws, uint32_t wt) +{ + wr_t *pwd = &(env->active_fpu.fpr[wd].wr); + wr_t *pws = &(env->active_fpu.fpr[ws].wr); + wr_t *pwt = &(env->active_fpu.fpr[wt].wr); + + pwd->h[0] = msa_msubv_df(DF_HALF, pwd->h[0], pws->h[0], pwt->h[0]); + pwd->h[1] = msa_msubv_df(DF_HALF, pwd->h[1], pws->h[1], pwt->h[1]); + pwd->h[2] = msa_msubv_df(DF_HALF, pwd->h[2], pws->h[2], pwt->h[2]); + pwd->h[3] = msa_msubv_df(DF_HALF, pwd->h[3], pws->h[3], pwt->h[3]); + pwd->h[4] = msa_msubv_df(DF_HALF, pwd->h[4], pws->h[4], pwt->h[4]); + pwd->h[5] = msa_msubv_df(DF_HALF, pwd->h[5], pws->h[5], pwt->h[5]); + pwd->h[6] = msa_msubv_df(DF_HALF, pwd->h[6], pws->h[6], pwt->h[6]); + pwd->h[7] = msa_msubv_df(DF_HALF, pwd->h[7], pws->h[7], pwt->h[7]); +} + +void helper_msa_msubv_w(CPUMIPSState *env, + uint32_t wd, uint32_t ws, uint32_t wt) +{ + wr_t *pwd = &(env->active_fpu.fpr[wd].wr); + wr_t *pws = &(env->active_fpu.fpr[ws].wr); + wr_t *pwt = &(env->active_fpu.fpr[wt].wr); + + pwd->w[0] = msa_msubv_df(DF_WORD, pwd->w[0], pws->w[0], pwt->w[0]); + pwd->w[1] = msa_msubv_df(DF_WORD, pwd->w[1], pws->w[1], pwt->w[1]); + pwd->w[2] = msa_msubv_df(DF_WORD, pwd->w[2], pws->w[2], pwt->w[2]); + pwd->w[3] = msa_msubv_df(DF_WORD, pwd->w[3], pws->w[3], pwt->w[3]); +} + +void helper_msa_msubv_d(CPUMIPSState *env, + uint32_t wd, uint32_t ws, uint32_t wt) +{ + wr_t *pwd = &(env->active_fpu.fpr[wd].wr); + wr_t *pws = &(env->active_fpu.fpr[ws].wr); + wr_t *pwt = &(env->active_fpu.fpr[wt].wr); + + pwd->d[0] = msa_msubv_df(DF_DOUBLE, pwd->d[0], pws->d[0], pwt->d[0]); + pwd->d[1] = msa_msubv_df(DF_DOUBLE, pwd->d[1], pws->d[1], pwt->d[1]); +} + + +static inline int64_t msa_mulv_df(uint32_t df, int64_t arg1, int64_t arg2) +{ + return arg1 * arg2; +} + +void helper_msa_mulv_b(CPUMIPSState *env, + uint32_t wd, uint32_t ws, uint32_t wt) +{ + wr_t *pwd = &(env->active_fpu.fpr[wd].wr); + wr_t *pws = &(env->active_fpu.fpr[ws].wr); + wr_t *pwt = &(env->active_fpu.fpr[wt].wr); + + pwd->b[0] = msa_mulv_df(DF_BYTE, pws->b[0], pwt->b[0]); + pwd->b[1] = msa_mulv_df(DF_BYTE, pws->b[1], pwt->b[1]); + pwd->b[2] =
msa_mulv_df(DF_BYTE, pws->b[2], pwt->b[2]); + pwd->b[3] = msa_mulv_df(DF_BYTE, pws->b[3], pwt->b[3]); + pwd->b[4] = msa_mulv_df(DF_BYTE, pws->b[4], pwt->b[4]); + pwd->b[5] = msa_mulv_df(DF_BYTE, pws->b[5], pwt->b[5]); + pwd->b[6] = msa_mulv_df(DF_BYTE, pws->b[6], pwt->b[6]); + pwd->b[7] = msa_mulv_df(DF_BYTE, pws->b[7], pwt->b[7]); + pwd->b[8] = msa_mulv_df(DF_BYTE, pws->b[8], pwt->b[8]); + pwd->b[9] = msa_mulv_df(DF_BYTE, pws->b[9], pwt->b[9]); + pwd->b[10] = msa_mulv_df(DF_BYTE, pws->b[10], pwt->b[10]); + pwd->b[11] = msa_mulv_df(DF_BYTE, pws->b[11], pwt->b[11]); + pwd->b[12] = msa_mulv_df(DF_BYTE, pws->b[12], pwt->b[12]); + pwd->b[13] = msa_mulv_df(DF_BYTE, pws->b[13], pwt->b[13]); + pwd->b[14] = msa_mulv_df(DF_BYTE, pws->b[14], pwt->b[14]); + pwd->b[15] = msa_mulv_df(DF_BYTE, pws->b[15], pwt->b[15]); +} + +void helper_msa_mulv_h(CPUMIPSState *env, + uint32_t wd, uint32_t ws, uint32_t wt) +{ + wr_t *pwd = &(env->active_fpu.fpr[wd].wr); + wr_t *pws = &(env->active_fpu.fpr[ws].wr); + wr_t *pwt = &(env->active_fpu.fpr[wt].wr); + + pwd->h[0] = msa_mulv_df(DF_HALF, pws->h[0], pwt->h[0]); + pwd->h[1] = msa_mulv_df(DF_HALF, pws->h[1], pwt->h[1]); + pwd->h[2] = msa_mulv_df(DF_HALF, pws->h[2], pwt->h[2]); + pwd->h[3] = msa_mulv_df(DF_HALF, pws->h[3], pwt->h[3]); + pwd->h[4] = msa_mulv_df(DF_HALF, pws->h[4], pwt->h[4]); + pwd->h[5] = msa_mulv_df(DF_HALF, pws->h[5], pwt->h[5]); + pwd->h[6] = msa_mulv_df(DF_HALF, pws->h[6], pwt->h[6]); + pwd->h[7] = msa_mulv_df(DF_HALF, pws->h[7], pwt->h[7]); +} + +void helper_msa_mulv_w(CPUMIPSState *env, + uint32_t wd, uint32_t ws, uint32_t wt) +{ + wr_t *pwd = &(env->active_fpu.fpr[wd].wr); + wr_t *pws = &(env->active_fpu.fpr[ws].wr); + wr_t *pwt = &(env->active_fpu.fpr[wt].wr); + + pwd->w[0] = msa_mulv_df(DF_WORD, pws->w[0], pwt->w[0]); + pwd->w[1] = msa_mulv_df(DF_WORD, pws->w[1], pwt->w[1]); + pwd->w[2] = msa_mulv_df(DF_WORD, pws->w[2], pwt->w[2]); + pwd->w[3] = msa_mulv_df(DF_WORD, pws->w[3], pwt->w[3]); +} + +void helper_msa_mulv_d(CPUMIPSState *env, + uint32_t wd, uint32_t ws, uint32_t wt) +{ + wr_t *pwd = &(env->active_fpu.fpr[wd].wr); + wr_t *pws = &(env->active_fpu.fpr[ws].wr); + wr_t *pwt = &(env->active_fpu.fpr[wt].wr); + + pwd->d[0] = msa_mulv_df(DF_DOUBLE, pws->d[0], pwt->d[0]); + pwd->d[1] = msa_mulv_df(DF_DOUBLE, pws->d[1], pwt->d[1]); +} + + +/* + * Int Subtract + * ------------ + * + * +---------------+----------------------------------------------------------+ + * | ASUB_S.B | Vector Absolute Values of Signed Subtract (byte) | + * | ASUB_S.H | Vector Absolute Values of Signed Subtract (halfword) | + * | ASUB_S.W | Vector Absolute Values of Signed Subtract (word) | + * | ASUB_S.D | Vector Absolute Values of Signed Subtract (doubleword) | + * | ASUB_U.B | Vector Absolute Values of Unsigned Subtract (byte) | + * | ASUB_U.H | Vector Absolute Values of Unsigned Subtract (halfword) | + * | ASUB_U.W | Vector Absolute Values of Unsigned Subtract (word) | + * | ASUB_U.D | Vector Absolute Values of Unsigned Subtract (doubleword) | + * | HSUB_S.H | Vector Signed Horizontal Subtract (halfword) | + * | HSUB_S.W | Vector Signed Horizontal Subtract (word) | + * | HSUB_S.D | Vector Signed Horizontal Subtract (doubleword) | + * | HSUB_U.H | Vector Unigned Horizontal Subtract (halfword) | + * | HSUB_U.W | Vector Unigned Horizontal Subtract (word) | + * | HSUB_U.D | Vector Unigned Horizontal Subtract (doubleword) | + * | SUBS_S.B | Vector Signed Saturated Subtract (of Signed) (byte) | + * | SUBS_S.H | Vector Signed Saturated Subtract (of Signed) (halfword) | + * 
| SUBS_S.W | Vector Signed Saturated Subtract (of Signed) (word) | + * | SUBS_S.D | Vector Signed Saturated Subtract (of Signed) (doubleword)| + * | SUBS_U.B | Vector Unsigned Saturated Subtract (of Uns.) (byte) | + * | SUBS_U.H | Vector Unsigned Saturated Subtract (of Uns.) (halfword) | + * | SUBS_U.W | Vector Unsigned Saturated Subtract (of Uns.) (word) | + * | SUBS_U.D | Vector Unsigned Saturated Subtract (of Uns.) (doubleword)| * | SUBSUS_U.B | Vector Uns. Sat. Subtract (of S. from Uns.) (byte) | * | SUBSUS_U.H | Vector Uns. Sat. Subtract (of S. from Uns.) (halfword) | * | SUBSUS_U.W | Vector Uns. Sat. Subtract (of S. from Uns.) (word) | @@ -3045,142 +3591,531 @@ void helper_msa_asub_u_b(CPUMIPSState *env, pwd->b[15] = msa_asub_u_df(DF_BYTE, pws->b[15], pwt->b[15]); } -void helper_msa_asub_u_h(CPUMIPSState *env, - uint32_t wd, uint32_t ws, uint32_t wt) +void helper_msa_asub_u_h(CPUMIPSState *env, + uint32_t wd, uint32_t ws, uint32_t wt) +{ + wr_t *pwd = &(env->active_fpu.fpr[wd].wr); + wr_t *pws = &(env->active_fpu.fpr[ws].wr); + wr_t *pwt = &(env->active_fpu.fpr[wt].wr); + + pwd->h[0] = msa_asub_u_df(DF_HALF, pws->h[0], pwt->h[0]); + pwd->h[1] = msa_asub_u_df(DF_HALF, pws->h[1], pwt->h[1]); + pwd->h[2] = msa_asub_u_df(DF_HALF, pws->h[2], pwt->h[2]); + pwd->h[3] = msa_asub_u_df(DF_HALF, pws->h[3], pwt->h[3]); + pwd->h[4] = msa_asub_u_df(DF_HALF, pws->h[4], pwt->h[4]); + pwd->h[5] = msa_asub_u_df(DF_HALF, pws->h[5], pwt->h[5]); + pwd->h[6] = msa_asub_u_df(DF_HALF, pws->h[6], pwt->h[6]); + pwd->h[7] = msa_asub_u_df(DF_HALF, pws->h[7], pwt->h[7]); +} + +void helper_msa_asub_u_w(CPUMIPSState *env, + uint32_t wd, uint32_t ws, uint32_t wt) +{ + wr_t *pwd = &(env->active_fpu.fpr[wd].wr); + wr_t *pws = &(env->active_fpu.fpr[ws].wr); + wr_t *pwt = &(env->active_fpu.fpr[wt].wr); + + pwd->w[0] = msa_asub_u_df(DF_WORD, pws->w[0], pwt->w[0]); + pwd->w[1] = msa_asub_u_df(DF_WORD, pws->w[1], pwt->w[1]); + pwd->w[2] = msa_asub_u_df(DF_WORD, pws->w[2], pwt->w[2]); + pwd->w[3] = msa_asub_u_df(DF_WORD, pws->w[3], pwt->w[3]); +} + +void helper_msa_asub_u_d(CPUMIPSState *env, + uint32_t wd, uint32_t ws, uint32_t wt) +{ + wr_t *pwd = &(env->active_fpu.fpr[wd].wr); + wr_t *pws = &(env->active_fpu.fpr[ws].wr); + wr_t *pwt = &(env->active_fpu.fpr[wt].wr); + + pwd->d[0] = msa_asub_u_df(DF_DOUBLE, pws->d[0], pwt->d[0]); + pwd->d[1] = msa_asub_u_df(DF_DOUBLE, pws->d[1], pwt->d[1]); +} + + +static inline int64_t msa_hsub_s_df(uint32_t df, int64_t arg1, int64_t arg2) +{ + return SIGNED_ODD(arg1, df) - SIGNED_EVEN(arg2, df); +} + +void helper_msa_hsub_s_h(CPUMIPSState *env, + uint32_t wd, uint32_t ws, uint32_t wt) +{ + wr_t *pwd = &(env->active_fpu.fpr[wd].wr); + wr_t *pws = &(env->active_fpu.fpr[ws].wr); + wr_t *pwt = &(env->active_fpu.fpr[wt].wr); + + pwd->h[0] = msa_hsub_s_df(DF_HALF, pws->h[0], pwt->h[0]); + pwd->h[1] = msa_hsub_s_df(DF_HALF, pws->h[1], pwt->h[1]); + pwd->h[2] = msa_hsub_s_df(DF_HALF, pws->h[2], pwt->h[2]); + pwd->h[3] = msa_hsub_s_df(DF_HALF, pws->h[3], pwt->h[3]); + pwd->h[4] = msa_hsub_s_df(DF_HALF, pws->h[4], pwt->h[4]); + pwd->h[5] = msa_hsub_s_df(DF_HALF, pws->h[5], pwt->h[5]); + pwd->h[6] = msa_hsub_s_df(DF_HALF, pws->h[6], pwt->h[6]); + pwd->h[7] = msa_hsub_s_df(DF_HALF, pws->h[7], pwt->h[7]); +} + +void helper_msa_hsub_s_w(CPUMIPSState *env, + uint32_t wd, uint32_t ws, uint32_t wt) +{ + wr_t *pwd = &(env->active_fpu.fpr[wd].wr); + wr_t *pws = &(env->active_fpu.fpr[ws].wr); + wr_t *pwt = &(env->active_fpu.fpr[wt].wr); + + pwd->w[0] = msa_hsub_s_df(DF_WORD, pws->w[0], pwt->w[0]); + 
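    /*
     * HSUB_S.W: each destination word is the sign-extended odd (high)
     * halfword of the ws element minus the sign-extended even (low)
     * halfword of the corresponding wt element (see msa_hsub_s_df above).
     */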
pwd->w[1] = msa_hsub_s_df(DF_WORD, pws->w[1], pwt->w[1]); + pwd->w[2] = msa_hsub_s_df(DF_WORD, pws->w[2], pwt->w[2]); + pwd->w[3] = msa_hsub_s_df(DF_WORD, pws->w[3], pwt->w[3]); +} + +void helper_msa_hsub_s_d(CPUMIPSState *env, + uint32_t wd, uint32_t ws, uint32_t wt) +{ + wr_t *pwd = &(env->active_fpu.fpr[wd].wr); + wr_t *pws = &(env->active_fpu.fpr[ws].wr); + wr_t *pwt = &(env->active_fpu.fpr[wt].wr); + + pwd->d[0] = msa_hsub_s_df(DF_DOUBLE, pws->d[0], pwt->d[0]); + pwd->d[1] = msa_hsub_s_df(DF_DOUBLE, pws->d[1], pwt->d[1]); +} + + +static inline int64_t msa_hsub_u_df(uint32_t df, int64_t arg1, int64_t arg2) +{ + return UNSIGNED_ODD(arg1, df) - UNSIGNED_EVEN(arg2, df); +} + +void helper_msa_hsub_u_h(CPUMIPSState *env, + uint32_t wd, uint32_t ws, uint32_t wt) +{ + wr_t *pwd = &(env->active_fpu.fpr[wd].wr); + wr_t *pws = &(env->active_fpu.fpr[ws].wr); + wr_t *pwt = &(env->active_fpu.fpr[wt].wr); + + pwd->h[0] = msa_hsub_u_df(DF_HALF, pws->h[0], pwt->h[0]); + pwd->h[1] = msa_hsub_u_df(DF_HALF, pws->h[1], pwt->h[1]); + pwd->h[2] = msa_hsub_u_df(DF_HALF, pws->h[2], pwt->h[2]); + pwd->h[3] = msa_hsub_u_df(DF_HALF, pws->h[3], pwt->h[3]); + pwd->h[4] = msa_hsub_u_df(DF_HALF, pws->h[4], pwt->h[4]); + pwd->h[5] = msa_hsub_u_df(DF_HALF, pws->h[5], pwt->h[5]); + pwd->h[6] = msa_hsub_u_df(DF_HALF, pws->h[6], pwt->h[6]); + pwd->h[7] = msa_hsub_u_df(DF_HALF, pws->h[7], pwt->h[7]); +} + +void helper_msa_hsub_u_w(CPUMIPSState *env, + uint32_t wd, uint32_t ws, uint32_t wt) +{ + wr_t *pwd = &(env->active_fpu.fpr[wd].wr); + wr_t *pws = &(env->active_fpu.fpr[ws].wr); + wr_t *pwt = &(env->active_fpu.fpr[wt].wr); + + pwd->w[0] = msa_hsub_u_df(DF_WORD, pws->w[0], pwt->w[0]); + pwd->w[1] = msa_hsub_u_df(DF_WORD, pws->w[1], pwt->w[1]); + pwd->w[2] = msa_hsub_u_df(DF_WORD, pws->w[2], pwt->w[2]); + pwd->w[3] = msa_hsub_u_df(DF_WORD, pws->w[3], pwt->w[3]); +} + +void helper_msa_hsub_u_d(CPUMIPSState *env, + uint32_t wd, uint32_t ws, uint32_t wt) +{ + wr_t *pwd = &(env->active_fpu.fpr[wd].wr); + wr_t *pws = &(env->active_fpu.fpr[ws].wr); + wr_t *pwt = &(env->active_fpu.fpr[wt].wr); + + pwd->d[0] = msa_hsub_u_df(DF_DOUBLE, pws->d[0], pwt->d[0]); + pwd->d[1] = msa_hsub_u_df(DF_DOUBLE, pws->d[1], pwt->d[1]); +} + + +static inline int64_t msa_subs_s_df(uint32_t df, int64_t arg1, int64_t arg2) +{ + int64_t max_int = DF_MAX_INT(df); + int64_t min_int = DF_MIN_INT(df); + if (arg2 > 0) { + return (min_int + arg2 < arg1) ? arg1 - arg2 : min_int; + } else { + return (arg1 < max_int + arg2) ? 
arg1 - arg2 : max_int; + } +} + +void helper_msa_subs_s_b(CPUMIPSState *env, + uint32_t wd, uint32_t ws, uint32_t wt) +{ + wr_t *pwd = &(env->active_fpu.fpr[wd].wr); + wr_t *pws = &(env->active_fpu.fpr[ws].wr); + wr_t *pwt = &(env->active_fpu.fpr[wt].wr); + + pwd->b[0] = msa_subs_s_df(DF_BYTE, pws->b[0], pwt->b[0]); + pwd->b[1] = msa_subs_s_df(DF_BYTE, pws->b[1], pwt->b[1]); + pwd->b[2] = msa_subs_s_df(DF_BYTE, pws->b[2], pwt->b[2]); + pwd->b[3] = msa_subs_s_df(DF_BYTE, pws->b[3], pwt->b[3]); + pwd->b[4] = msa_subs_s_df(DF_BYTE, pws->b[4], pwt->b[4]); + pwd->b[5] = msa_subs_s_df(DF_BYTE, pws->b[5], pwt->b[5]); + pwd->b[6] = msa_subs_s_df(DF_BYTE, pws->b[6], pwt->b[6]); + pwd->b[7] = msa_subs_s_df(DF_BYTE, pws->b[7], pwt->b[7]); + pwd->b[8] = msa_subs_s_df(DF_BYTE, pws->b[8], pwt->b[8]); + pwd->b[9] = msa_subs_s_df(DF_BYTE, pws->b[9], pwt->b[9]); + pwd->b[10] = msa_subs_s_df(DF_BYTE, pws->b[10], pwt->b[10]); + pwd->b[11] = msa_subs_s_df(DF_BYTE, pws->b[11], pwt->b[11]); + pwd->b[12] = msa_subs_s_df(DF_BYTE, pws->b[12], pwt->b[12]); + pwd->b[13] = msa_subs_s_df(DF_BYTE, pws->b[13], pwt->b[13]); + pwd->b[14] = msa_subs_s_df(DF_BYTE, pws->b[14], pwt->b[14]); + pwd->b[15] = msa_subs_s_df(DF_BYTE, pws->b[15], pwt->b[15]); +} + +void helper_msa_subs_s_h(CPUMIPSState *env, + uint32_t wd, uint32_t ws, uint32_t wt) +{ + wr_t *pwd = &(env->active_fpu.fpr[wd].wr); + wr_t *pws = &(env->active_fpu.fpr[ws].wr); + wr_t *pwt = &(env->active_fpu.fpr[wt].wr); + + pwd->h[0] = msa_subs_s_df(DF_HALF, pws->h[0], pwt->h[0]); + pwd->h[1] = msa_subs_s_df(DF_HALF, pws->h[1], pwt->h[1]); + pwd->h[2] = msa_subs_s_df(DF_HALF, pws->h[2], pwt->h[2]); + pwd->h[3] = msa_subs_s_df(DF_HALF, pws->h[3], pwt->h[3]); + pwd->h[4] = msa_subs_s_df(DF_HALF, pws->h[4], pwt->h[4]); + pwd->h[5] = msa_subs_s_df(DF_HALF, pws->h[5], pwt->h[5]); + pwd->h[6] = msa_subs_s_df(DF_HALF, pws->h[6], pwt->h[6]); + pwd->h[7] = msa_subs_s_df(DF_HALF, pws->h[7], pwt->h[7]); +} + +void helper_msa_subs_s_w(CPUMIPSState *env, + uint32_t wd, uint32_t ws, uint32_t wt) +{ + wr_t *pwd = &(env->active_fpu.fpr[wd].wr); + wr_t *pws = &(env->active_fpu.fpr[ws].wr); + wr_t *pwt = &(env->active_fpu.fpr[wt].wr); + + pwd->w[0] = msa_subs_s_df(DF_WORD, pws->w[0], pwt->w[0]); + pwd->w[1] = msa_subs_s_df(DF_WORD, pws->w[1], pwt->w[1]); + pwd->w[2] = msa_subs_s_df(DF_WORD, pws->w[2], pwt->w[2]); + pwd->w[3] = msa_subs_s_df(DF_WORD, pws->w[3], pwt->w[3]); +} + +void helper_msa_subs_s_d(CPUMIPSState *env, + uint32_t wd, uint32_t ws, uint32_t wt) +{ + wr_t *pwd = &(env->active_fpu.fpr[wd].wr); + wr_t *pws = &(env->active_fpu.fpr[ws].wr); + wr_t *pwt = &(env->active_fpu.fpr[wt].wr); + + pwd->d[0] = msa_subs_s_df(DF_DOUBLE, pws->d[0], pwt->d[0]); + pwd->d[1] = msa_subs_s_df(DF_DOUBLE, pws->d[1], pwt->d[1]); +} + + +static inline int64_t msa_subs_u_df(uint32_t df, int64_t arg1, int64_t arg2) +{ + uint64_t u_arg1 = UNSIGNED(arg1, df); + uint64_t u_arg2 = UNSIGNED(arg2, df); + return (u_arg1 > u_arg2) ? 
u_arg1 - u_arg2 : 0; +} + +void helper_msa_subs_u_b(CPUMIPSState *env, + uint32_t wd, uint32_t ws, uint32_t wt) +{ + wr_t *pwd = &(env->active_fpu.fpr[wd].wr); + wr_t *pws = &(env->active_fpu.fpr[ws].wr); + wr_t *pwt = &(env->active_fpu.fpr[wt].wr); + + pwd->b[0] = msa_subs_u_df(DF_BYTE, pws->b[0], pwt->b[0]); + pwd->b[1] = msa_subs_u_df(DF_BYTE, pws->b[1], pwt->b[1]); + pwd->b[2] = msa_subs_u_df(DF_BYTE, pws->b[2], pwt->b[2]); + pwd->b[3] = msa_subs_u_df(DF_BYTE, pws->b[3], pwt->b[3]); + pwd->b[4] = msa_subs_u_df(DF_BYTE, pws->b[4], pwt->b[4]); + pwd->b[5] = msa_subs_u_df(DF_BYTE, pws->b[5], pwt->b[5]); + pwd->b[6] = msa_subs_u_df(DF_BYTE, pws->b[6], pwt->b[6]); + pwd->b[7] = msa_subs_u_df(DF_BYTE, pws->b[7], pwt->b[7]); + pwd->b[8] = msa_subs_u_df(DF_BYTE, pws->b[8], pwt->b[8]); + pwd->b[9] = msa_subs_u_df(DF_BYTE, pws->b[9], pwt->b[9]); + pwd->b[10] = msa_subs_u_df(DF_BYTE, pws->b[10], pwt->b[10]); + pwd->b[11] = msa_subs_u_df(DF_BYTE, pws->b[11], pwt->b[11]); + pwd->b[12] = msa_subs_u_df(DF_BYTE, pws->b[12], pwt->b[12]); + pwd->b[13] = msa_subs_u_df(DF_BYTE, pws->b[13], pwt->b[13]); + pwd->b[14] = msa_subs_u_df(DF_BYTE, pws->b[14], pwt->b[14]); + pwd->b[15] = msa_subs_u_df(DF_BYTE, pws->b[15], pwt->b[15]); +} + +void helper_msa_subs_u_h(CPUMIPSState *env, + uint32_t wd, uint32_t ws, uint32_t wt) +{ + wr_t *pwd = &(env->active_fpu.fpr[wd].wr); + wr_t *pws = &(env->active_fpu.fpr[ws].wr); + wr_t *pwt = &(env->active_fpu.fpr[wt].wr); + + pwd->h[0] = msa_subs_u_df(DF_HALF, pws->h[0], pwt->h[0]); + pwd->h[1] = msa_subs_u_df(DF_HALF, pws->h[1], pwt->h[1]); + pwd->h[2] = msa_subs_u_df(DF_HALF, pws->h[2], pwt->h[2]); + pwd->h[3] = msa_subs_u_df(DF_HALF, pws->h[3], pwt->h[3]); + pwd->h[4] = msa_subs_u_df(DF_HALF, pws->h[4], pwt->h[4]); + pwd->h[5] = msa_subs_u_df(DF_HALF, pws->h[5], pwt->h[5]); + pwd->h[6] = msa_subs_u_df(DF_HALF, pws->h[6], pwt->h[6]); + pwd->h[7] = msa_subs_u_df(DF_HALF, pws->h[7], pwt->h[7]); +} + +void helper_msa_subs_u_w(CPUMIPSState *env, + uint32_t wd, uint32_t ws, uint32_t wt) +{ + wr_t *pwd = &(env->active_fpu.fpr[wd].wr); + wr_t *pws = &(env->active_fpu.fpr[ws].wr); + wr_t *pwt = &(env->active_fpu.fpr[wt].wr); + + pwd->w[0] = msa_subs_u_df(DF_WORD, pws->w[0], pwt->w[0]); + pwd->w[1] = msa_subs_u_df(DF_WORD, pws->w[1], pwt->w[1]); + pwd->w[2] = msa_subs_u_df(DF_WORD, pws->w[2], pwt->w[2]); + pwd->w[3] = msa_subs_u_df(DF_WORD, pws->w[3], pwt->w[3]); +} + +void helper_msa_subs_u_d(CPUMIPSState *env, + uint32_t wd, uint32_t ws, uint32_t wt) +{ + wr_t *pwd = &(env->active_fpu.fpr[wd].wr); + wr_t *pws = &(env->active_fpu.fpr[ws].wr); + wr_t *pwt = &(env->active_fpu.fpr[wt].wr); + + pwd->d[0] = msa_subs_u_df(DF_DOUBLE, pws->d[0], pwt->d[0]); + pwd->d[1] = msa_subs_u_df(DF_DOUBLE, pws->d[1], pwt->d[1]); +} + + +static inline int64_t msa_subsus_u_df(uint32_t df, int64_t arg1, int64_t arg2) +{ + uint64_t u_arg1 = UNSIGNED(arg1, df); + uint64_t max_uint = DF_MAX_UINT(df); + if (arg2 >= 0) { + uint64_t u_arg2 = (uint64_t)arg2; + return (u_arg1 > u_arg2) ? + (int64_t)(u_arg1 - u_arg2) : + 0; + } else { + uint64_t u_arg2 = (uint64_t)(-arg2); + return (u_arg1 < max_uint - u_arg2) ? 
+ (int64_t)(u_arg1 + u_arg2) : + (int64_t)max_uint; + } +} + +void helper_msa_subsus_u_b(CPUMIPSState *env, + uint32_t wd, uint32_t ws, uint32_t wt) +{ + wr_t *pwd = &(env->active_fpu.fpr[wd].wr); + wr_t *pws = &(env->active_fpu.fpr[ws].wr); + wr_t *pwt = &(env->active_fpu.fpr[wt].wr); + + pwd->b[0] = msa_subsus_u_df(DF_BYTE, pws->b[0], pwt->b[0]); + pwd->b[1] = msa_subsus_u_df(DF_BYTE, pws->b[1], pwt->b[1]); + pwd->b[2] = msa_subsus_u_df(DF_BYTE, pws->b[2], pwt->b[2]); + pwd->b[3] = msa_subsus_u_df(DF_BYTE, pws->b[3], pwt->b[3]); + pwd->b[4] = msa_subsus_u_df(DF_BYTE, pws->b[4], pwt->b[4]); + pwd->b[5] = msa_subsus_u_df(DF_BYTE, pws->b[5], pwt->b[5]); + pwd->b[6] = msa_subsus_u_df(DF_BYTE, pws->b[6], pwt->b[6]); + pwd->b[7] = msa_subsus_u_df(DF_BYTE, pws->b[7], pwt->b[7]); + pwd->b[8] = msa_subsus_u_df(DF_BYTE, pws->b[8], pwt->b[8]); + pwd->b[9] = msa_subsus_u_df(DF_BYTE, pws->b[9], pwt->b[9]); + pwd->b[10] = msa_subsus_u_df(DF_BYTE, pws->b[10], pwt->b[10]); + pwd->b[11] = msa_subsus_u_df(DF_BYTE, pws->b[11], pwt->b[11]); + pwd->b[12] = msa_subsus_u_df(DF_BYTE, pws->b[12], pwt->b[12]); + pwd->b[13] = msa_subsus_u_df(DF_BYTE, pws->b[13], pwt->b[13]); + pwd->b[14] = msa_subsus_u_df(DF_BYTE, pws->b[14], pwt->b[14]); + pwd->b[15] = msa_subsus_u_df(DF_BYTE, pws->b[15], pwt->b[15]); +} + +void helper_msa_subsus_u_h(CPUMIPSState *env, + uint32_t wd, uint32_t ws, uint32_t wt) +{ + wr_t *pwd = &(env->active_fpu.fpr[wd].wr); + wr_t *pws = &(env->active_fpu.fpr[ws].wr); + wr_t *pwt = &(env->active_fpu.fpr[wt].wr); + + pwd->h[0] = msa_subsus_u_df(DF_HALF, pws->h[0], pwt->h[0]); + pwd->h[1] = msa_subsus_u_df(DF_HALF, pws->h[1], pwt->h[1]); + pwd->h[2] = msa_subsus_u_df(DF_HALF, pws->h[2], pwt->h[2]); + pwd->h[3] = msa_subsus_u_df(DF_HALF, pws->h[3], pwt->h[3]); + pwd->h[4] = msa_subsus_u_df(DF_HALF, pws->h[4], pwt->h[4]); + pwd->h[5] = msa_subsus_u_df(DF_HALF, pws->h[5], pwt->h[5]); + pwd->h[6] = msa_subsus_u_df(DF_HALF, pws->h[6], pwt->h[6]); + pwd->h[7] = msa_subsus_u_df(DF_HALF, pws->h[7], pwt->h[7]); +} + +void helper_msa_subsus_u_w(CPUMIPSState *env, + uint32_t wd, uint32_t ws, uint32_t wt) +{ + wr_t *pwd = &(env->active_fpu.fpr[wd].wr); + wr_t *pws = &(env->active_fpu.fpr[ws].wr); + wr_t *pwt = &(env->active_fpu.fpr[wt].wr); + + pwd->w[0] = msa_subsus_u_df(DF_WORD, pws->w[0], pwt->w[0]); + pwd->w[1] = msa_subsus_u_df(DF_WORD, pws->w[1], pwt->w[1]); + pwd->w[2] = msa_subsus_u_df(DF_WORD, pws->w[2], pwt->w[2]); + pwd->w[3] = msa_subsus_u_df(DF_WORD, pws->w[3], pwt->w[3]); +} + +void helper_msa_subsus_u_d(CPUMIPSState *env, + uint32_t wd, uint32_t ws, uint32_t wt) +{ + wr_t *pwd = &(env->active_fpu.fpr[wd].wr); + wr_t *pws = &(env->active_fpu.fpr[ws].wr); + wr_t *pwt = &(env->active_fpu.fpr[wt].wr); + + pwd->d[0] = msa_subsus_u_df(DF_DOUBLE, pws->d[0], pwt->d[0]); + pwd->d[1] = msa_subsus_u_df(DF_DOUBLE, pws->d[1], pwt->d[1]); +} + + +static inline int64_t msa_subsuu_s_df(uint32_t df, int64_t arg1, int64_t arg2) +{ + uint64_t u_arg1 = UNSIGNED(arg1, df); + uint64_t u_arg2 = UNSIGNED(arg2, df); + int64_t max_int = DF_MAX_INT(df); + int64_t min_int = DF_MIN_INT(df); + if (u_arg1 > u_arg2) { + return u_arg1 - u_arg2 < (uint64_t)max_int ? + (int64_t)(u_arg1 - u_arg2) : + max_int; + } else { + return u_arg2 - u_arg1 < (uint64_t)(-min_int) ? 
+ (int64_t)(u_arg1 - u_arg2) : + min_int; + } +} + +void helper_msa_subsuu_s_b(CPUMIPSState *env, + uint32_t wd, uint32_t ws, uint32_t wt) { wr_t *pwd = &(env->active_fpu.fpr[wd].wr); wr_t *pws = &(env->active_fpu.fpr[ws].wr); wr_t *pwt = &(env->active_fpu.fpr[wt].wr); - pwd->h[0] = msa_asub_u_df(DF_HALF, pws->h[0], pwt->h[0]); - pwd->h[1] = msa_asub_u_df(DF_HALF, pws->h[1], pwt->h[1]); - pwd->h[2] = msa_asub_u_df(DF_HALF, pws->h[2], pwt->h[2]); - pwd->h[3] = msa_asub_u_df(DF_HALF, pws->h[3], pwt->h[3]); - pwd->h[4] = msa_asub_u_df(DF_HALF, pws->h[4], pwt->h[4]); - pwd->h[5] = msa_asub_u_df(DF_HALF, pws->h[5], pwt->h[5]); - pwd->h[6] = msa_asub_u_df(DF_HALF, pws->h[6], pwt->h[6]); - pwd->h[7] = msa_asub_u_df(DF_HALF, pws->h[7], pwt->h[7]); + pwd->b[0] = msa_subsuu_s_df(DF_BYTE, pws->b[0], pwt->b[0]); + pwd->b[1] = msa_subsuu_s_df(DF_BYTE, pws->b[1], pwt->b[1]); + pwd->b[2] = msa_subsuu_s_df(DF_BYTE, pws->b[2], pwt->b[2]); + pwd->b[3] = msa_subsuu_s_df(DF_BYTE, pws->b[3], pwt->b[3]); + pwd->b[4] = msa_subsuu_s_df(DF_BYTE, pws->b[4], pwt->b[4]); + pwd->b[5] = msa_subsuu_s_df(DF_BYTE, pws->b[5], pwt->b[5]); + pwd->b[6] = msa_subsuu_s_df(DF_BYTE, pws->b[6], pwt->b[6]); + pwd->b[7] = msa_subsuu_s_df(DF_BYTE, pws->b[7], pwt->b[7]); + pwd->b[8] = msa_subsuu_s_df(DF_BYTE, pws->b[8], pwt->b[8]); + pwd->b[9] = msa_subsuu_s_df(DF_BYTE, pws->b[9], pwt->b[9]); + pwd->b[10] = msa_subsuu_s_df(DF_BYTE, pws->b[10], pwt->b[10]); + pwd->b[11] = msa_subsuu_s_df(DF_BYTE, pws->b[11], pwt->b[11]); + pwd->b[12] = msa_subsuu_s_df(DF_BYTE, pws->b[12], pwt->b[12]); + pwd->b[13] = msa_subsuu_s_df(DF_BYTE, pws->b[13], pwt->b[13]); + pwd->b[14] = msa_subsuu_s_df(DF_BYTE, pws->b[14], pwt->b[14]); + pwd->b[15] = msa_subsuu_s_df(DF_BYTE, pws->b[15], pwt->b[15]); } -void helper_msa_asub_u_w(CPUMIPSState *env, - uint32_t wd, uint32_t ws, uint32_t wt) +void helper_msa_subsuu_s_h(CPUMIPSState *env, + uint32_t wd, uint32_t ws, uint32_t wt) { wr_t *pwd = &(env->active_fpu.fpr[wd].wr); wr_t *pws = &(env->active_fpu.fpr[ws].wr); wr_t *pwt = &(env->active_fpu.fpr[wt].wr); - pwd->w[0] = msa_asub_u_df(DF_WORD, pws->w[0], pwt->w[0]); - pwd->w[1] = msa_asub_u_df(DF_WORD, pws->w[1], pwt->w[1]); - pwd->w[2] = msa_asub_u_df(DF_WORD, pws->w[2], pwt->w[2]); - pwd->w[3] = msa_asub_u_df(DF_WORD, pws->w[3], pwt->w[3]); + pwd->h[0] = msa_subsuu_s_df(DF_HALF, pws->h[0], pwt->h[0]); + pwd->h[1] = msa_subsuu_s_df(DF_HALF, pws->h[1], pwt->h[1]); + pwd->h[2] = msa_subsuu_s_df(DF_HALF, pws->h[2], pwt->h[2]); + pwd->h[3] = msa_subsuu_s_df(DF_HALF, pws->h[3], pwt->h[3]); + pwd->h[4] = msa_subsuu_s_df(DF_HALF, pws->h[4], pwt->h[4]); + pwd->h[5] = msa_subsuu_s_df(DF_HALF, pws->h[5], pwt->h[5]); + pwd->h[6] = msa_subsuu_s_df(DF_HALF, pws->h[6], pwt->h[6]); + pwd->h[7] = msa_subsuu_s_df(DF_HALF, pws->h[7], pwt->h[7]); } -void helper_msa_asub_u_d(CPUMIPSState *env, - uint32_t wd, uint32_t ws, uint32_t wt) +void helper_msa_subsuu_s_w(CPUMIPSState *env, + uint32_t wd, uint32_t ws, uint32_t wt) { wr_t *pwd = &(env->active_fpu.fpr[wd].wr); wr_t *pws = &(env->active_fpu.fpr[ws].wr); wr_t *pwt = &(env->active_fpu.fpr[wt].wr); - pwd->d[0] = msa_asub_u_df(DF_DOUBLE, pws->d[0], pwt->d[0]); - pwd->d[1] = msa_asub_u_df(DF_DOUBLE, pws->d[1], pwt->d[1]); -} - - -/* TODO: insert the rest of Int Subtract group helpers here */ - - -static inline int64_t msa_hsub_s_df(uint32_t df, int64_t arg1, int64_t arg2) -{ - return SIGNED_ODD(arg1, df) - SIGNED_EVEN(arg2, df); + pwd->w[0] = msa_subsuu_s_df(DF_WORD, pws->w[0], pwt->w[0]); + pwd->w[1] = msa_subsuu_s_df(DF_WORD, 
pws->w[1], pwt->w[1]); + pwd->w[2] = msa_subsuu_s_df(DF_WORD, pws->w[2], pwt->w[2]); + pwd->w[3] = msa_subsuu_s_df(DF_WORD, pws->w[3], pwt->w[3]); } -void helper_msa_hsub_s_h(CPUMIPSState *env, - uint32_t wd, uint32_t ws, uint32_t wt) +void helper_msa_subsuu_s_d(CPUMIPSState *env, + uint32_t wd, uint32_t ws, uint32_t wt) { wr_t *pwd = &(env->active_fpu.fpr[wd].wr); wr_t *pws = &(env->active_fpu.fpr[ws].wr); wr_t *pwt = &(env->active_fpu.fpr[wt].wr); - pwd->h[0] = msa_hsub_s_df(DF_HALF, pws->h[0], pwt->h[0]); - pwd->h[1] = msa_hsub_s_df(DF_HALF, pws->h[1], pwt->h[1]); - pwd->h[2] = msa_hsub_s_df(DF_HALF, pws->h[2], pwt->h[2]); - pwd->h[3] = msa_hsub_s_df(DF_HALF, pws->h[3], pwt->h[3]); - pwd->h[4] = msa_hsub_s_df(DF_HALF, pws->h[4], pwt->h[4]); - pwd->h[5] = msa_hsub_s_df(DF_HALF, pws->h[5], pwt->h[5]); - pwd->h[6] = msa_hsub_s_df(DF_HALF, pws->h[6], pwt->h[6]); - pwd->h[7] = msa_hsub_s_df(DF_HALF, pws->h[7], pwt->h[7]); + pwd->d[0] = msa_subsuu_s_df(DF_DOUBLE, pws->d[0], pwt->d[0]); + pwd->d[1] = msa_subsuu_s_df(DF_DOUBLE, pws->d[1], pwt->d[1]); } -void helper_msa_hsub_s_w(CPUMIPSState *env, - uint32_t wd, uint32_t ws, uint32_t wt) -{ - wr_t *pwd = &(env->active_fpu.fpr[wd].wr); - wr_t *pws = &(env->active_fpu.fpr[ws].wr); - wr_t *pwt = &(env->active_fpu.fpr[wt].wr); - pwd->w[0] = msa_hsub_s_df(DF_WORD, pws->w[0], pwt->w[0]); - pwd->w[1] = msa_hsub_s_df(DF_WORD, pws->w[1], pwt->w[1]); - pwd->w[2] = msa_hsub_s_df(DF_WORD, pws->w[2], pwt->w[2]); - pwd->w[3] = msa_hsub_s_df(DF_WORD, pws->w[3], pwt->w[3]); +static inline int64_t msa_subv_df(uint32_t df, int64_t arg1, int64_t arg2) +{ + return arg1 - arg2; } -void helper_msa_hsub_s_d(CPUMIPSState *env, - uint32_t wd, uint32_t ws, uint32_t wt) +void helper_msa_subv_b(CPUMIPSState *env, + uint32_t wd, uint32_t ws, uint32_t wt) { wr_t *pwd = &(env->active_fpu.fpr[wd].wr); wr_t *pws = &(env->active_fpu.fpr[ws].wr); wr_t *pwt = &(env->active_fpu.fpr[wt].wr); - pwd->d[0] = msa_hsub_s_df(DF_DOUBLE, pws->d[0], pwt->d[0]); - pwd->d[1] = msa_hsub_s_df(DF_DOUBLE, pws->d[1], pwt->d[1]); -} - - -static inline int64_t msa_hsub_u_df(uint32_t df, int64_t arg1, int64_t arg2) -{ - return UNSIGNED_ODD(arg1, df) - UNSIGNED_EVEN(arg2, df); + pwd->b[0] = msa_subv_df(DF_BYTE, pws->b[0], pwt->b[0]); + pwd->b[1] = msa_subv_df(DF_BYTE, pws->b[1], pwt->b[1]); + pwd->b[2] = msa_subv_df(DF_BYTE, pws->b[2], pwt->b[2]); + pwd->b[3] = msa_subv_df(DF_BYTE, pws->b[3], pwt->b[3]); + pwd->b[4] = msa_subv_df(DF_BYTE, pws->b[4], pwt->b[4]); + pwd->b[5] = msa_subv_df(DF_BYTE, pws->b[5], pwt->b[5]); + pwd->b[6] = msa_subv_df(DF_BYTE, pws->b[6], pwt->b[6]); + pwd->b[7] = msa_subv_df(DF_BYTE, pws->b[7], pwt->b[7]); + pwd->b[8] = msa_subv_df(DF_BYTE, pws->b[8], pwt->b[8]); + pwd->b[9] = msa_subv_df(DF_BYTE, pws->b[9], pwt->b[9]); + pwd->b[10] = msa_subv_df(DF_BYTE, pws->b[10], pwt->b[10]); + pwd->b[11] = msa_subv_df(DF_BYTE, pws->b[11], pwt->b[11]); + pwd->b[12] = msa_subv_df(DF_BYTE, pws->b[12], pwt->b[12]); + pwd->b[13] = msa_subv_df(DF_BYTE, pws->b[13], pwt->b[13]); + pwd->b[14] = msa_subv_df(DF_BYTE, pws->b[14], pwt->b[14]); + pwd->b[15] = msa_subv_df(DF_BYTE, pws->b[15], pwt->b[15]); } -void helper_msa_hsub_u_h(CPUMIPSState *env, - uint32_t wd, uint32_t ws, uint32_t wt) +void helper_msa_subv_h(CPUMIPSState *env, + uint32_t wd, uint32_t ws, uint32_t wt) { wr_t *pwd = &(env->active_fpu.fpr[wd].wr); wr_t *pws = &(env->active_fpu.fpr[ws].wr); wr_t *pwt = &(env->active_fpu.fpr[wt].wr); - pwd->h[0] = msa_hsub_u_df(DF_HALF, pws->h[0], pwt->h[0]); - pwd->h[1] = msa_hsub_u_df(DF_HALF, 
pws->h[1], pwt->h[1]); - pwd->h[2] = msa_hsub_u_df(DF_HALF, pws->h[2], pwt->h[2]); - pwd->h[3] = msa_hsub_u_df(DF_HALF, pws->h[3], pwt->h[3]); - pwd->h[4] = msa_hsub_u_df(DF_HALF, pws->h[4], pwt->h[4]); - pwd->h[5] = msa_hsub_u_df(DF_HALF, pws->h[5], pwt->h[5]); - pwd->h[6] = msa_hsub_u_df(DF_HALF, pws->h[6], pwt->h[6]); - pwd->h[7] = msa_hsub_u_df(DF_HALF, pws->h[7], pwt->h[7]); + pwd->h[0] = msa_subv_df(DF_HALF, pws->h[0], pwt->h[0]); + pwd->h[1] = msa_subv_df(DF_HALF, pws->h[1], pwt->h[1]); + pwd->h[2] = msa_subv_df(DF_HALF, pws->h[2], pwt->h[2]); + pwd->h[3] = msa_subv_df(DF_HALF, pws->h[3], pwt->h[3]); + pwd->h[4] = msa_subv_df(DF_HALF, pws->h[4], pwt->h[4]); + pwd->h[5] = msa_subv_df(DF_HALF, pws->h[5], pwt->h[5]); + pwd->h[6] = msa_subv_df(DF_HALF, pws->h[6], pwt->h[6]); + pwd->h[7] = msa_subv_df(DF_HALF, pws->h[7], pwt->h[7]); } -void helper_msa_hsub_u_w(CPUMIPSState *env, - uint32_t wd, uint32_t ws, uint32_t wt) +void helper_msa_subv_w(CPUMIPSState *env, + uint32_t wd, uint32_t ws, uint32_t wt) { wr_t *pwd = &(env->active_fpu.fpr[wd].wr); wr_t *pws = &(env->active_fpu.fpr[ws].wr); wr_t *pwt = &(env->active_fpu.fpr[wt].wr); - pwd->w[0] = msa_hsub_u_df(DF_WORD, pws->w[0], pwt->w[0]); - pwd->w[1] = msa_hsub_u_df(DF_WORD, pws->w[1], pwt->w[1]); - pwd->w[2] = msa_hsub_u_df(DF_WORD, pws->w[2], pwt->w[2]); - pwd->w[3] = msa_hsub_u_df(DF_WORD, pws->w[3], pwt->w[3]); + pwd->w[0] = msa_subv_df(DF_WORD, pws->w[0], pwt->w[0]); + pwd->w[1] = msa_subv_df(DF_WORD, pws->w[1], pwt->w[1]); + pwd->w[2] = msa_subv_df(DF_WORD, pws->w[2], pwt->w[2]); + pwd->w[3] = msa_subv_df(DF_WORD, pws->w[3], pwt->w[3]); } -void helper_msa_hsub_u_d(CPUMIPSState *env, - uint32_t wd, uint32_t ws, uint32_t wt) +void helper_msa_subv_d(CPUMIPSState *env, + uint32_t wd, uint32_t ws, uint32_t wt) { wr_t *pwd = &(env->active_fpu.fpr[wd].wr); wr_t *pws = &(env->active_fpu.fpr[ws].wr); wr_t *pwt = &(env->active_fpu.fpr[wt].wr); - pwd->d[0] = msa_hsub_u_df(DF_DOUBLE, pws->d[0], pwt->d[0]); - pwd->d[1] = msa_hsub_u_df(DF_DOUBLE, pws->d[1], pwt->d[1]); + pwd->d[0] = msa_subv_df(DF_DOUBLE, pws->d[0], pwt->d[0]); + pwd->d[1] = msa_subv_df(DF_DOUBLE, pws->d[1], pwt->d[1]); } @@ -4408,11 +5343,6 @@ void helper_msa_shf_df(CPUMIPSState *env, uint32_t df, uint32_t wd, msa_move_v(pwd, pwx); } -static inline int64_t msa_subv_df(uint32_t df, int64_t arg1, int64_t arg2) -{ - return arg1 - arg2; -} - #define MSA_BINOP_IMM_DF(helper, func) \ void helper_msa_ ## helper ## _df(CPUMIPSState *env, uint32_t df, \ uint32_t wd, uint32_t ws, int32_t u5) \ @@ -4594,97 +5524,6 @@ MSA_TEROP_IMMU_DF(binsli, binsl) MSA_TEROP_IMMU_DF(binsri, binsr) #undef MSA_TEROP_IMMU_DF -static inline int64_t msa_subs_s_df(uint32_t df, int64_t arg1, int64_t arg2) -{ - int64_t max_int = DF_MAX_INT(df); - int64_t min_int = DF_MIN_INT(df); - if (arg2 > 0) { - return (min_int + arg2 < arg1) ? arg1 - arg2 : min_int; - } else { - return (arg1 < max_int + arg2) ? arg1 - arg2 : max_int; - } -} - -static inline int64_t msa_subs_u_df(uint32_t df, int64_t arg1, int64_t arg2) -{ - uint64_t u_arg1 = UNSIGNED(arg1, df); - uint64_t u_arg2 = UNSIGNED(arg2, df); - return (u_arg1 > u_arg2) ? u_arg1 - u_arg2 : 0; -} - -static inline int64_t msa_subsus_u_df(uint32_t df, int64_t arg1, int64_t arg2) -{ - uint64_t u_arg1 = UNSIGNED(arg1, df); - uint64_t max_uint = DF_MAX_UINT(df); - if (arg2 >= 0) { - uint64_t u_arg2 = (uint64_t)arg2; - return (u_arg1 > u_arg2) ? - (int64_t)(u_arg1 - u_arg2) : - 0; - } else { - uint64_t u_arg2 = (uint64_t)(-arg2); - return (u_arg1 < max_uint - u_arg2) ? 
- (int64_t)(u_arg1 + u_arg2) : - (int64_t)max_uint; - } -} - -static inline int64_t msa_subsuu_s_df(uint32_t df, int64_t arg1, int64_t arg2) -{ - uint64_t u_arg1 = UNSIGNED(arg1, df); - uint64_t u_arg2 = UNSIGNED(arg2, df); - int64_t max_int = DF_MAX_INT(df); - int64_t min_int = DF_MIN_INT(df); - if (u_arg1 > u_arg2) { - return u_arg1 - u_arg2 < (uint64_t)max_int ? - (int64_t)(u_arg1 - u_arg2) : - max_int; - } else { - return u_arg2 - u_arg1 < (uint64_t)(-min_int) ? - (int64_t)(u_arg1 - u_arg2) : - min_int; - } -} - -static inline int64_t msa_mulv_df(uint32_t df, int64_t arg1, int64_t arg2) -{ - return arg1 * arg2; -} - -#define SIGNED_EXTRACT(e, o, a, df) \ - do { \ - e = SIGNED_EVEN(a, df); \ - o = SIGNED_ODD(a, df); \ - } while (0) - -#define UNSIGNED_EXTRACT(e, o, a, df) \ - do { \ - e = UNSIGNED_EVEN(a, df); \ - o = UNSIGNED_ODD(a, df); \ - } while (0) - -static inline int64_t msa_dotp_s_df(uint32_t df, int64_t arg1, int64_t arg2) -{ - int64_t even_arg1; - int64_t even_arg2; - int64_t odd_arg1; - int64_t odd_arg2; - SIGNED_EXTRACT(even_arg1, odd_arg1, arg1, df); - SIGNED_EXTRACT(even_arg2, odd_arg2, arg2, df); - return (even_arg1 * even_arg2) + (odd_arg1 * odd_arg2); -} - -static inline int64_t msa_dotp_u_df(uint32_t df, int64_t arg1, int64_t arg2) -{ - int64_t even_arg1; - int64_t even_arg2; - int64_t odd_arg1; - int64_t odd_arg2; - UNSIGNED_EXTRACT(even_arg1, odd_arg1, arg1, df); - UNSIGNED_EXTRACT(even_arg2, odd_arg2, arg2, df); - return (even_arg1 * even_arg2) + (odd_arg1 * odd_arg2); -} - #define CONCATENATE_AND_SLIDE(s, k) \ do { \ for (i = 0; i < s; i++) { \ @@ -4802,15 +5641,6 @@ void helper_msa_ ## func ## _df(CPUMIPSState *env, uint32_t df, \ } \ } -MSA_BINOP_DF(subv) -MSA_BINOP_DF(subs_s) -MSA_BINOP_DF(subs_u) -MSA_BINOP_DF(subsus_u) -MSA_BINOP_DF(subsuu_s) -MSA_BINOP_DF(mulv) -MSA_BINOP_DF(dotp_s) -MSA_BINOP_DF(dotp_u) - MSA_BINOP_DF(mul_q) MSA_BINOP_DF(mulr_q) #undef MSA_BINOP_DF @@ -4824,66 +5654,6 @@ void helper_msa_sld_df(CPUMIPSState *env, uint32_t df, uint32_t wd, msa_sld_df(df, pwd, pws, env->active_tc.gpr[rt]); } -static inline int64_t msa_maddv_df(uint32_t df, int64_t dest, int64_t arg1, - int64_t arg2) -{ - return dest + arg1 * arg2; -} - -static inline int64_t msa_msubv_df(uint32_t df, int64_t dest, int64_t arg1, - int64_t arg2) -{ - return dest - arg1 * arg2; -} - -static inline int64_t msa_dpadd_s_df(uint32_t df, int64_t dest, int64_t arg1, - int64_t arg2) -{ - int64_t even_arg1; - int64_t even_arg2; - int64_t odd_arg1; - int64_t odd_arg2; - SIGNED_EXTRACT(even_arg1, odd_arg1, arg1, df); - SIGNED_EXTRACT(even_arg2, odd_arg2, arg2, df); - return dest + (even_arg1 * even_arg2) + (odd_arg1 * odd_arg2); -} - -static inline int64_t msa_dpadd_u_df(uint32_t df, int64_t dest, int64_t arg1, - int64_t arg2) -{ - int64_t even_arg1; - int64_t even_arg2; - int64_t odd_arg1; - int64_t odd_arg2; - UNSIGNED_EXTRACT(even_arg1, odd_arg1, arg1, df); - UNSIGNED_EXTRACT(even_arg2, odd_arg2, arg2, df); - return dest + (even_arg1 * even_arg2) + (odd_arg1 * odd_arg2); -} - -static inline int64_t msa_dpsub_s_df(uint32_t df, int64_t dest, int64_t arg1, - int64_t arg2) -{ - int64_t even_arg1; - int64_t even_arg2; - int64_t odd_arg1; - int64_t odd_arg2; - SIGNED_EXTRACT(even_arg1, odd_arg1, arg1, df); - SIGNED_EXTRACT(even_arg2, odd_arg2, arg2, df); - return dest - ((even_arg1 * even_arg2) + (odd_arg1 * odd_arg2)); -} - -static inline int64_t msa_dpsub_u_df(uint32_t df, int64_t dest, int64_t arg1, - int64_t arg2) -{ - int64_t even_arg1; - int64_t even_arg2; - int64_t odd_arg1; - int64_t 
odd_arg2; - UNSIGNED_EXTRACT(even_arg1, odd_arg1, arg1, df); - UNSIGNED_EXTRACT(even_arg2, odd_arg2, arg2, df); - return dest - ((even_arg1 * even_arg2) + (odd_arg1 * odd_arg2)); -} - static inline int64_t msa_madd_q_df(uint32_t df, int64_t dest, int64_t arg1, int64_t arg2) { @@ -5010,12 +5780,6 @@ void helper_msa_ ## func ## _df(CPUMIPSState *env, uint32_t df, uint32_t wd, \ } \ } -MSA_TEROP_DF(maddv) -MSA_TEROP_DF(msubv) -MSA_TEROP_DF(dpadd_s) -MSA_TEROP_DF(dpadd_u) -MSA_TEROP_DF(dpsub_s) -MSA_TEROP_DF(dpsub_u) MSA_TEROP_DF(binsl) MSA_TEROP_DF(binsr) MSA_TEROP_DF(madd_q) @@ -5427,54 +6191,80 @@ static inline void check_msacsr_cause(CPUMIPSState *env, uintptr_t retaddr) #define CLEAR_IS_INEXACT 2 #define RECIPROCAL_INEXACT 4 -static inline int update_msacsr(CPUMIPSState *env, int action, int denormal) +static inline int ieee_to_mips_xcpt_msa(int ieee_xcpt) { - int ieee_ex; + int mips_xcpt = 0; + + if (ieee_xcpt & float_flag_invalid) { + mips_xcpt |= FP_INVALID; + } + if (ieee_xcpt & float_flag_overflow) { + mips_xcpt |= FP_OVERFLOW; + } + if (ieee_xcpt & float_flag_underflow) { + mips_xcpt |= FP_UNDERFLOW; + } + if (ieee_xcpt & float_flag_divbyzero) { + mips_xcpt |= FP_DIV0; + } + if (ieee_xcpt & float_flag_inexact) { + mips_xcpt |= FP_INEXACT; + } + + return mips_xcpt; +} - int c; +static inline int update_msacsr(CPUMIPSState *env, int action, int denormal) +{ + int ieee_exception_flags; + int mips_exception_flags = 0; int cause; int enable; - ieee_ex = get_float_exception_flags(&env->active_tc.msa_fp_status); + ieee_exception_flags = get_float_exception_flags( + &env->active_tc.msa_fp_status); /* QEMU softfloat does not signal all underflow cases */ if (denormal) { - ieee_ex |= float_flag_underflow; + ieee_exception_flags |= float_flag_underflow; + } + if (ieee_exception_flags) { + mips_exception_flags = ieee_to_mips_xcpt_msa(ieee_exception_flags); } - - c = ieee_ex_to_mips(ieee_ex); enable = GET_FP_ENABLE(env->active_tc.msacsr) | FP_UNIMPLEMENTED; /* Set Inexact (I) when flushing inputs to zero */ - if ((ieee_ex & float_flag_input_denormal) && + if ((ieee_exception_flags & float_flag_input_denormal) && (env->active_tc.msacsr & MSACSR_FS_MASK) != 0) { if (action & CLEAR_IS_INEXACT) { - c &= ~FP_INEXACT; + mips_exception_flags &= ~FP_INEXACT; } else { - c |= FP_INEXACT; + mips_exception_flags |= FP_INEXACT; } } /* Set Inexact (I) and Underflow (U) when flushing outputs to zero */ - if ((ieee_ex & float_flag_output_denormal) && + if ((ieee_exception_flags & float_flag_output_denormal) && (env->active_tc.msacsr & MSACSR_FS_MASK) != 0) { - c |= FP_INEXACT; + mips_exception_flags |= FP_INEXACT; if (action & CLEAR_FS_UNDERFLOW) { - c &= ~FP_UNDERFLOW; + mips_exception_flags &= ~FP_UNDERFLOW; } else { - c |= FP_UNDERFLOW; + mips_exception_flags |= FP_UNDERFLOW; } } /* Set Inexact (I) when Overflow (O) is not enabled */ - if ((c & FP_OVERFLOW) != 0 && (enable & FP_OVERFLOW) == 0) { - c |= FP_INEXACT; + if ((mips_exception_flags & FP_OVERFLOW) != 0 && + (enable & FP_OVERFLOW) == 0) { + mips_exception_flags |= FP_INEXACT; } /* Clear Exact Underflow when Underflow (U) is not enabled */ - if ((c & FP_UNDERFLOW) != 0 && (enable & FP_UNDERFLOW) == 0 && - (c & FP_INEXACT) == 0) { - c &= ~FP_UNDERFLOW; + if ((mips_exception_flags & FP_UNDERFLOW) != 0 && + (enable & FP_UNDERFLOW) == 0 && + (mips_exception_flags & FP_INEXACT) == 0) { + mips_exception_flags &= ~FP_UNDERFLOW; } /* @@ -5482,11 +6272,11 @@ static inline int update_msacsr(CPUMIPSState *env, int action, int denormal) * divide by zero */ if 
((action & RECIPROCAL_INEXACT) && - (c & (FP_INVALID | FP_DIV0)) == 0) { - c = FP_INEXACT; + (mips_exception_flags & (FP_INVALID | FP_DIV0)) == 0) { + mips_exception_flags = FP_INEXACT; } - cause = c & enable; /* all current enabled exceptions */ + cause = mips_exception_flags & enable; /* all current enabled exceptions */ if (cause == 0) { /* @@ -5494,7 +6284,7 @@ static inline int update_msacsr(CPUMIPSState *env, int action, int denormal) * with all current exceptions */ SET_FP_CAUSE(env->active_tc.msacsr, - (GET_FP_CAUSE(env->active_tc.msacsr) | c)); + (GET_FP_CAUSE(env->active_tc.msacsr) | mips_exception_flags)); } else { /* Current exceptions are enabled */ if ((env->active_tc.msacsr & MSACSR_NX_MASK) == 0) { @@ -5503,11 +6293,11 @@ static inline int update_msacsr(CPUMIPSState *env, int action, int denormal) * with all enabled exceptions */ SET_FP_CAUSE(env->active_tc.msacsr, - (GET_FP_CAUSE(env->active_tc.msacsr) | c)); + (GET_FP_CAUSE(env->active_tc.msacsr) | mips_exception_flags)); } } - return c; + return mips_exception_flags; } static inline int get_enabled_exceptions(const CPUMIPSState *env, int c) @@ -5516,7 +6306,7 @@ static inline int get_enabled_exceptions(const CPUMIPSState *env, int c) return c & enable; } -static inline float16 float16_from_float32(int32_t a, flag ieee, +static inline float16 float16_from_float32(int32_t a, bool ieee, float_status *status) { float16 f_val; @@ -5535,7 +6325,7 @@ static inline float32 float32_from_float64(int64_t a, float_status *status) return a < 0 ? (f_val | (1 << 31)) : f_val; } -static inline float32 float32_from_float16(int16_t a, flag ieee, +static inline float32 float32_from_float16(int16_t a, bool ieee, float_status *status) { float32 f_val; @@ -6572,7 +7362,7 @@ void helper_msa_fexdo_df(CPUMIPSState *env, uint32_t df, uint32_t wd, * IEEE and "ARM" format. The latter gains extra exponent * range by omitting the NaN/Inf encodings. */ - flag ieee = 1; + bool ieee = true; MSA_FLOAT_BINOP(Lh(pwx, i), from_float32, pws->w[i], ieee, 16); MSA_FLOAT_BINOP(Rh(pwx, i), from_float32, pwt->w[i], ieee, 16); @@ -7186,7 +7976,7 @@ void helper_msa_fexupl_df(CPUMIPSState *env, uint32_t df, uint32_t wd, * IEEE and "ARM" format. The latter gains extra exponent * range by omitting the NaN/Inf encodings. */ - flag ieee = 1; + bool ieee = true; MSA_FLOAT_BINOP(pwx->w[i], from_float16, Lh(pws, i), ieee, 32); } @@ -7222,7 +8012,7 @@ void helper_msa_fexupr_df(CPUMIPSState *env, uint32_t df, uint32_t wd, * IEEE and "ARM" format. The latter gains extra exponent * range by omitting the NaN/Inf encodings. 
*/ - flag ieee = 1; + bool ieee = true; MSA_FLOAT_BINOP(pwx->w[i], from_float16, Rh(pws, i), ieee, 32); } diff --git a/qemu/target/mips/op_helper.c b/qemu/target/mips/op_helper.c index 9802b9cebd..f8119b999e 100644 --- a/qemu/target/mips/op_helper.c +++ b/qemu/target/mips/op_helper.c @@ -618,6 +618,7 @@ static inline uint64_t get_tlb_pfn_from_entrylo(uint64_t entrylo) static void r4k_fill_tlb(CPUMIPSState *env, int idx) { + struct uc_struct *uc = env->uc; r4k_tlb_t *tlb; uint64_t mask = env->CP0_PageMask >> (TARGET_PAGE_BITS + 1); @@ -682,6 +683,7 @@ void r4k_helper_tlbinvf(CPUMIPSState *env) void r4k_helper_tlbwi(CPUMIPSState *env) { + struct uc_struct *uc = env->uc; bool mi = !!((env->CP0_Config5 >> CP0C5_MI) & 1); target_ulong VPN; uint16_t ASID = env->CP0_EntryHi & env->CP0_EntryHi_ASID_mask; @@ -738,6 +740,7 @@ void r4k_helper_tlbwr(CPUMIPSState *env) void r4k_helper_tlbp(CPUMIPSState *env) { + struct uc_struct *uc = env->uc; bool mi = !!((env->CP0_Config5 >> CP0C5_MI) & 1); r4k_tlb_t *tlb; target_ulong mask; @@ -1241,6 +1244,7 @@ static inline void ensure_writable_pages(CPUMIPSState *env, int mmu_idx, uintptr_t retaddr) { + struct uc_struct *uc = env->uc; /* FIXME: Probe the actual accesses (pass and use a size) */ if (unlikely(MSA_PAGESPAN(addr))) { /* first page */ diff --git a/qemu/target/mips/translate.c b/qemu/target/mips/translate.c index 3fab57b251..b8c82c82d8 100644 --- a/qemu/target/mips/translate.c +++ b/qemu/target/mips/translate.c @@ -1040,7 +1040,7 @@ enum { OPC_BC2NEZ = (0x0D << 21) | OPC_CP2, }; -#define MASK_LMI(op) (MASK_OP_MAJOR(op) | (op & (0x1F << 21)) | (op & 0x1F)) +#define MASK_LMMI(op) (MASK_OP_MAJOR(op) | (op & (0x1F << 21)) | (op & 0x1F)) enum { OPC_PADDSH = (24 << 21) | (0x00) | OPC_CP2, @@ -3384,7 +3384,8 @@ static void gen_ld(DisasContext *ctx, uint32_t opc, TCGv t0, t1, t2; int mem_idx = ctx->mem_idx; - if (rt == 0 && ctx->insn_flags & (INSN_LOONGSON2E | INSN_LOONGSON2F)) { + if (rt == 0 && ctx->insn_flags & (INSN_LOONGSON2E | INSN_LOONGSON2F | + INSN_LOONGSON3A)) { /* * Loongson CPU uses a load to zero register for prefetch. * We emulate it as a NOP. 
On other CPU we must perform the @@ -5520,7 +5521,7 @@ static void gen_loongson_multimedia(DisasContext *ctx, int rd, int rs, int rt) TCGv_i64 t0, t1; TCGCond cond; - opc = MASK_LMI(ctx->opcode); + opc = MASK_LMMI(ctx->opcode); switch (opc) { case OPC_ADD_CP2: case OPC_SUB_CP2: @@ -5995,6 +5996,7 @@ static void gen_trap(DisasContext *ctx, uint32_t opc, static inline bool use_goto_tb(DisasContext *ctx, target_ulong dest) { + struct uc_struct *uc = ctx->uc; if (unlikely(ctx->base.singlestep_enabled)) { return false; } @@ -27207,7 +27209,7 @@ static void decode_opc_special2_legacy(CPUMIPSState *env, DisasContext *ctx) case OPC_MULTU_G_2F: case OPC_MOD_G_2F: case OPC_MODU_G_2F: - check_insn(ctx, INSN_LOONGSON2F); + check_insn(ctx, INSN_LOONGSON2F | ASE_LEXT); gen_loongson_integer(ctx, op1, rd, rs, rt); break; case OPC_CLO: @@ -27240,7 +27242,7 @@ static void decode_opc_special2_legacy(CPUMIPSState *env, DisasContext *ctx) case OPC_DDIVU_G_2F: case OPC_DMOD_G_2F: case OPC_DMODU_G_2F: - check_insn(ctx, INSN_LOONGSON2F); + check_insn(ctx, INSN_LOONGSON2F | ASE_LEXT); gen_loongson_integer(ctx, op1, rd, rs, rt); break; #endif @@ -29097,6 +29099,38 @@ static void gen_msa_3r(CPUMIPSState *env, DisasContext *ctx) break; } break; + case OPC_MADDV_df: + switch (df) { + case DF_BYTE: + gen_helper_msa_maddv_b(tcg_ctx, tcg_ctx->cpu_env, twd, tws, twt); + break; + case DF_HALF: + gen_helper_msa_maddv_h(tcg_ctx, tcg_ctx->cpu_env, twd, tws, twt); + break; + case DF_WORD: + gen_helper_msa_maddv_w(tcg_ctx, tcg_ctx->cpu_env, twd, tws, twt); + break; + case DF_DOUBLE: + gen_helper_msa_maddv_d(tcg_ctx, tcg_ctx->cpu_env, twd, tws, twt); + break; + } + break; + case OPC_MSUBV_df: + switch (df) { + case DF_BYTE: + gen_helper_msa_msubv_b(tcg_ctx, tcg_ctx->cpu_env, twd, tws, twt); + break; + case DF_HALF: + gen_helper_msa_msubv_h(tcg_ctx, tcg_ctx->cpu_env, twd, tws, twt); + break; + case DF_WORD: + gen_helper_msa_msubv_w(tcg_ctx, tcg_ctx->cpu_env, twd, tws, twt); + break; + case DF_DOUBLE: + gen_helper_msa_msubv_d(tcg_ctx, tcg_ctx->cpu_env, twd, tws, twt); + break; + } + break; case OPC_ASUB_S_df: switch (df) { case DF_BYTE: @@ -29306,10 +29340,36 @@ static void gen_msa_3r(CPUMIPSState *env, DisasContext *ctx) } break; case OPC_SUBS_S_df: - gen_helper_msa_subs_s_df(tcg_ctx, tcg_ctx->cpu_env, tdf, twd, tws, twt); + switch (df) { + case DF_BYTE: + gen_helper_msa_subs_s_b(tcg_ctx, tcg_ctx->cpu_env, twd, tws, twt); + break; + case DF_HALF: + gen_helper_msa_subs_s_h(tcg_ctx, tcg_ctx->cpu_env, twd, tws, twt); + break; + case DF_WORD: + gen_helper_msa_subs_s_w(tcg_ctx, tcg_ctx->cpu_env, twd, tws, twt); + break; + case DF_DOUBLE: + gen_helper_msa_subs_s_d(tcg_ctx, tcg_ctx->cpu_env, twd, tws, twt); + break; + } break; case OPC_MULV_df: - gen_helper_msa_mulv_df(tcg_ctx, tcg_ctx->cpu_env, tdf, twd, tws, twt); + switch (df) { + case DF_BYTE: + gen_helper_msa_mulv_b(tcg_ctx, tcg_ctx->cpu_env, twd, tws, twt); + break; + case DF_HALF: + gen_helper_msa_mulv_h(tcg_ctx, tcg_ctx->cpu_env, twd, tws, twt); + break; + case DF_WORD: + gen_helper_msa_mulv_w(tcg_ctx, tcg_ctx->cpu_env, twd, tws, twt); + break; + case DF_DOUBLE: + gen_helper_msa_mulv_d(tcg_ctx, tcg_ctx->cpu_env, twd, tws, twt); + break; + } break; case OPC_SLD_df: gen_helper_msa_sld_df(tcg_ctx, tcg_ctx->cpu_env, tdf, twd, tws, twt); @@ -29318,25 +29378,71 @@ static void gen_msa_3r(CPUMIPSState *env, DisasContext *ctx) gen_helper_msa_vshf_df(tcg_ctx, tcg_ctx->cpu_env, tdf, twd, tws, twt); break; case OPC_SUBV_df: - gen_helper_msa_subv_df(tcg_ctx, tcg_ctx->cpu_env, tdf, twd, tws, 
twt); + switch (df) { + case DF_BYTE: + gen_helper_msa_subv_b(tcg_ctx, tcg_ctx->cpu_env, twd, tws, twt); + break; + case DF_HALF: + gen_helper_msa_subv_h(tcg_ctx, tcg_ctx->cpu_env, twd, tws, twt); + break; + case DF_WORD: + gen_helper_msa_subv_w(tcg_ctx, tcg_ctx->cpu_env, twd, tws, twt); + break; + case DF_DOUBLE: + gen_helper_msa_subv_d(tcg_ctx, tcg_ctx->cpu_env, twd, tws, twt); + break; + } break; case OPC_SUBS_U_df: - gen_helper_msa_subs_u_df(tcg_ctx, tcg_ctx->cpu_env, tdf, twd, tws, twt); - break; - case OPC_MADDV_df: - gen_helper_msa_maddv_df(tcg_ctx, tcg_ctx->cpu_env, tdf, twd, tws, twt); + switch (df) { + case DF_BYTE: + gen_helper_msa_subs_u_b(tcg_ctx, tcg_ctx->cpu_env, twd, tws, twt); + break; + case DF_HALF: + gen_helper_msa_subs_u_h(tcg_ctx, tcg_ctx->cpu_env, twd, tws, twt); + break; + case DF_WORD: + gen_helper_msa_subs_u_w(tcg_ctx, tcg_ctx->cpu_env, twd, tws, twt); + break; + case DF_DOUBLE: + gen_helper_msa_subs_u_d(tcg_ctx, tcg_ctx->cpu_env, twd, tws, twt); + break; + } break; case OPC_SPLAT_df: gen_helper_msa_splat_df(tcg_ctx, tcg_ctx->cpu_env, tdf, twd, tws, twt); break; case OPC_SUBSUS_U_df: - gen_helper_msa_subsus_u_df(tcg_ctx, tcg_ctx->cpu_env, tdf, twd, tws, twt); - break; - case OPC_MSUBV_df: - gen_helper_msa_msubv_df(tcg_ctx, tcg_ctx->cpu_env, tdf, twd, tws, twt); + switch (df) { + case DF_BYTE: + gen_helper_msa_subsus_u_b(tcg_ctx, tcg_ctx->cpu_env, twd, tws, twt); + break; + case DF_HALF: + gen_helper_msa_subsus_u_h(tcg_ctx, tcg_ctx->cpu_env, twd, tws, twt); + break; + case DF_WORD: + gen_helper_msa_subsus_u_w(tcg_ctx, tcg_ctx->cpu_env, twd, tws, twt); + break; + case DF_DOUBLE: + gen_helper_msa_subsus_u_d(tcg_ctx, tcg_ctx->cpu_env, twd, tws, twt); + break; + } break; case OPC_SUBSUU_S_df: - gen_helper_msa_subsuu_s_df(tcg_ctx, tcg_ctx->cpu_env, tdf, twd, tws, twt); + switch (df) { + case DF_BYTE: + gen_helper_msa_subsuu_s_b(tcg_ctx, tcg_ctx->cpu_env, twd, tws, twt); + break; + case DF_HALF: + gen_helper_msa_subsuu_s_h(tcg_ctx, tcg_ctx->cpu_env, twd, tws, twt); + break; + case DF_WORD: + gen_helper_msa_subsuu_s_w(tcg_ctx, tcg_ctx->cpu_env, twd, tws, twt); + break; + case DF_DOUBLE: + gen_helper_msa_subsuu_s_d(tcg_ctx, tcg_ctx->cpu_env, twd, tws, twt); + break; + } break; case OPC_DOTP_S_df: @@ -29407,22 +29513,82 @@ static void gen_msa_3r(CPUMIPSState *env, DisasContext *ctx) } break; case OPC_DOTP_S_df: - gen_helper_msa_dotp_s_df(tcg_ctx, tcg_ctx->cpu_env, tdf, twd, tws, twt); + switch (df) { + case DF_HALF: + gen_helper_msa_dotp_s_h(tcg_ctx, tcg_ctx->cpu_env, twd, tws, twt); + break; + case DF_WORD: + gen_helper_msa_dotp_s_w(tcg_ctx, tcg_ctx->cpu_env, twd, tws, twt); + break; + case DF_DOUBLE: + gen_helper_msa_dotp_s_d(tcg_ctx, tcg_ctx->cpu_env, twd, tws, twt); + break; + } break; case OPC_DOTP_U_df: - gen_helper_msa_dotp_u_df(tcg_ctx, tcg_ctx->cpu_env, tdf, twd, tws, twt); + switch (df) { + case DF_HALF: + gen_helper_msa_dotp_u_h(tcg_ctx, tcg_ctx->cpu_env, twd, tws, twt); + break; + case DF_WORD: + gen_helper_msa_dotp_u_w(tcg_ctx, tcg_ctx->cpu_env, twd, tws, twt); + break; + case DF_DOUBLE: + gen_helper_msa_dotp_u_d(tcg_ctx, tcg_ctx->cpu_env, twd, tws, twt); + break; + } break; case OPC_DPADD_S_df: - gen_helper_msa_dpadd_s_df(tcg_ctx, tcg_ctx->cpu_env, tdf, twd, tws, twt); + switch (df) { + case DF_HALF: + gen_helper_msa_dpadd_s_h(tcg_ctx, tcg_ctx->cpu_env, twd, tws, twt); + break; + case DF_WORD: + gen_helper_msa_dpadd_s_w(tcg_ctx, tcg_ctx->cpu_env, twd, tws, twt); + break; + case DF_DOUBLE: + gen_helper_msa_dpadd_s_d(tcg_ctx, tcg_ctx->cpu_env, twd, tws, twt); + 
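            /*
             * DOTP/DPADD/DPSUB are defined only for the halfword, word and
             * doubleword data formats, so these per-width switches have no
             * DF_BYTE case.
             */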
break; + } break; case OPC_DPADD_U_df: - gen_helper_msa_dpadd_u_df(tcg_ctx, tcg_ctx->cpu_env, tdf, twd, tws, twt); + switch (df) { + case DF_HALF: + gen_helper_msa_dpadd_u_h(tcg_ctx, tcg_ctx->cpu_env, twd, tws, twt); + break; + case DF_WORD: + gen_helper_msa_dpadd_u_w(tcg_ctx, tcg_ctx->cpu_env, twd, tws, twt); + break; + case DF_DOUBLE: + gen_helper_msa_dpadd_u_d(tcg_ctx, tcg_ctx->cpu_env, twd, tws, twt); + break; + } break; case OPC_DPSUB_S_df: - gen_helper_msa_dpsub_s_df(tcg_ctx, tcg_ctx->cpu_env, tdf, twd, tws, twt); + switch (df) { + case DF_HALF: + gen_helper_msa_dpsub_s_h(tcg_ctx, tcg_ctx->cpu_env, twd, tws, twt); + break; + case DF_WORD: + gen_helper_msa_dpsub_s_w(tcg_ctx, tcg_ctx->cpu_env, twd, tws, twt); + break; + case DF_DOUBLE: + gen_helper_msa_dpsub_s_d(tcg_ctx, tcg_ctx->cpu_env, twd, tws, twt); + break; + } break; case OPC_DPSUB_U_df: - gen_helper_msa_dpsub_u_df(tcg_ctx, tcg_ctx->cpu_env, tdf, twd, tws, twt); + switch (df) { + case DF_HALF: + gen_helper_msa_dpsub_u_h(tcg_ctx, tcg_ctx->cpu_env, twd, tws, twt); + break; + case DF_WORD: + gen_helper_msa_dpsub_u_w(tcg_ctx, tcg_ctx->cpu_env, twd, tws, twt); + break; + case DF_DOUBLE: + gen_helper_msa_dpsub_u_d(tcg_ctx, tcg_ctx->cpu_env, twd, tws, twt); + break; + } break; } break; @@ -30683,7 +30849,7 @@ static void decode_opc(CPUMIPSState *env, DisasContext *ctx) } break; case OPC_CP2: - check_insn(ctx, INSN_LOONGSON2F); + check_insn(ctx, ASE_LMMI); /* Note that these instructions use different fields. */ gen_loongson_multimedia(ctx, sa, rd, rt); break; @@ -30849,7 +31015,8 @@ static void mips_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs) CPUMIPSState *env = cs->env_ptr; // unicorn setup - ctx->uc = cs->uc; + struct uc_struct *uc = cs->uc; + ctx->uc = uc; ctx->page_start = ctx->base.pc_first & TARGET_PAGE_MASK; ctx->saved_pc = -1; @@ -31238,7 +31405,9 @@ void cpu_state_reset(CPUMIPSState *env) env->CP0_Config5 = env->cpu_model->CP0_Config5; env->CP0_Config5_rw_bitmask = env->cpu_model->CP0_Config5_rw_bitmask; env->CP0_Config6 = env->cpu_model->CP0_Config6; + env->CP0_Config6_rw_bitmask = env->cpu_model->CP0_Config6_rw_bitmask; env->CP0_Config7 = env->cpu_model->CP0_Config7; + env->CP0_Config7_rw_bitmask = env->cpu_model->CP0_Config7_rw_bitmask; env->CP0_LLAddr_rw_bitmask = env->cpu_model->CP0_LLAddr_rw_bitmask << env->cpu_model->CP0_LLAddr_shift; env->CP0_LLAddr_shift = env->cpu_model->CP0_LLAddr_shift; diff --git a/qemu/target/mips/translate_init.inc.c b/qemu/target/mips/translate_init.inc.c index 3e395c7e6a..02885b5c65 100644 --- a/qemu/target/mips/translate_init.inc.c +++ b/qemu/target/mips/translate_init.inc.c @@ -366,7 +366,7 @@ const mips_def_t mips_defs[] = }, { /* FIXME: - * Config3: CMGCR, PW, VZ, CTXTC, CDMM, TL + * Config3: VZ, CTXTC, CDMM, TL * Config4: MMUExtDef * Config5: MRP * FIR(FCR0): Has2008 @@ -380,10 +380,11 @@ const mips_def_t mips_defs[] = (2 << CP0C1_DS) | (4 << CP0C1_DL) | (3 << CP0C1_DA) | (1 << CP0C1_PC) | (1 << CP0C1_FP), .CP0_Config2 = MIPS_CONFIG2, - .CP0_Config3 = MIPS_CONFIG3 | (1U << CP0C3_M) | (1 << CP0C3_MSAP) | + .CP0_Config3 = MIPS_CONFIG3 | (1U << CP0C3_M) | + (1 << CP0C3_CMGCR) | (1 << CP0C3_MSAP) | (1 << CP0C3_BP) | (1 << CP0C3_BI) | (1 << CP0C3_SC) | - (1 << CP0C3_ULRI) | (1 << CP0C3_RXI) | (1 << CP0C3_LPA) | - (1 << CP0C3_VInt), + (1 << CP0C3_PW) | (1 << CP0C3_ULRI) | (1 << CP0C3_RXI) | + (1 << CP0C3_LPA) | (1 << CP0C3_VInt), .CP0_Config4 = MIPS_CONFIG4 | (1U << CP0C4_M) | (2 << CP0C4_IE) | (0x1c << CP0C4_KScrExist), .CP0_Config4_rw_bitmask = 0, @@ -801,6 +802,92 @@ const 
mips_def_t mips_defs[] = .insn_flags = CPU_LOONGSON2F, .mmu_type = MMU_TYPE_R4000, }, + { + .name = "Loongson-3A1000", + .CP0_PRid = 0x6305, + /* 64KB I-cache and d-cache. 4 way with 32 bit cache line size. */ + .CP0_Config0 = MIPS_CONFIG0 | (0x1 << CP0C0_AR) | (0x2 << CP0C0_AT) | + (MMU_TYPE_R4000 << CP0C0_MT), + .CP0_Config1 = MIPS_CONFIG1 | (1 << CP0C1_FP) | (63 << CP0C1_MMU) | + (3 << CP0C1_IS) | (4 << CP0C1_IL) | (3 << CP0C1_IA) | + (3 << CP0C1_DS) | (4 << CP0C1_DL) | (3 << CP0C1_DA) | + (1 << CP0C1_PC) | (1 << CP0C1_WR) | (1 << CP0C1_EP), + .CP0_Config2 = MIPS_CONFIG2 | (7 << CP0C2_SS) | (4 << CP0C2_SL) | + (3 << CP0C2_SA), + .CP0_Config3 = MIPS_CONFIG3 | (1 << CP0C3_LPA), + .CP0_LLAddr_rw_bitmask = 0, + .SYNCI_Step = 32, + .CCRes = 2, + .CP0_Status_rw_bitmask = 0x74D8FFFF, + .CP0_PageGrain = (1 << CP0PG_ELPA), + .CP0_PageGrain_rw_bitmask = (1 << CP0PG_ELPA), + .CP1_fcr0 = (0x5 << FCR0_PRID) | (0x1 << FCR0_REV) | (0x1 << FCR0_F64) | + (0x1 << FCR0_PS) | (0x1 << FCR0_L) | (0x1 << FCR0_W) | + (0x1 << FCR0_D) | (0x1 << FCR0_S), + .CP1_fcr31 = 0, + .CP1_fcr31_rw_bitmask = 0xFF83FFFF, + .SEGBITS = 42, + .PABITS = 48, + .insn_flags = CPU_LOONGSON3A, + .mmu_type = MMU_TYPE_R4000, + }, + { + .name = "Loongson-3A4000", + .CP0_PRid = 0x14C000, + /* 64KB I-cache and d-cache. 4 way with 32 bit cache line size. */ + .CP0_Config0 = MIPS_CONFIG0 | (0x1 << CP0C0_AR) | (0x2 << CP0C0_AT) | + (MMU_TYPE_R4000 << CP0C0_MT), + .CP0_Config1 = MIPS_CONFIG1 | (1 << CP0C1_FP) | (63 << CP0C1_MMU) | + (2 << CP0C1_IS) | (5 << CP0C1_IL) | (3 << CP0C1_IA) | + (2 << CP0C1_DS) | (5 << CP0C1_DL) | (3 << CP0C1_DA) | + (1 << CP0C1_PC) | (1 << CP0C1_WR) | (1 << CP0C1_EP), + .CP0_Config2 = MIPS_CONFIG2 | (5 << CP0C2_SS) | (5 << CP0C2_SL) | + (15 << CP0C2_SA), + .CP0_Config3 = MIPS_CONFIG3 | (1U << CP0C3_M) | (1 << CP0C3_MSAP) | + (1 << CP0C3_BP) | (1 << CP0C3_BI) | (1 << CP0C3_ULRI) | + (1 << CP0C3_RXI) | (1 << CP0C3_LPA) | (1 << CP0C3_VInt), + .CP0_Config4 = MIPS_CONFIG4 | (1U << CP0C4_M) | (2 << CP0C4_IE) | + (1 << CP0C4_AE) | (0x1c << CP0C4_KScrExist), + .CP0_Config4_rw_bitmask = 0, + .CP0_Config5 = MIPS_CONFIG5 | (1 << CP0C5_CRCP) | (1 << CP0C5_NFExists), + .CP0_Config5_rw_bitmask = (1 << CP0C5_K) | (1 << CP0C5_CV) | + (1 << CP0C5_MSAEn) | (1 << CP0C5_UFE) | + (1 << CP0C5_FRE) | (1 << CP0C5_SBRI), + .CP0_Config6 = (1 << CP0C6_VCLRU) | (1 << CP0C6_DCLRU) | + (1 << CP0C6_SFBEN) | (1 << CP0C6_VLTINT) | + (1 << CP0C6_INSTPREF) | (1 << CP0C6_DATAPREF), + .CP0_Config6_rw_bitmask = (1 << CP0C6_BPPASS) | (0x3f << CP0C6_KPOS) | + (1 << CP0C6_KE) | (1 << CP0C6_VTLBONLY) | + (1 << CP0C6_LASX) | (1 << CP0C6_SSEN) | + (1 << CP0C6_DISDRTIME) | (1 << CP0C6_PIXNUEN) | + (1 << CP0C6_SCRAND) | (1 << CP0C6_LLEXCEN) | + (1 << CP0C6_DISVC) | (1 << CP0C6_VCLRU) | + (1 << CP0C6_DCLRU) | (1 << CP0C6_PIXUEN) | + (1 << CP0C6_DISBLKLYEN) | (1 << CP0C6_UMEMUALEN) | + (1 << CP0C6_SFBEN) | (1 << CP0C6_FLTINT) | + (1 << CP0C6_VLTINT) | (1 << CP0C6_DISBTB) | + (3 << CP0C6_STPREFCTL) | (1 << CP0C6_INSTPREF) | + (1 << CP0C6_DATAPREF), + .CP0_Config7 = 0, + .CP0_Config7_rw_bitmask = (1 << CP0C7_NAPCGEN) | (1 << CP0C7_UNIMUEN) | + (1 << CP0C7_VFPUCGEN), + .CP0_LLAddr_rw_bitmask = 1, + .SYNCI_Step = 16, + .CCRes = 2, + .CP0_Status_rw_bitmask = 0x7DDBFFFF, + .CP0_PageGrain = (1 << CP0PG_ELPA), + .CP0_PageGrain_rw_bitmask = (1U << CP0PG_RIE) | (1 << CP0PG_XIE) | + (1 << CP0PG_ELPA) | (1 << CP0PG_IEC), + .CP1_fcr0 = (0x5 << FCR0_PRID) | (0x1 << FCR0_REV) | (0x1 << FCR0_F64) | + (0x1 << FCR0_PS) | (0x1 << FCR0_L) | (0x1 << FCR0_W) | + (0x1 << FCR0_D) | (0x1 
<< FCR0_S), + .CP1_fcr31 = 0, + .CP1_fcr31_rw_bitmask = 0xFF83FFFF, + .SEGBITS = 48, + .PABITS = 48, + .insn_flags = CPU_LOONGSON3A, + .mmu_type = MMU_TYPE_R4000, + }, { /* A generic CPU providing MIPS64 DSP R2 ASE features. FIXME: Eventually this should be replaced by a real CPU model. */ diff --git a/qemu/target/ppc/cpu.h b/qemu/target/ppc/cpu.h index 26ed16808c..f7b127c9a3 100644 --- a/qemu/target/ppc/cpu.h +++ b/qemu/target/ppc/cpu.h @@ -129,8 +129,9 @@ enum { POWERPC_EXCP_SDOOR_HV = 100, /* ISA 3.00 additions */ POWERPC_EXCP_HVIRT = 101, + POWERPC_EXCP_SYSCALL_VECTORED = 102, /* scv exception */ /* EOL */ - POWERPC_EXCP_NB = 102, + POWERPC_EXCP_NB = 103, /* QEMU exceptions: used internally during code translation */ POWERPC_EXCP_STOP = 0x200, /* stop translation */ POWERPC_EXCP_BRANCH = 0x201, /* branch instruction */ @@ -460,6 +461,9 @@ typedef struct ppc_v3_pate_t { #define DSISR_AMR 0x00200000 /* Unsupported Radix Tree Configuration */ #define DSISR_R_BADCONFIG 0x00080000 +#define DSISR_ATOMIC_RC 0x00040000 +/* Unable to translate address of (guest) pde or process/page table entry */ +#define DSISR_PRTABLE_FAULT 0x00020000 /* SRR1 error code fields */ @@ -469,9 +473,31 @@ typedef struct ppc_v3_pate_t { #define SRR1_PROTFAULT DSISR_PROTFAULT #define SRR1_IAMR DSISR_AMR +/* SRR1[42:45] wakeup fields for System Reset Interrupt */ + +#define SRR1_WAKEMASK 0x003c0000 /* reason for wakeup */ + +#define SRR1_WAKEHMI 0x00280000 /* Hypervisor maintenance */ +#define SRR1_WAKEHVI 0x00240000 /* Hypervisor Virt. Interrupt (P9) */ +#define SRR1_WAKEEE 0x00200000 /* External interrupt */ +#define SRR1_WAKEDEC 0x00180000 /* Decrementer interrupt */ +#define SRR1_WAKEDBELL 0x00140000 /* Privileged doorbell */ +#define SRR1_WAKERESET 0x00100000 /* System reset */ +#define SRR1_WAKEHDBELL 0x000c0000 /* Hypervisor doorbell */ +#define SRR1_WAKESCOM 0x00080000 /* SCOM not in power-saving mode */ + +/* SRR1[46:47] power-saving exit mode */ + +#define SRR1_WAKESTATE 0x00030000 /* Powersave exit mask */ + +#define SRR1_WS_HVLOSS 0x00030000 /* HV resources not maintained */ +#define SRR1_WS_GPRLOSS 0x00020000 /* GPRs not maintained */ +#define SRR1_WS_NOLOSS 0x00010000 /* All resources maintained */ + /* Facility Status and Control (FSCR) bits */ #define FSCR_EBB (63 - 56) /* Event-Based Branch Facility */ #define FSCR_TAR (63 - 55) /* Target Address Register */ +#define FSCR_SCV (63 - 51) /* System call vectored */ /* Interrupt cause mask and position in FSCR. 
HFSCR has the same format */ #define FSCR_IC_MASK (0xFFULL) #define FSCR_IC_POS (63 - 7) @@ -481,6 +507,7 @@ typedef struct ppc_v3_pate_t { #define FSCR_IC_TM 5 #define FSCR_IC_EBB 7 #define FSCR_IC_TAR 8 +#define FSCR_IC_SCV 12 /* Exception state register bits definition */ #define ESR_PIL PPC_BIT(36) /* Illegal Instruction */ @@ -548,6 +575,8 @@ enum { POWERPC_FLAG_VSX = 0x00080000, /* Has Transaction Memory (ISA 2.07) */ POWERPC_FLAG_TM = 0x00100000, + /* Has SCV (ISA 3.00) */ + POWERPC_FLAG_SCV = 0x00200000, }; /*****************************************************************************/ @@ -1206,7 +1235,7 @@ void ppc_cpu_do_interrupt(CPUState *cpu); bool ppc_cpu_exec_interrupt(CPUState *cpu, int int_req); hwaddr ppc_cpu_get_phys_page_debug(CPUState *cpu, vaddr addr); -void ppc_cpu_do_system_reset(CPUState *cs, target_ulong vector); +void ppc_cpu_do_system_reset(CPUState *cs); void ppc_cpu_do_fwnmi_machine_check(CPUState *cs, target_ulong vector); #if 0 extern const VMStateDescription vmstate_ppc_cpu; diff --git a/qemu/target/ppc/dfp_helper.c b/qemu/target/ppc/dfp_helper.c index a025ed362e..a6a398cf5c 100644 --- a/qemu/target/ppc/dfp_helper.c +++ b/qemu/target/ppc/dfp_helper.c @@ -113,7 +113,7 @@ static void dfp_set_round_mode_from_immediate(uint8_t r, uint8_t rmc, case 3: /* use FPSCR rounding mode */ return; default: - assert(0); /* cannot get here */ + g_assert_not_reached(); /* cannot get here */ } } else { /* r == 1 */ switch (rmc & 3) { @@ -130,7 +130,7 @@ static void dfp_set_round_mode_from_immediate(uint8_t r, uint8_t rmc, rnd = DEC_ROUND_HALF_DOWN; break; default: - assert(0); /* cannot get here */ + g_assert_not_reached(); /* cannot get here */ } } decContextSetRounding(&dfp->context, rnd); diff --git a/qemu/target/ppc/excp_helper.c b/qemu/target/ppc/excp_helper.c index 298b7730a1..3cfdae9ab2 100644 --- a/qemu/target/ppc/excp_helper.c +++ b/qemu/target/ppc/excp_helper.c @@ -38,12 +38,27 @@ /* Exception processing */ static inline void dump_syscall(CPUPPCState *env) { - qemu_log_mask(CPU_LOG_INT, "syscall r0=%016" PRIx64 " r3=%016" PRIx64 + qemu_log_mask(CPU_LOG_INT, "syscall r0=%016" PRIx64 + " r3=%016" PRIx64 " r4=%016" PRIx64 " r5=%016" PRIx64 + " r6=%016" PRIx64 " r7=%016" PRIx64 " r8=%016" PRIx64 " r4=%016" PRIx64 " r5=%016" PRIx64 " r6=%016" PRIx64 " nip=" TARGET_FMT_lx "\n", ppc_dump_gpr(env, 0), ppc_dump_gpr(env, 3), ppc_dump_gpr(env, 4), ppc_dump_gpr(env, 5), - ppc_dump_gpr(env, 6), env->nip); + ppc_dump_gpr(env, 6), ppc_dump_gpr(env, 7), + ppc_dump_gpr(env, 8), env->nip); +} + +static inline void dump_syscall_vectored(CPUPPCState *env) +{ + qemu_log_mask(CPU_LOG_INT, "syscall r0=%016" PRIx64 + " r3=%016" PRIx64 " r4=%016" PRIx64 " r5=%016" PRIx64 + " r6=%016" PRIx64 " r7=%016" PRIx64 " r8=%016" PRIx64 + " nip=" TARGET_FMT_lx "\n", + ppc_dump_gpr(env, 0), ppc_dump_gpr(env, 3), + ppc_dump_gpr(env, 4), ppc_dump_gpr(env, 5), + ppc_dump_gpr(env, 6), ppc_dump_gpr(env, 7), + ppc_dump_gpr(env, 8), env->nip); } static int powerpc_reset_wakeup(CPUState *cs, CPUPPCState *env, int excp, @@ -53,7 +68,7 @@ static int powerpc_reset_wakeup(CPUState *cs, CPUPPCState *env, int excp, env->resume_as_sreset = false; /* Pretend to be returning from doze always as we don't lose state */ - *msr |= (0x1ull << (63 - 47)); + *msr |= SRR1_WS_NOLOSS; /* Machine checks are sent normally */ if (excp == POWERPC_EXCP_MCHECK) { @@ -61,25 +76,25 @@ static int powerpc_reset_wakeup(CPUState *cs, CPUPPCState *env, int excp, } switch (excp) { case POWERPC_EXCP_RESET: - *msr |= 0x4ull << (63 - 45); + 
*msr |= SRR1_WAKERESET; break; case POWERPC_EXCP_EXTERNAL: - *msr |= 0x8ull << (63 - 45); + *msr |= SRR1_WAKEEE; break; case POWERPC_EXCP_DECR: - *msr |= 0x6ull << (63 - 45); + *msr |= SRR1_WAKEDEC; break; case POWERPC_EXCP_SDOOR: - *msr |= 0x5ull << (63 - 45); + *msr |= SRR1_WAKEDBELL; break; case POWERPC_EXCP_SDOOR_HV: - *msr |= 0x3ull << (63 - 45); + *msr |= SRR1_WAKEHDBELL; break; case POWERPC_EXCP_HV_MAINT: - *msr |= 0xaull << (63 - 45); + *msr |= SRR1_WAKEHMI; break; case POWERPC_EXCP_HVIRT: - *msr |= 0x9ull << (63 - 45); + *msr |= SRR1_WAKEHVI; break; default: cpu_abort(cs, "Unsupported exception %d in Power Save mode\n", @@ -149,7 +164,7 @@ static inline void powerpc_excp(PowerPCCPU *cpu, int excp_model, int excp) CPUState *cs = CPU(cpu); CPUPPCState *env = &cpu->env; target_ulong msr, new_msr, vector; - int srr0, srr1, asrr0, asrr1, lev, ail; + int srr0, srr1, asrr0, asrr1, lev = -1, ail; bool lpes0; qemu_log_mask(CPU_LOG_INT, "Raise exception at " TARGET_FMT_lx @@ -388,6 +403,13 @@ static inline void powerpc_excp(PowerPCCPU *cpu, int excp_model, int excp) new_msr |= (target_ulong)MSR_HVB; } break; + case POWERPC_EXCP_SYSCALL_VECTORED: /* scv exception */ + lev = env->error_code; + dump_syscall_vectored(env); + env->nip += 4; + new_msr |= env->msr & ((target_ulong)1 << MSR_EE); + new_msr |= env->msr & ((target_ulong)1 << MSR_RI); + break; case POWERPC_EXCP_FPU: /* Floating-point unavailable exception */ case POWERPC_EXCP_APU: /* Auxiliary processor unavailable */ case POWERPC_EXCP_DECR: /* Decrementer exception */ @@ -476,6 +498,7 @@ static inline void powerpc_excp(PowerPCCPU *cpu, int excp_model, int excp) case POWERPC_EXCP_HDECR: /* Hypervisor decrementer exception */ case POWERPC_EXCP_HDSI: /* Hypervisor data storage exception */ case POWERPC_EXCP_HISI: /* Hypervisor instruction storage exception */ + msr |= env->error_code; case POWERPC_EXCP_HDSEG: /* Hypervisor data segment exception */ case POWERPC_EXCP_HISEG: /* Hypervisor instruction segment exception */ case POWERPC_EXCP_SDOOR_HV: /* Hypervisor Doorbell interrupt */ @@ -690,12 +713,6 @@ static inline void powerpc_excp(PowerPCCPU *cpu, int excp_model, int excp) break; } - /* Save PC */ - env->spr[srr0] = env->nip; - - /* Save MSR */ - env->spr[srr1] = msr; - /* Sanity check */ if (!(env->msr_mask & MSR_HVB)) { if (new_msr & MSR_HVB) { @@ -708,14 +725,6 @@ static inline void powerpc_excp(PowerPCCPU *cpu, int excp_model, int excp) } } - /* If any alternate SRR register are defined, duplicate saved values */ - if (asrr0 != -1) { - env->spr[asrr0] = env->spr[srr0]; - } - if (asrr1 != -1) { - env->spr[asrr1] = env->spr[srr1]; - } - /* * Sort out endianness of interrupt, this differs depending on the * CPU, the HV mode, etc... 
@@ -750,18 +759,6 @@ static inline void powerpc_excp(PowerPCCPU *cpu, int excp_model, int excp) } #endif - /* Jump to handler */ - vector = env->excp_vectors[excp]; -#ifdef _MSC_VER - if (vector == (target_ulong)(0ULL - 1ULL)) { -#else - if (vector == (target_ulong)-1ULL) { -#endif - cpu_abort(cs, "Raised an exception without defined vector %d\n", - excp); - } - vector |= env->excp_prefix; - /* * AIL only works if there is no HV transition and we are running * with translations enabled @@ -770,10 +767,21 @@ static inline void powerpc_excp(PowerPCCPU *cpu, int excp_model, int excp) ((new_msr & MSR_HVB) && !(msr & MSR_HVB))) { ail = 0; } - /* Handle AIL */ - if (ail) { - new_msr |= (1 << MSR_IR) | (1 << MSR_DR); - vector |= ppc_excp_vector_offset(cs, ail); + + vector = env->excp_vectors[excp]; + if (vector == (target_ulong)-1ULL) { + cpu_abort(cs, "Raised an exception without defined vector %d\n", + excp); + } + + vector |= env->excp_prefix; + + /* If any alternate SRR register are defined, duplicate saved values */ + if (asrr0 != -1) { + env->spr[asrr0] = env->nip; + } + if (asrr1 != -1) { + env->spr[asrr1] = msr; } #if defined(TARGET_PPC64) @@ -793,6 +801,37 @@ static inline void powerpc_excp(PowerPCCPU *cpu, int excp_model, int excp) } #endif + if (excp != POWERPC_EXCP_SYSCALL_VECTORED) { + /* Save PC */ + env->spr[srr0] = env->nip; + + /* Save MSR */ + env->spr[srr1] = msr; + + /* Handle AIL */ + if (ail) { + new_msr |= (1 << MSR_IR) | (1 << MSR_DR); + vector |= ppc_excp_vector_offset(cs, ail); + } + +#if defined(TARGET_PPC64) + } else { + /* scv AIL is a little different */ + if (ail) { + new_msr |= (1 << MSR_IR) | (1 << MSR_DR); + } + if (ail == AIL_C000_0000_0000_4000) { + vector |= 0xc000000000003000ull; + } else { + vector |= 0x0000000000017000ull; + } + vector += lev * 0x20; + + env->lr = env->nip; + env->ctr = msr; +#endif + } + powerpc_set_excp_state(cpu, vector, new_msr); } @@ -954,15 +993,12 @@ static void ppc_hw_interrupt(CPUPPCState *env) } } -void ppc_cpu_do_system_reset(CPUState *cs, target_ulong vector) +void ppc_cpu_do_system_reset(CPUState *cs) { PowerPCCPU *cpu = POWERPC_CPU(cs); CPUPPCState *env = &cpu->env; powerpc_excp(cpu, env->excp_model, POWERPC_EXCP_RESET); - if (vector != -1) { - env->nip = vector; - } } void ppc_cpu_do_fwnmi_machine_check(CPUState *cs, target_ulong vector) @@ -1135,6 +1171,11 @@ void helper_rfid(CPUPPCState *env) do_rfi(env, env->spr[SPR_SRR0], env->spr[SPR_SRR1]); } +void helper_rfscv(CPUPPCState *env) +{ + do_rfi(env, env->lr, env->ctr); +} + void helper_hrfid(CPUPPCState *env) { do_rfi(env, env->spr[SPR_HSRR0], env->spr[SPR_HSRR1]); diff --git a/qemu/target/ppc/helper.h b/qemu/target/ppc/helper.h index b1c4343908..77892dc80f 100644 --- a/qemu/target/ppc/helper.h +++ b/qemu/target/ppc/helper.h @@ -18,6 +18,7 @@ DEF_HELPER_1(rfmci, void, env) #if defined(TARGET_PPC64) DEF_HELPER_2(pminsn, void, env, i32) DEF_HELPER_1(rfid, void, env) +DEF_HELPER_1(rfscv, void, env) DEF_HELPER_1(hrfid, void, env) DEF_HELPER_2(store_lpcr, void, env, tl) DEF_HELPER_2(store_pcr, void, env, tl) @@ -215,10 +216,6 @@ DEF_HELPER_3(vsubuqm, void, avr, avr, avr) DEF_HELPER_4(vsubecuq, void, avr, avr, avr, avr) DEF_HELPER_4(vsubeuqm, void, avr, avr, avr, avr) DEF_HELPER_3(vsubcuq, void, avr, avr, avr) -DEF_HELPER_3(vrlb, void, avr, avr, avr) -DEF_HELPER_3(vrlh, void, avr, avr, avr) -DEF_HELPER_3(vrlw, void, avr, avr, avr) -DEF_HELPER_3(vrld, void, avr, avr, avr) DEF_HELPER_4(vsldoi, void, avr, avr, avr, i32) DEF_HELPER_3(vextractub, void, avr, avr, i32) 
DEF_HELPER_3(vextractuh, void, avr, avr, i32) diff --git a/qemu/target/ppc/int_helper.c b/qemu/target/ppc/int_helper.c index c6ead3e149..57ede62f78 100644 --- a/qemu/target/ppc/int_helper.c +++ b/qemu/target/ppc/int_helper.c @@ -763,7 +763,7 @@ VCMPNE(w, u32, uint32_t, 0) \ for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \ uint32_t result; \ - int rel = float32_compare_quiet(a->f32[i], b->f32[i], \ + FloatRelation rel = float32_compare_quiet(a->f32[i], b->f32[i], \ &env->vec_status); \ if (rel == float_relation_unordered) { \ result = 0; \ @@ -796,14 +796,14 @@ static inline void vcmpbfp_internal(CPUPPCState *env, ppc_avr_t *r, int all_in = 0; for (i = 0; i < ARRAY_SIZE(r->f32); i++) { - int le_rel = float32_compare_quiet(a->f32[i], b->f32[i], + FloatRelation le_rel = float32_compare_quiet(a->f32[i], b->f32[i], &env->vec_status); if (le_rel == float_relation_unordered) { r->u32[i] = 0xc0000000; all_in = 1; } else { float32 bneg = float32_chs(b->f32[i]); - int ge_rel = float32_compare_quiet(a->f32[i], bneg, + FloatRelation ge_rel = float32_compare_quiet(a->f32[i], bneg, &env->vec_status); int le = le_rel != float_relation_greater; int ge = ge_rel != float_relation_less; @@ -1340,23 +1340,6 @@ VRFI(p, float_round_up) VRFI(z, float_round_to_zero) #undef VRFI -#define VROTATE(suffix, element, mask) \ - void helper_vrl##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ - { \ - int i; \ - \ - for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ - unsigned int shift = b->element[i] & mask; \ - r->element[i] = (a->element[i] << shift) | \ - (a->element[i] >> (sizeof(a->element[0]) * 8 - shift)); \ - } \ - } -VROTATE(b, u8, 0x7) -VROTATE(h, u16, 0xF) -VROTATE(w, u32, 0x1F) -VROTATE(d, u64, 0x3F) -#undef VROTATE - void helper_vrsqrtefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b) { int i; diff --git a/qemu/target/ppc/translate.c b/qemu/target/ppc/translate.c index 15c9fde4f3..8e100300c2 100644 --- a/qemu/target/ppc/translate.c +++ b/qemu/target/ppc/translate.c @@ -170,6 +170,7 @@ struct DisasContext { bool vsx_enabled; bool spe_enabled; bool tm_enabled; + bool scv_enabled; bool gtse; ppc_spr_t *spr_cb; /* Needed to check rights for mfspr/mtspr */ int singlestep_enabled; @@ -1946,6 +1947,7 @@ static void gen_rlwimi(DisasContext *ctx) tcg_gen_deposit_tl(tcg_ctx, t_ra, t_ra, t_rs, sh, me - mb + 1); } else { target_ulong mask; + bool mask_in_32b = true; TCGv t1; #if defined(TARGET_PPC64) @@ -1954,8 +1956,13 @@ static void gen_rlwimi(DisasContext *ctx) #endif mask = MASK(mb, me); +#if defined(TARGET_PPC64) + if (mask > 0xffffffffu) { + mask_in_32b = false; + } +#endif t1 = tcg_temp_new(tcg_ctx); - if (mask <= 0xffffffffu) { + if (mask_in_32b) { TCGv_i32 t0 = tcg_temp_new_i32(tcg_ctx); tcg_gen_trunc_tl_i32(tcg_ctx, t0, t_rs); tcg_gen_rotli_i32(tcg_ctx, t0, t0, sh); @@ -1998,12 +2005,18 @@ static void gen_rlwinm(DisasContext *ctx) tcg_gen_extract_tl(tcg_ctx, t_ra, t_rs, rsh, len); } else { target_ulong mask; + bool mask_in_32b = true; #if defined(TARGET_PPC64) mb += 32; me += 32; #endif mask = MASK(mb, me); - if (mask <= 0xffffffffu) { +#if defined(TARGET_PPC64) + if (mask > 0xffffffffu) { + mask_in_32b = false; + } +#endif + if (mask_in_32b) { if (sh == 0) { tcg_gen_andi_tl(tcg_ctx, t_ra, t_rs, mask); } else { @@ -2039,6 +2052,7 @@ static void gen_rlwnm(DisasContext *ctx) uint32_t mb = MB(ctx->opcode); uint32_t me = ME(ctx->opcode); target_ulong mask; + bool mask_in_32b = true; #if defined(TARGET_PPC64) mb += 32; @@ -2046,7 +2060,12 @@ static void gen_rlwnm(DisasContext *ctx) #endif mask = MASK(mb, me); - if 
(mask <= 0xffffffffu) { +#if defined(TARGET_PPC64) + if (mask > 0xffffffffu) { + mask_in_32b = false; + } +#endif + if (mask_in_32b) { TCGv_i32 t0 = tcg_temp_new_i32(tcg_ctx); TCGv_i32 t1 = tcg_temp_new_i32(tcg_ctx); tcg_gen_trunc_tl_i32(tcg_ctx, t0, t_rb); @@ -4112,6 +4131,18 @@ static void gen_rfid(DisasContext *ctx) gen_sync_exception(ctx); } +static void gen_rfscv(DisasContext *ctx) +{ + TCGContext *tcg_ctx = ctx->uc->tcg_ctx; + CHK_SV; + if (tb_cflags(ctx->base.tb) & CF_USE_ICOUNT) { + gen_io_start(tcg_ctx); + } + gen_update_cfar(ctx, ctx->base.pc_next - 4); + gen_helper_rfscv(tcg_ctx, tcg_ctx->cpu_env); + gen_sync_exception(ctx); +} + static void gen_hrfid(DisasContext *ctx) { TCGContext *tcg_ctx = ctx->uc->tcg_ctx; @@ -4124,6 +4155,7 @@ static void gen_hrfid(DisasContext *ctx) /* sc */ #define POWERPC_SYSCALL POWERPC_EXCP_SYSCALL +#define POWERPC_SYSCALL_VECTORED POWERPC_EXCP_SYSCALL_VECTORED static void gen_sc(DisasContext *ctx) { uint32_t lev; @@ -4132,6 +4164,21 @@ static void gen_sc(DisasContext *ctx) gen_exception_err(ctx, POWERPC_SYSCALL, lev); } +#if defined(TARGET_PPC64) +static void gen_scv(DisasContext *ctx) +{ + uint32_t lev; + + if (unlikely(!ctx->scv_enabled)) { + gen_exception_err(ctx, POWERPC_EXCP_FU, FSCR_IC_SCV); + return; + } + + lev = (ctx->opcode >> 5) & 0x7F; + gen_exception_err(ctx, POWERPC_SYSCALL_VECTORED, lev); +} +#endif + /*** Trap ***/ /* Check for unconditional traps (always or never) */ @@ -6988,6 +7035,10 @@ GEN_HANDLER(mcrf, 0x13, 0x00, 0xFF, 0x00000001, PPC_INTEGER), GEN_HANDLER(rfi, 0x13, 0x12, 0x01, 0x03FF8001, PPC_FLOW), #if defined(TARGET_PPC64) GEN_HANDLER(rfid, 0x13, 0x12, 0x00, 0x03FF8001, PPC_64B), +/* Top bit of opc2 corresponds with low bit of LEV, so use two handlers */ +GEN_HANDLER_E(scv, 0x11, 0x10, 0xFF, 0x03FFF01E, PPC_NONE, PPC2_ISA300), +GEN_HANDLER_E(scv, 0x11, 0x00, 0xFF, 0x03FFF01E, PPC_NONE, PPC2_ISA300), +GEN_HANDLER_E(rfscv, 0x13, 0x12, 0x02, 0x03FF8001, PPC_NONE, PPC2_ISA300), GEN_HANDLER_E(stop, 0x13, 0x12, 0x0b, 0x03FFF801, PPC_NONE, PPC2_ISA300), GEN_HANDLER_E(doze, 0x13, 0x12, 0x0c, 0x03FFF801, PPC_NONE, PPC2_PM_ISA206), GEN_HANDLER_E(nap, 0x13, 0x12, 0x0d, 0x03FFF801, PPC_NONE, PPC2_PM_ISA206), @@ -6995,7 +7046,9 @@ GEN_HANDLER_E(sleep, 0x13, 0x12, 0x0e, 0x03FFF801, PPC_NONE, PPC2_PM_ISA206), GEN_HANDLER_E(rvwinkle, 0x13, 0x12, 0x0f, 0x03FFF801, PPC_NONE, PPC2_PM_ISA206), GEN_HANDLER(hrfid, 0x13, 0x12, 0x08, 0x03FF8001, PPC_64H), #endif -GEN_HANDLER(sc, 0x11, 0xFF, 0xFF, 0x03FFF01D, PPC_FLOW), +/* Top bit of opc2 corresponds with low bit of LEV, so use two handlers */ +GEN_HANDLER(sc, 0x11, 0x11, 0xFF, 0x03FFF01D, PPC_FLOW), +GEN_HANDLER(sc, 0x11, 0x01, 0xFF, 0x03FFF01D, PPC_FLOW), GEN_HANDLER(tw, 0x1F, 0x04, 0x00, 0x00000001, PPC_FLOW), GEN_HANDLER(twi, 0x03, 0xFF, 0xFF, 0x00000000, PPC_FLOW), #if defined(TARGET_PPC64) @@ -7541,6 +7594,12 @@ static void ppc_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs) } else { ctx->vsx_enabled = false; } + if ((env->flags & POWERPC_FLAG_SCV) + && (env->spr[SPR_FSCR] & (1ull << FSCR_SCV))) { + ctx->scv_enabled = true; + } else { + ctx->scv_enabled = false; + } #if defined(TARGET_PPC64) if ((env->flags & POWERPC_FLAG_TM) && msr_tm) { ctx->tm_enabled = !!msr_tm; diff --git a/qemu/target/ppc/translate/fp-impl.inc.c b/qemu/target/ppc/translate/fp-impl.inc.c index 58155f21eb..00a9d42dd8 100644 --- a/qemu/target/ppc/translate/fp-impl.inc.c +++ b/qemu/target/ppc/translate/fp-impl.inc.c @@ -33,170 +33,170 @@ static void gen_set_cr1_from_fpscr(DisasContext *ctx) #endif /*** 
Floating-Point arithmetic ***/ -#define _GEN_FLOAT_ACB(name, op, op1, op2, isfloat, set_fprf, type) \ -static void gen_f##name(DisasContext *ctx) \ -{ \ - TCGContext *tcg_ctx = ctx->uc->tcg_ctx; \ - TCGv_i64 t0; \ - TCGv_i64 t1; \ - TCGv_i64 t2; \ - TCGv_i64 t3; \ - if (unlikely(!ctx->fpu_enabled)) { \ - gen_exception(ctx, POWERPC_EXCP_FPU); \ - return; \ - } \ - t0 = tcg_temp_new_i64(tcg_ctx); \ - t1 = tcg_temp_new_i64(tcg_ctx); \ - t2 = tcg_temp_new_i64(tcg_ctx); \ - t3 = tcg_temp_new_i64(tcg_ctx); \ - gen_reset_fpstatus(tcg_ctx); \ - get_fpr(tcg_ctx, t0, rA(ctx->opcode)); \ - get_fpr(tcg_ctx, t1, rC(ctx->opcode)); \ - get_fpr(tcg_ctx, t2, rB(ctx->opcode)); \ - gen_helper_f##op(tcg_ctx, t3, tcg_ctx->cpu_env, t0, t1, t2); \ - if (isfloat) { \ - gen_helper_frsp(tcg_ctx, t3, tcg_ctx->cpu_env, t3); \ - } \ - set_fpr(tcg_ctx, rD(ctx->opcode), t3); \ - if (set_fprf) { \ - gen_compute_fprf_float64(tcg_ctx, t3); \ - } \ - if (unlikely(Rc(ctx->opcode) != 0)) { \ - gen_set_cr1_from_fpscr(ctx); \ - } \ - tcg_temp_free_i64(tcg_ctx, t0); \ - tcg_temp_free_i64(tcg_ctx, t1); \ - tcg_temp_free_i64(tcg_ctx, t2); \ - tcg_temp_free_i64(tcg_ctx, t3); \ -} +#define _GEN_FLOAT_ACB(name, op, op1, op2, isfloat, set_fprf, type) \ + static void gen_f##name(DisasContext *ctx) \ + { \ + TCGContext *tcg_ctx = ctx->uc->tcg_ctx; \ + TCGv_i64 t0; \ + TCGv_i64 t1; \ + TCGv_i64 t2; \ + TCGv_i64 t3; \ + if (unlikely(!ctx->fpu_enabled)) { \ + gen_exception(ctx, POWERPC_EXCP_FPU); \ + return; \ + } \ + t0 = tcg_temp_new_i64(tcg_ctx); \ + t1 = tcg_temp_new_i64(tcg_ctx); \ + t2 = tcg_temp_new_i64(tcg_ctx); \ + t3 = tcg_temp_new_i64(tcg_ctx); \ + gen_reset_fpstatus(tcg_ctx); \ + get_fpr(tcg_ctx, t0, rA(ctx->opcode)); \ + get_fpr(tcg_ctx, t1, rC(ctx->opcode)); \ + get_fpr(tcg_ctx, t2, rB(ctx->opcode)); \ + gen_helper_f##op(tcg_ctx, t3, tcg_ctx->cpu_env, t0, t1, t2); \ + if (isfloat) { \ + gen_helper_frsp(tcg_ctx, t3, tcg_ctx->cpu_env, t3); \ + } \ + set_fpr(tcg_ctx, rD(ctx->opcode), t3); \ + if (set_fprf) { \ + gen_compute_fprf_float64(tcg_ctx, t3); \ + } \ + if (unlikely(Rc(ctx->opcode) != 0)) { \ + gen_set_cr1_from_fpscr(ctx); \ + } \ + tcg_temp_free_i64(tcg_ctx, t0); \ + tcg_temp_free_i64(tcg_ctx, t1); \ + tcg_temp_free_i64(tcg_ctx, t2); \ + tcg_temp_free_i64(tcg_ctx, t3); \ + } -#define GEN_FLOAT_ACB(name, op2, set_fprf, type) \ -_GEN_FLOAT_ACB(name, name, 0x3F, op2, 0, set_fprf, type); \ -_GEN_FLOAT_ACB(name##s, name, 0x3B, op2, 1, set_fprf, type); - -#define _GEN_FLOAT_AB(name, op, op1, op2, inval, isfloat, set_fprf, type) \ -static void gen_f##name(DisasContext *ctx) \ -{ \ - TCGContext *tcg_ctx = ctx->uc->tcg_ctx; \ - TCGv_i64 t0; \ - TCGv_i64 t1; \ - TCGv_i64 t2; \ - if (unlikely(!ctx->fpu_enabled)) { \ - gen_exception(ctx, POWERPC_EXCP_FPU); \ - return; \ - } \ - t0 = tcg_temp_new_i64(tcg_ctx); \ - t1 = tcg_temp_new_i64(tcg_ctx); \ - t2 = tcg_temp_new_i64(tcg_ctx); \ - gen_reset_fpstatus(tcg_ctx); \ - get_fpr(tcg_ctx, t0, rA(ctx->opcode)); \ - get_fpr(tcg_ctx, t1, rB(ctx->opcode)); \ - gen_helper_f##op(tcg_ctx, t2, tcg_ctx->cpu_env, t0, t1); \ - if (isfloat) { \ - gen_helper_frsp(tcg_ctx, t2, tcg_ctx->cpu_env, t2); \ - } \ - set_fpr(tcg_ctx, rD(ctx->opcode), t2); \ - if (set_fprf) { \ - gen_compute_fprf_float64(tcg_ctx, t2); \ - } \ - if (unlikely(Rc(ctx->opcode) != 0)) { \ - gen_set_cr1_from_fpscr(ctx); \ - } \ - tcg_temp_free_i64(tcg_ctx, t0); \ - tcg_temp_free_i64(tcg_ctx, t1); \ - tcg_temp_free_i64(tcg_ctx, t2); \ -} -#define GEN_FLOAT_AB(name, op2, inval, set_fprf, type) \ -_GEN_FLOAT_AB(name, name, 0x3F, op2, 
inval, 0, set_fprf, type); \ -_GEN_FLOAT_AB(name##s, name, 0x3B, op2, inval, 1, set_fprf, type); - -#define _GEN_FLOAT_AC(name, op, op1, op2, inval, isfloat, set_fprf, type) \ -static void gen_f##name(DisasContext *ctx) \ -{ \ - TCGContext *tcg_ctx = ctx->uc->tcg_ctx; \ - TCGv_i64 t0; \ - TCGv_i64 t1; \ - TCGv_i64 t2; \ - if (unlikely(!ctx->fpu_enabled)) { \ - gen_exception(ctx, POWERPC_EXCP_FPU); \ - return; \ - } \ - t0 = tcg_temp_new_i64(tcg_ctx); \ - t1 = tcg_temp_new_i64(tcg_ctx); \ - t2 = tcg_temp_new_i64(tcg_ctx); \ - gen_reset_fpstatus(tcg_ctx); \ - get_fpr(tcg_ctx, t0, rA(ctx->opcode)); \ - get_fpr(tcg_ctx, t1, rC(ctx->opcode)); \ - gen_helper_f##op(tcg_ctx, t2, tcg_ctx->cpu_env, t0, t1); \ - if (isfloat) { \ - gen_helper_frsp(tcg_ctx, t2, tcg_ctx->cpu_env, t2); \ - } \ - set_fpr(tcg_ctx, rD(ctx->opcode), t2); \ - if (set_fprf) { \ - gen_compute_fprf_float64(tcg_ctx, t2); \ - } \ - if (unlikely(Rc(ctx->opcode) != 0)) { \ - gen_set_cr1_from_fpscr(ctx); \ - } \ - tcg_temp_free_i64(tcg_ctx, t0); \ - tcg_temp_free_i64(tcg_ctx, t1); \ - tcg_temp_free_i64(tcg_ctx, t2); \ -} -#define GEN_FLOAT_AC(name, op2, inval, set_fprf, type) \ -_GEN_FLOAT_AC(name, name, 0x3F, op2, inval, 0, set_fprf, type); \ -_GEN_FLOAT_AC(name##s, name, 0x3B, op2, inval, 1, set_fprf, type); - -#define GEN_FLOAT_B(name, op2, op3, set_fprf, type) \ -static void gen_f##name(DisasContext *ctx) \ -{ \ - TCGContext *tcg_ctx = ctx->uc->tcg_ctx; \ - TCGv_i64 t0; \ - TCGv_i64 t1; \ - if (unlikely(!ctx->fpu_enabled)) { \ - gen_exception(ctx, POWERPC_EXCP_FPU); \ - return; \ - } \ - t0 = tcg_temp_new_i64(tcg_ctx); \ - t1 = tcg_temp_new_i64(tcg_ctx); \ - gen_reset_fpstatus(tcg_ctx); \ - get_fpr(tcg_ctx, t0, rB(ctx->opcode)); \ - gen_helper_f##name(tcg_ctx, t1, tcg_ctx->cpu_env, t0); \ - set_fpr(tcg_ctx, rD(ctx->opcode), t1); \ - if (set_fprf) { \ - gen_compute_fprf_float64(tcg_ctx, t1); \ - } \ - if (unlikely(Rc(ctx->opcode) != 0)) { \ - gen_set_cr1_from_fpscr(ctx); \ - } \ - tcg_temp_free_i64(tcg_ctx, t0); \ - tcg_temp_free_i64(tcg_ctx, t1); \ -} +#define GEN_FLOAT_ACB(name, op2, set_fprf, type) \ + _GEN_FLOAT_ACB(name, name, 0x3F, op2, 0, set_fprf, type); \ + _GEN_FLOAT_ACB(name##s, name, 0x3B, op2, 1, set_fprf, type); + +#define _GEN_FLOAT_AB(name, op, op1, op2, inval, isfloat, set_fprf, type) \ + static void gen_f##name(DisasContext *ctx) \ + { \ + TCGContext *tcg_ctx = ctx->uc->tcg_ctx; \ + TCGv_i64 t0; \ + TCGv_i64 t1; \ + TCGv_i64 t2; \ + if (unlikely(!ctx->fpu_enabled)) { \ + gen_exception(ctx, POWERPC_EXCP_FPU); \ + return; \ + } \ + t0 = tcg_temp_new_i64(tcg_ctx); \ + t1 = tcg_temp_new_i64(tcg_ctx); \ + t2 = tcg_temp_new_i64(tcg_ctx); \ + gen_reset_fpstatus(tcg_ctx); \ + get_fpr(tcg_ctx, t0, rA(ctx->opcode)); \ + get_fpr(tcg_ctx, t1, rB(ctx->opcode)); \ + gen_helper_f##op(tcg_ctx, t2, tcg_ctx->cpu_env, t0, t1); \ + if (isfloat) { \ + gen_helper_frsp(tcg_ctx, t2, tcg_ctx->cpu_env, t2); \ + } \ + set_fpr(tcg_ctx, rD(ctx->opcode), t2); \ + if (set_fprf) { \ + gen_compute_fprf_float64(tcg_ctx, t2); \ + } \ + if (unlikely(Rc(ctx->opcode) != 0)) { \ + gen_set_cr1_from_fpscr(ctx); \ + } \ + tcg_temp_free_i64(tcg_ctx, t0); \ + tcg_temp_free_i64(tcg_ctx, t1); \ + tcg_temp_free_i64(tcg_ctx, t2); \ + } +#define GEN_FLOAT_AB(name, op2, inval, set_fprf, type) \ + _GEN_FLOAT_AB(name, name, 0x3F, op2, inval, 0, set_fprf, type); \ + _GEN_FLOAT_AB(name##s, name, 0x3B, op2, inval, 1, set_fprf, type); + +#define _GEN_FLOAT_AC(name, op, op1, op2, inval, isfloat, set_fprf, type) \ + static void gen_f##name(DisasContext *ctx) \ + { \ + 
TCGContext *tcg_ctx = ctx->uc->tcg_ctx; \ + TCGv_i64 t0; \ + TCGv_i64 t1; \ + TCGv_i64 t2; \ + if (unlikely(!ctx->fpu_enabled)) { \ + gen_exception(ctx, POWERPC_EXCP_FPU); \ + return; \ + } \ + t0 = tcg_temp_new_i64(tcg_ctx); \ + t1 = tcg_temp_new_i64(tcg_ctx); \ + t2 = tcg_temp_new_i64(tcg_ctx); \ + gen_reset_fpstatus(tcg_ctx); \ + get_fpr(tcg_ctx, t0, rA(ctx->opcode)); \ + get_fpr(tcg_ctx, t1, rC(ctx->opcode)); \ + gen_helper_f##op(tcg_ctx, t2, tcg_ctx->cpu_env, t0, t1); \ + if (isfloat) { \ + gen_helper_frsp(tcg_ctx, t2, tcg_ctx->cpu_env, t2); \ + } \ + set_fpr(tcg_ctx, rD(ctx->opcode), t2); \ + if (set_fprf) { \ + gen_compute_fprf_float64(tcg_ctx, t2); \ + } \ + if (unlikely(Rc(ctx->opcode) != 0)) { \ + gen_set_cr1_from_fpscr(ctx); \ + } \ + tcg_temp_free_i64(tcg_ctx, t0); \ + tcg_temp_free_i64(tcg_ctx, t1); \ + tcg_temp_free_i64(tcg_ctx, t2); \ + } +#define GEN_FLOAT_AC(name, op2, inval, set_fprf, type) \ + _GEN_FLOAT_AC(name, name, 0x3F, op2, inval, 0, set_fprf, type); \ + _GEN_FLOAT_AC(name##s, name, 0x3B, op2, inval, 1, set_fprf, type); + +#define GEN_FLOAT_B(name, op2, op3, set_fprf, type) \ + static void gen_f##name(DisasContext *ctx) \ + { \ + TCGContext *tcg_ctx = ctx->uc->tcg_ctx; \ + TCGv_i64 t0; \ + TCGv_i64 t1; \ + if (unlikely(!ctx->fpu_enabled)) { \ + gen_exception(ctx, POWERPC_EXCP_FPU); \ + return; \ + } \ + t0 = tcg_temp_new_i64(tcg_ctx); \ + t1 = tcg_temp_new_i64(tcg_ctx); \ + gen_reset_fpstatus(tcg_ctx); \ + get_fpr(tcg_ctx, t0, rB(ctx->opcode)); \ + gen_helper_f##name(tcg_ctx, t1, tcg_ctx->cpu_env, t0); \ + set_fpr(tcg_ctx, rD(ctx->opcode), t1); \ + if (set_fprf) { \ + gen_compute_fprf_float64(tcg_ctx, t1); \ + } \ + if (unlikely(Rc(ctx->opcode) != 0)) { \ + gen_set_cr1_from_fpscr(ctx); \ + } \ + tcg_temp_free_i64(tcg_ctx, t0); \ + tcg_temp_free_i64(tcg_ctx, t1); \ + } -#define GEN_FLOAT_BS(name, op1, op2, set_fprf, type) \ -static void gen_f##name(DisasContext *ctx) \ -{ \ - TCGContext *tcg_ctx = ctx->uc->tcg_ctx; \ - TCGv_i64 t0; \ - TCGv_i64 t1; \ - if (unlikely(!ctx->fpu_enabled)) { \ - gen_exception(ctx, POWERPC_EXCP_FPU); \ - return; \ - } \ - t0 = tcg_temp_new_i64(tcg_ctx); \ - t1 = tcg_temp_new_i64(tcg_ctx); \ - gen_reset_fpstatus(tcg_ctx); \ - get_fpr(tcg_ctx, t0, rB(ctx->opcode)); \ - gen_helper_f##name(tcg_ctx, t1, tcg_ctx->cpu_env, t0); \ - set_fpr(tcg_ctx, rD(ctx->opcode), t1); \ - if (set_fprf) { \ - gen_compute_fprf_float64(tcg_ctx, t1); \ - } \ - if (unlikely(Rc(ctx->opcode) != 0)) { \ - gen_set_cr1_from_fpscr(ctx); \ - } \ - tcg_temp_free_i64(tcg_ctx, t0); \ - tcg_temp_free_i64(tcg_ctx, t1); \ -} +#define GEN_FLOAT_BS(name, op1, op2, set_fprf, type) \ + static void gen_f##name(DisasContext *ctx) \ + { \ + TCGContext *tcg_ctx = ctx->uc->tcg_ctx; \ + TCGv_i64 t0; \ + TCGv_i64 t1; \ + if (unlikely(!ctx->fpu_enabled)) { \ + gen_exception(ctx, POWERPC_EXCP_FPU); \ + return; \ + } \ + t0 = tcg_temp_new_i64(tcg_ctx); \ + t1 = tcg_temp_new_i64(tcg_ctx); \ + gen_reset_fpstatus(tcg_ctx); \ + get_fpr(tcg_ctx, t0, rB(ctx->opcode)); \ + gen_helper_f##name(tcg_ctx, t1, tcg_ctx->cpu_env, t0); \ + set_fpr(tcg_ctx, rD(ctx->opcode), t1); \ + if (set_fprf) { \ + gen_compute_fprf_float64(tcg_ctx, t1); \ + } \ + if (unlikely(Rc(ctx->opcode) != 0)) { \ + gen_set_cr1_from_fpscr(ctx); \ + } \ + tcg_temp_free_i64(tcg_ctx, t0); \ + tcg_temp_free_i64(tcg_ctx, t1); \ + } /* fadd - fadds */ GEN_FLOAT_AB(add, 0x15, 0x000007C0, 1, PPC_FLOAT); @@ -217,7 +217,7 @@ GEN_FLOAT_BS(rsqrte, 0x3F, 0x1A, 1, PPC_FLOAT_FRSQRTE); /* frsqrtes */ static void gen_frsqrtes(DisasContext *ctx) { - 
TCGContext *tcg_ctx = ctx->uc->tcg_ctx; \ + TCGContext *tcg_ctx = ctx->uc->tcg_ctx; TCGv_i64 t0; TCGv_i64 t1; if (unlikely(!ctx->fpu_enabled)) { @@ -248,7 +248,7 @@ GEN_FLOAT_AB(sub, 0x14, 0x000007C0, 1, PPC_FLOAT); /* fsqrt */ static void gen_fsqrt(DisasContext *ctx) { - TCGContext *tcg_ctx = ctx->uc->tcg_ctx; \ + TCGContext *tcg_ctx = ctx->uc->tcg_ctx; TCGv_i64 t0; TCGv_i64 t1; if (unlikely(!ctx->fpu_enabled)) { @@ -271,7 +271,7 @@ static void gen_fsqrt(DisasContext *ctx) static void gen_fsqrts(DisasContext *ctx) { - TCGContext *tcg_ctx = ctx->uc->tcg_ctx; \ + TCGContext *tcg_ctx = ctx->uc->tcg_ctx; TCGv_i64 t0; TCGv_i64 t1; if (unlikely(!ctx->fpu_enabled)) { @@ -342,7 +342,7 @@ GEN_FLOAT_B(rim, 0x08, 0x0F, 1, PPC_FLOAT_EXT); static void gen_ftdiv(DisasContext *ctx) { - TCGContext *tcg_ctx = ctx->uc->tcg_ctx; \ + TCGContext *tcg_ctx = ctx->uc->tcg_ctx; TCGv_i64 t0; TCGv_i64 t1; if (unlikely(!ctx->fpu_enabled)) { @@ -360,7 +360,7 @@ static void gen_ftdiv(DisasContext *ctx) static void gen_ftsqrt(DisasContext *ctx) { - TCGContext *tcg_ctx = ctx->uc->tcg_ctx; \ + TCGContext *tcg_ctx = ctx->uc->tcg_ctx; TCGv_i64 t0; if (unlikely(!ctx->fpu_enabled)) { gen_exception(ctx, POWERPC_EXCP_FPU); @@ -372,14 +372,12 @@ static void gen_ftsqrt(DisasContext *ctx) tcg_temp_free_i64(tcg_ctx, t0); } - - /*** Floating-Point compare ***/ /* fcmpo */ static void gen_fcmpo(DisasContext *ctx) { - TCGContext *tcg_ctx = ctx->uc->tcg_ctx; \ + TCGContext *tcg_ctx = ctx->uc->tcg_ctx; TCGv_i32 crf; TCGv_i64 t0; TCGv_i64 t1; @@ -403,7 +401,7 @@ static void gen_fcmpo(DisasContext *ctx) /* fcmpu */ static void gen_fcmpu(DisasContext *ctx) { - TCGContext *tcg_ctx = ctx->uc->tcg_ctx; \ + TCGContext *tcg_ctx = ctx->uc->tcg_ctx; TCGv_i32 crf; TCGv_i64 t0; TCGv_i64 t1; @@ -429,7 +427,7 @@ static void gen_fcmpu(DisasContext *ctx) /* XXX: beware that fabs never checks for NaNs nor update FPSCR */ static void gen_fabs(DisasContext *ctx) { - TCGContext *tcg_ctx = ctx->uc->tcg_ctx; \ + TCGContext *tcg_ctx = ctx->uc->tcg_ctx; TCGv_i64 t0; TCGv_i64 t1; if (unlikely(!ctx->fpu_enabled)) { @@ -452,7 +450,7 @@ static void gen_fabs(DisasContext *ctx) /* XXX: beware that fmr never checks for NaNs nor update FPSCR */ static void gen_fmr(DisasContext *ctx) { - TCGContext *tcg_ctx = ctx->uc->tcg_ctx; \ + TCGContext *tcg_ctx = ctx->uc->tcg_ctx; TCGv_i64 t0; if (unlikely(!ctx->fpu_enabled)) { gen_exception(ctx, POWERPC_EXCP_FPU); @@ -471,7 +469,7 @@ static void gen_fmr(DisasContext *ctx) /* XXX: beware that fnabs never checks for NaNs nor update FPSCR */ static void gen_fnabs(DisasContext *ctx) { - TCGContext *tcg_ctx = ctx->uc->tcg_ctx; \ + TCGContext *tcg_ctx = ctx->uc->tcg_ctx; TCGv_i64 t0; TCGv_i64 t1; if (unlikely(!ctx->fpu_enabled)) { @@ -494,7 +492,7 @@ static void gen_fnabs(DisasContext *ctx) /* XXX: beware that fneg never checks for NaNs nor update FPSCR */ static void gen_fneg(DisasContext *ctx) { - TCGContext *tcg_ctx = ctx->uc->tcg_ctx; \ + TCGContext *tcg_ctx = ctx->uc->tcg_ctx; TCGv_i64 t0; TCGv_i64 t1; if (unlikely(!ctx->fpu_enabled)) { @@ -517,7 +515,7 @@ static void gen_fneg(DisasContext *ctx) /* XXX: beware that fcpsgn never checks for NaNs nor update FPSCR */ static void gen_fcpsgn(DisasContext *ctx) { - TCGContext *tcg_ctx = ctx->uc->tcg_ctx; \ + TCGContext *tcg_ctx = ctx->uc->tcg_ctx; TCGv_i64 t0; TCGv_i64 t1; TCGv_i64 t2; @@ -542,7 +540,7 @@ static void gen_fcpsgn(DisasContext *ctx) static void gen_fmrgew(DisasContext *ctx) { - TCGContext *tcg_ctx = ctx->uc->tcg_ctx; \ + TCGContext *tcg_ctx = ctx->uc->tcg_ctx; TCGv_i64 
b0; TCGv_i64 t0; TCGv_i64 t1; @@ -565,7 +563,7 @@ static void gen_fmrgew(DisasContext *ctx) static void gen_fmrgow(DisasContext *ctx) { - TCGContext *tcg_ctx = ctx->uc->tcg_ctx; \ + TCGContext *tcg_ctx = ctx->uc->tcg_ctx; TCGv_i64 t0; TCGv_i64 t1; TCGv_i64 t2; @@ -607,8 +605,8 @@ static void gen_mcrfs(DisasContext *ctx) shift = 4 * nibble; tcg_gen_shri_tl(tcg_ctx, tmp, cpu_fpscr, shift); tcg_gen_trunc_tl_i32(tcg_ctx, cpu_crf[crfD(ctx->opcode)], tmp); - tcg_gen_andi_i32(tcg_ctx, cpu_crf[crfD(ctx->opcode)], cpu_crf[crfD(ctx->opcode)], - 0xf); + tcg_gen_andi_i32(tcg_ctx, cpu_crf[crfD(ctx->opcode)], + cpu_crf[crfD(ctx->opcode)], 0xf); tcg_temp_free(tcg_ctx, tmp); tcg_gen_extu_tl_i64(tcg_ctx, tnew_fpscr, cpu_fpscr); /* Only the exception bits (including FX) should be cleared if read */ @@ -836,7 +834,8 @@ static void gen_mtfsf(DisasContext *ctx) } gen_reset_fpstatus(tcg_ctx); if (l) { - t0 = tcg_const_i32(tcg_ctx, (ctx->insns_flags2 & PPC2_ISA205) ? 0xffff : 0xff); + t0 = tcg_const_i32(tcg_ctx, + (ctx->insns_flags2 & PPC2_ISA205) ? 0xffff : 0xff); } else { t0 = tcg_const_i32(tcg_ctx, flm << (w * 8)); } @@ -887,101 +886,101 @@ static void gen_mtfsfi(DisasContext *ctx) } /*** Floating-point load ***/ -#define GEN_LDF(name, ldop, opc, type) \ -static void glue(gen_, name)(DisasContext *ctx) \ -{ \ - TCGContext *tcg_ctx = ctx->uc->tcg_ctx; \ - TCGv EA; \ - TCGv_i64 t0; \ - if (unlikely(!ctx->fpu_enabled)) { \ - gen_exception(ctx, POWERPC_EXCP_FPU); \ - return; \ - } \ - gen_set_access_type(ctx, ACCESS_FLOAT); \ - EA = tcg_temp_new(tcg_ctx); \ - t0 = tcg_temp_new_i64(tcg_ctx); \ - gen_addr_imm_index(ctx, EA, 0); \ - gen_qemu_##ldop(ctx, t0, EA); \ - set_fpr(tcg_ctx, rD(ctx->opcode), t0); \ - tcg_temp_free(tcg_ctx, EA); \ - tcg_temp_free_i64(tcg_ctx, t0); \ -} +#define GEN_LDF(name, ldop, opc, type) \ + static void glue(gen_, name)(DisasContext * ctx) \ + { \ + TCGContext *tcg_ctx = ctx->uc->tcg_ctx; \ + TCGv EA; \ + TCGv_i64 t0; \ + if (unlikely(!ctx->fpu_enabled)) { \ + gen_exception(ctx, POWERPC_EXCP_FPU); \ + return; \ + } \ + gen_set_access_type(ctx, ACCESS_FLOAT); \ + EA = tcg_temp_new(tcg_ctx); \ + t0 = tcg_temp_new_i64(tcg_ctx); \ + gen_addr_imm_index(ctx, EA, 0); \ + gen_qemu_##ldop(ctx, t0, EA); \ + set_fpr(tcg_ctx, rD(ctx->opcode), t0); \ + tcg_temp_free(tcg_ctx, EA); \ + tcg_temp_free_i64(tcg_ctx, t0); \ + } -#define GEN_LDUF(name, ldop, opc, type) \ -static void glue(gen_, name##u)(DisasContext *ctx) \ -{ \ - TCGContext *tcg_ctx = ctx->uc->tcg_ctx; \ - TCGv EA; \ - TCGv_i64 t0; \ - if (unlikely(!ctx->fpu_enabled)) { \ - gen_exception(ctx, POWERPC_EXCP_FPU); \ - return; \ - } \ - if (unlikely(rA(ctx->opcode) == 0)) { \ - gen_inval_exception(ctx, POWERPC_EXCP_INVAL_INVAL); \ - return; \ - } \ - gen_set_access_type(ctx, ACCESS_FLOAT); \ - EA = tcg_temp_new(tcg_ctx); \ - t0 = tcg_temp_new_i64(tcg_ctx); \ - gen_addr_imm_index(ctx, EA, 0); \ - gen_qemu_##ldop(ctx, t0, EA); \ - set_fpr(tcg_ctx, rD(ctx->opcode), t0); \ - tcg_gen_mov_tl(tcg_ctx, cpu_gpr[rA(ctx->opcode)], EA); \ - tcg_temp_free(tcg_ctx, EA); \ - tcg_temp_free_i64(tcg_ctx, t0); \ -} +#define GEN_LDUF(name, ldop, opc, type) \ + static void glue(gen_, name##u)(DisasContext * ctx) \ + { \ + TCGContext *tcg_ctx = ctx->uc->tcg_ctx; \ + TCGv EA; \ + TCGv_i64 t0; \ + if (unlikely(!ctx->fpu_enabled)) { \ + gen_exception(ctx, POWERPC_EXCP_FPU); \ + return; \ + } \ + if (unlikely(rA(ctx->opcode) == 0)) { \ + gen_inval_exception(ctx, POWERPC_EXCP_INVAL_INVAL); \ + return; \ + } \ + gen_set_access_type(ctx, ACCESS_FLOAT); \ + EA = 
tcg_temp_new(tcg_ctx); \ + t0 = tcg_temp_new_i64(tcg_ctx); \ + gen_addr_imm_index(ctx, EA, 0); \ + gen_qemu_##ldop(ctx, t0, EA); \ + set_fpr(tcg_ctx, rD(ctx->opcode), t0); \ + tcg_gen_mov_tl(tcg_ctx, cpu_gpr[rA(ctx->opcode)], EA); \ + tcg_temp_free(tcg_ctx, EA); \ + tcg_temp_free_i64(tcg_ctx, t0); \ + } -#define GEN_LDUXF(name, ldop, opc, type) \ -static void glue(gen_, name##ux)(DisasContext *ctx) \ -{ \ - TCGContext *tcg_ctx = ctx->uc->tcg_ctx; \ - TCGv EA; \ - TCGv_i64 t0; \ - if (unlikely(!ctx->fpu_enabled)) { \ - gen_exception(ctx, POWERPC_EXCP_FPU); \ - return; \ - } \ - t0 = tcg_temp_new_i64(tcg_ctx); \ - if (unlikely(rA(ctx->opcode) == 0)) { \ - gen_inval_exception(ctx, POWERPC_EXCP_INVAL_INVAL); \ - return; \ - } \ - gen_set_access_type(ctx, ACCESS_FLOAT); \ - EA = tcg_temp_new(tcg_ctx); \ - gen_addr_reg_index(ctx, EA); \ - gen_qemu_##ldop(ctx, t0, EA); \ - set_fpr(tcg_ctx, rD(ctx->opcode), t0); \ - tcg_gen_mov_tl(tcg_ctx, cpu_gpr[rA(ctx->opcode)], EA); \ - tcg_temp_free(tcg_ctx, EA); \ - tcg_temp_free_i64(tcg_ctx, t0); \ -} +#define GEN_LDUXF(name, ldop, opc, type) \ + static void glue(gen_, name##ux)(DisasContext * ctx) \ + { \ + TCGContext *tcg_ctx = ctx->uc->tcg_ctx; \ + TCGv EA; \ + TCGv_i64 t0; \ + if (unlikely(!ctx->fpu_enabled)) { \ + gen_exception(ctx, POWERPC_EXCP_FPU); \ + return; \ + } \ + t0 = tcg_temp_new_i64(tcg_ctx); \ + if (unlikely(rA(ctx->opcode) == 0)) { \ + gen_inval_exception(ctx, POWERPC_EXCP_INVAL_INVAL); \ + return; \ + } \ + gen_set_access_type(ctx, ACCESS_FLOAT); \ + EA = tcg_temp_new(tcg_ctx); \ + gen_addr_reg_index(ctx, EA); \ + gen_qemu_##ldop(ctx, t0, EA); \ + set_fpr(tcg_ctx, rD(ctx->opcode), t0); \ + tcg_gen_mov_tl(tcg_ctx, cpu_gpr[rA(ctx->opcode)], EA); \ + tcg_temp_free(tcg_ctx, EA); \ + tcg_temp_free_i64(tcg_ctx, t0); \ + } -#define GEN_LDXF(name, ldop, opc2, opc3, type) \ -static void glue(gen_, name##x)(DisasContext *ctx) \ -{ \ - TCGContext *tcg_ctx = ctx->uc->tcg_ctx; \ - TCGv EA; \ - TCGv_i64 t0; \ - if (unlikely(!ctx->fpu_enabled)) { \ - gen_exception(ctx, POWERPC_EXCP_FPU); \ - return; \ - } \ - gen_set_access_type(ctx, ACCESS_FLOAT); \ - EA = tcg_temp_new(tcg_ctx); \ - t0 = tcg_temp_new_i64(tcg_ctx); \ - gen_addr_reg_index(ctx, EA); \ - gen_qemu_##ldop(ctx, t0, EA); \ - set_fpr(tcg_ctx, rD(ctx->opcode), t0); \ - tcg_temp_free(tcg_ctx, EA); \ - tcg_temp_free_i64(tcg_ctx, t0); \ -} +#define GEN_LDXF(name, ldop, opc2, opc3, type) \ + static void glue(gen_, name##x)(DisasContext * ctx) \ + { \ + TCGContext *tcg_ctx = ctx->uc->tcg_ctx; \ + TCGv EA; \ + TCGv_i64 t0; \ + if (unlikely(!ctx->fpu_enabled)) { \ + gen_exception(ctx, POWERPC_EXCP_FPU); \ + return; \ + } \ + gen_set_access_type(ctx, ACCESS_FLOAT); \ + EA = tcg_temp_new(tcg_ctx); \ + t0 = tcg_temp_new_i64(tcg_ctx); \ + gen_addr_reg_index(ctx, EA); \ + gen_qemu_##ldop(ctx, t0, EA); \ + set_fpr(tcg_ctx, rD(ctx->opcode), t0); \ + tcg_temp_free(tcg_ctx, EA); \ + tcg_temp_free_i64(tcg_ctx, t0); \ + } -#define GEN_LDFS(name, ldop, op, type) \ -GEN_LDF(name, ldop, op | 0x20, type); \ -GEN_LDUF(name, ldop, op | 0x21, type); \ -GEN_LDUXF(name, ldop, op | 0x01, type); \ -GEN_LDXF(name, ldop, 0x17, op | 0x00, type) +#define GEN_LDFS(name, ldop, op, type) \ + GEN_LDF(name, ldop, op | 0x20, type); \ + GEN_LDUF(name, ldop, op | 0x21, type); \ + GEN_LDUXF(name, ldop, op | 0x01, type); \ + GEN_LDXF(name, ldop, 0x17, op | 0x00, type) static void gen_qemu_ld32fs(DisasContext *ctx, TCGv_i64 dest, TCGv addr) { @@ -992,9 +991,9 @@ static void gen_qemu_ld32fs(DisasContext *ctx, TCGv_i64 dest, TCGv addr) 
tcg_temp_free_i32(tcg_ctx, tmp); } - /* lfd lfdu lfdux lfdx */ +/* lfd lfdu lfdux lfdx */ GEN_LDFS(lfd, ld64_i64, 0x12, PPC_FLOAT); - /* lfs lfsu lfsux lfsx */ +/* lfs lfsu lfsux lfsx */ GEN_LDFS(lfs, ld32fs, 0x10, PPC_FLOAT); /* lfdepx (external PID lfdx) */ @@ -1132,101 +1131,101 @@ static void gen_lfiwzx(DisasContext *ctx) tcg_temp_free_i64(tcg_ctx, t0); } /*** Floating-point store ***/ -#define GEN_STF(name, stop, opc, type) \ -static void glue(gen_, name)(DisasContext *ctx) \ -{ \ - TCGContext *tcg_ctx = ctx->uc->tcg_ctx; \ - TCGv EA; \ - TCGv_i64 t0; \ - if (unlikely(!ctx->fpu_enabled)) { \ - gen_exception(ctx, POWERPC_EXCP_FPU); \ - return; \ - } \ - gen_set_access_type(ctx, ACCESS_FLOAT); \ - EA = tcg_temp_new(tcg_ctx); \ - t0 = tcg_temp_new_i64(tcg_ctx); \ - gen_addr_imm_index(ctx, EA, 0); \ - get_fpr(tcg_ctx, t0, rS(ctx->opcode)); \ - gen_qemu_##stop(ctx, t0, EA); \ - tcg_temp_free(tcg_ctx, EA); \ - tcg_temp_free_i64(tcg_ctx, t0); \ -} +#define GEN_STF(name, stop, opc, type) \ + static void glue(gen_, name)(DisasContext * ctx) \ + { \ + TCGContext *tcg_ctx = ctx->uc->tcg_ctx; \ + TCGv EA; \ + TCGv_i64 t0; \ + if (unlikely(!ctx->fpu_enabled)) { \ + gen_exception(ctx, POWERPC_EXCP_FPU); \ + return; \ + } \ + gen_set_access_type(ctx, ACCESS_FLOAT); \ + EA = tcg_temp_new(tcg_ctx); \ + t0 = tcg_temp_new_i64(tcg_ctx); \ + gen_addr_imm_index(ctx, EA, 0); \ + get_fpr(tcg_ctx, t0, rS(ctx->opcode)); \ + gen_qemu_##stop(ctx, t0, EA); \ + tcg_temp_free(tcg_ctx, EA); \ + tcg_temp_free_i64(tcg_ctx, t0); \ + } -#define GEN_STUF(name, stop, opc, type) \ -static void glue(gen_, name##u)(DisasContext *ctx) \ -{ \ - TCGContext *tcg_ctx = ctx->uc->tcg_ctx; \ - TCGv EA; \ - TCGv_i64 t0; \ - if (unlikely(!ctx->fpu_enabled)) { \ - gen_exception(ctx, POWERPC_EXCP_FPU); \ - return; \ - } \ - if (unlikely(rA(ctx->opcode) == 0)) { \ - gen_inval_exception(ctx, POWERPC_EXCP_INVAL_INVAL); \ - return; \ - } \ - gen_set_access_type(ctx, ACCESS_FLOAT); \ - EA = tcg_temp_new(tcg_ctx); \ - t0 = tcg_temp_new_i64(tcg_ctx); \ - gen_addr_imm_index(ctx, EA, 0); \ - get_fpr(tcg_ctx, t0, rS(ctx->opcode)); \ - gen_qemu_##stop(ctx, t0, EA); \ - tcg_gen_mov_tl(tcg_ctx, cpu_gpr[rA(ctx->opcode)], EA); \ - tcg_temp_free(tcg_ctx, EA); \ - tcg_temp_free_i64(tcg_ctx, t0); \ -} +#define GEN_STUF(name, stop, opc, type) \ + static void glue(gen_, name##u)(DisasContext * ctx) \ + { \ + TCGContext *tcg_ctx = ctx->uc->tcg_ctx; \ + TCGv EA; \ + TCGv_i64 t0; \ + if (unlikely(!ctx->fpu_enabled)) { \ + gen_exception(ctx, POWERPC_EXCP_FPU); \ + return; \ + } \ + if (unlikely(rA(ctx->opcode) == 0)) { \ + gen_inval_exception(ctx, POWERPC_EXCP_INVAL_INVAL); \ + return; \ + } \ + gen_set_access_type(ctx, ACCESS_FLOAT); \ + EA = tcg_temp_new(tcg_ctx); \ + t0 = tcg_temp_new_i64(tcg_ctx); \ + gen_addr_imm_index(ctx, EA, 0); \ + get_fpr(tcg_ctx, t0, rS(ctx->opcode)); \ + gen_qemu_##stop(ctx, t0, EA); \ + tcg_gen_mov_tl(tcg_ctx, cpu_gpr[rA(ctx->opcode)], EA); \ + tcg_temp_free(tcg_ctx, EA); \ + tcg_temp_free_i64(tcg_ctx, t0); \ + } -#define GEN_STUXF(name, stop, opc, type) \ -static void glue(gen_, name##ux)(DisasContext *ctx) \ -{ \ - TCGContext *tcg_ctx = ctx->uc->tcg_ctx; \ - TCGv EA; \ - TCGv_i64 t0; \ - if (unlikely(!ctx->fpu_enabled)) { \ - gen_exception(ctx, POWERPC_EXCP_FPU); \ - return; \ - } \ - if (unlikely(rA(ctx->opcode) == 0)) { \ - gen_inval_exception(ctx, POWERPC_EXCP_INVAL_INVAL); \ - return; \ - } \ - gen_set_access_type(ctx, ACCESS_FLOAT); \ - EA = tcg_temp_new(tcg_ctx); \ - t0 = tcg_temp_new_i64(tcg_ctx); \ - 
gen_addr_reg_index(ctx, EA); \ - get_fpr(tcg_ctx, t0, rS(ctx->opcode)); \ - gen_qemu_##stop(ctx, t0, EA); \ - tcg_gen_mov_tl(tcg_ctx, cpu_gpr[rA(ctx->opcode)], EA); \ - tcg_temp_free(tcg_ctx, EA); \ - tcg_temp_free_i64(tcg_ctx, t0); \ -} +#define GEN_STUXF(name, stop, opc, type) \ + static void glue(gen_, name##ux)(DisasContext * ctx) \ + { \ + TCGContext *tcg_ctx = ctx->uc->tcg_ctx; \ + TCGv EA; \ + TCGv_i64 t0; \ + if (unlikely(!ctx->fpu_enabled)) { \ + gen_exception(ctx, POWERPC_EXCP_FPU); \ + return; \ + } \ + if (unlikely(rA(ctx->opcode) == 0)) { \ + gen_inval_exception(ctx, POWERPC_EXCP_INVAL_INVAL); \ + return; \ + } \ + gen_set_access_type(ctx, ACCESS_FLOAT); \ + EA = tcg_temp_new(tcg_ctx); \ + t0 = tcg_temp_new_i64(tcg_ctx); \ + gen_addr_reg_index(ctx, EA); \ + get_fpr(tcg_ctx, t0, rS(ctx->opcode)); \ + gen_qemu_##stop(ctx, t0, EA); \ + tcg_gen_mov_tl(tcg_ctx, cpu_gpr[rA(ctx->opcode)], EA); \ + tcg_temp_free(tcg_ctx, EA); \ + tcg_temp_free_i64(tcg_ctx, t0); \ + } -#define GEN_STXF(name, stop, opc2, opc3, type) \ -static void glue(gen_, name##x)(DisasContext *ctx) \ -{ \ - TCGContext *tcg_ctx = ctx->uc->tcg_ctx; \ - TCGv EA; \ - TCGv_i64 t0; \ - if (unlikely(!ctx->fpu_enabled)) { \ - gen_exception(ctx, POWERPC_EXCP_FPU); \ - return; \ - } \ - gen_set_access_type(ctx, ACCESS_FLOAT); \ - EA = tcg_temp_new(tcg_ctx); \ - t0 = tcg_temp_new_i64(tcg_ctx); \ - gen_addr_reg_index(ctx, EA); \ - get_fpr(tcg_ctx, t0, rS(ctx->opcode)); \ - gen_qemu_##stop(ctx, t0, EA); \ - tcg_temp_free(tcg_ctx, EA); \ - tcg_temp_free_i64(tcg_ctx, t0); \ -} +#define GEN_STXF(name, stop, opc2, opc3, type) \ + static void glue(gen_, name##x)(DisasContext * ctx) \ + { \ + TCGContext *tcg_ctx = ctx->uc->tcg_ctx; \ + TCGv EA; \ + TCGv_i64 t0; \ + if (unlikely(!ctx->fpu_enabled)) { \ + gen_exception(ctx, POWERPC_EXCP_FPU); \ + return; \ + } \ + gen_set_access_type(ctx, ACCESS_FLOAT); \ + EA = tcg_temp_new(tcg_ctx); \ + t0 = tcg_temp_new_i64(tcg_ctx); \ + gen_addr_reg_index(ctx, EA); \ + get_fpr(tcg_ctx, t0, rS(ctx->opcode)); \ + gen_qemu_##stop(ctx, t0, EA); \ + tcg_temp_free(tcg_ctx, EA); \ + tcg_temp_free_i64(tcg_ctx, t0); \ + } -#define GEN_STFS(name, stop, op, type) \ -GEN_STF(name, stop, op | 0x20, type); \ -GEN_STUF(name, stop, op | 0x21, type); \ -GEN_STUXF(name, stop, op | 0x01, type); \ -GEN_STXF(name, stop, 0x17, op | 0x00, type) +#define GEN_STFS(name, stop, op, type) \ + GEN_STF(name, stop, op | 0x20, type); \ + GEN_STUF(name, stop, op | 0x21, type); \ + GEN_STUXF(name, stop, op | 0x01, type); \ + GEN_STXF(name, stop, 0x17, op | 0x00, type) static void gen_qemu_st32fs(DisasContext *ctx, TCGv_i64 src, TCGv addr) { @@ -1338,8 +1337,7 @@ static inline void gen_qemu_st32fiw(DisasContext *ctx, TCGv_i64 arg1, TCGv arg2) { TCGContext *tcg_ctx = ctx->uc->tcg_ctx; TCGv t0 = tcg_temp_new(tcg_ctx); - tcg_gen_trunc_i64_tl(tcg_ctx, t0, arg1), - gen_qemu_st32(ctx, t0, arg2); + tcg_gen_trunc_i64_tl(tcg_ctx, t0, arg1), gen_qemu_st32(ctx, t0, arg2); tcg_temp_free(tcg_ctx, t0); } /* stfiwx */ diff --git a/qemu/target/ppc/translate/vmx-impl.inc.c b/qemu/target/ppc/translate/vmx-impl.inc.c index 9d4211dd6e..16df4ae63b 100644 --- a/qemu/target/ppc/translate/vmx-impl.inc.c +++ b/qemu/target/ppc/translate/vmx-impl.inc.c @@ -922,13 +922,13 @@ GEN_VXFORM3(vsubeuqm, 31, 0); GEN_VXFORM3(vsubecuq, 31, 0); GEN_VXFORM_DUAL(vsubeuqm, PPC_NONE, PPC2_ALTIVEC_207, \ vsubecuq, PPC_NONE, PPC2_ALTIVEC_207) -GEN_VXFORM(vrlb, 2, 0); -GEN_VXFORM(vrlh, 2, 1); -GEN_VXFORM(vrlw, 2, 2); +GEN_VXFORM_V(vrlb, MO_8, tcg_gen_gvec_rotlv, 2, 0); 
+GEN_VXFORM_V(vrlh, MO_16, tcg_gen_gvec_rotlv, 2, 1); +GEN_VXFORM_V(vrlw, MO_32, tcg_gen_gvec_rotlv, 2, 2); GEN_VXFORM(vrlwmi, 2, 2); GEN_VXFORM_DUAL(vrlw, PPC_ALTIVEC, PPC_NONE, \ vrlwmi, PPC_NONE, PPC2_ISA300) -GEN_VXFORM(vrld, 2, 3); +GEN_VXFORM_V(vrld, MO_64, tcg_gen_gvec_rotlv, 2, 3); GEN_VXFORM(vrldmi, 2, 3); GEN_VXFORM_DUAL(vrld, PPC_NONE, PPC2_ALTIVEC_207, \ vrldmi, PPC_NONE, PPC2_ISA300) @@ -1058,22 +1058,25 @@ GEN_VXRFORM_DUAL(vcmpbfp, PPC_ALTIVEC, PPC_NONE, \ GEN_VXRFORM_DUAL(vcmpgtfp, PPC_ALTIVEC, PPC_NONE, \ vcmpgtud, PPC_NONE, PPC2_ALTIVEC_207) -#define GEN_VXFORM_DUPI(name, tcg_op, opc2, opc3) \ -static void glue(gen_, name)(DisasContext *ctx) \ - { \ - TCGContext *tcg_ctx = ctx->uc->tcg_ctx; \ - int simm; \ - if (unlikely(!ctx->altivec_enabled)) { \ - gen_exception(ctx, POWERPC_EXCP_VPU); \ - return; \ - } \ - simm = SIMM5(ctx->opcode); \ - tcg_op(tcg_ctx, avr_full_offset(rD(ctx->opcode)), 16, 16, simm);\ +static void gen_vsplti(DisasContext *ctx, int vece) +{ + TCGContext *tcg_ctx = ctx->uc->tcg_ctx; + int simm; + + if (unlikely(!ctx->altivec_enabled)) { + gen_exception(ctx, POWERPC_EXCP_VPU); + return; } + simm = SIMM5(ctx->opcode); + tcg_gen_gvec_dup_imm(tcg_ctx, vece, avr_full_offset(rD(ctx->opcode)), 16, 16, simm); +} + +#define GEN_VXFORM_VSPLTI(name, vece, opc2, opc3) \ +static void glue(gen_, name)(DisasContext *ctx) { gen_vsplti(ctx, vece); } -GEN_VXFORM_DUPI(vspltisb, tcg_gen_gvec_dup8i, 6, 12); -GEN_VXFORM_DUPI(vspltish, tcg_gen_gvec_dup16i, 6, 13); -GEN_VXFORM_DUPI(vspltisw, tcg_gen_gvec_dup32i, 6, 14); +GEN_VXFORM_VSPLTI(vspltisb, MO_8, 6, 12); +GEN_VXFORM_VSPLTI(vspltish, MO_16, 6, 13); +GEN_VXFORM_VSPLTI(vspltisw, MO_32, 6, 14); #define GEN_VXFORM_NOA(name, opc2, opc3) \ static void glue(gen_, name)(DisasContext *ctx) \ @@ -1598,7 +1601,7 @@ GEN_VXFORM_DUAL(vsldoi, PPC_ALTIVEC, PPC_NONE, #undef GEN_VXRFORM_DUAL #undef GEN_VXRFORM1 #undef GEN_VXRFORM -#undef GEN_VXFORM_DUPI +#undef GEN_VXFORM_VSPLTI #undef GEN_VXFORM_NOA #undef GEN_VXFORM_UIMM #undef GEN_VAFORM_PAIRED diff --git a/qemu/target/ppc/translate/vsx-impl.inc.c b/qemu/target/ppc/translate/vsx-impl.inc.c index 679da14902..a0c3832842 100644 --- a/qemu/target/ppc/translate/vsx-impl.inc.c +++ b/qemu/target/ppc/translate/vsx-impl.inc.c @@ -1629,7 +1629,7 @@ static void gen_xxspltib(DisasContext *ctx) return; } } - tcg_gen_gvec_dup8i(tcg_ctx, vsr_full_offset(rt), 16, 16, uim8); + tcg_gen_gvec_dup_imm(tcg_ctx, MO_8, vsr_full_offset(rt), 16, 16, uim8); } static void gen_xxsldwi(DisasContext *ctx) diff --git a/qemu/target/ppc/translate_init.inc.c b/qemu/target/ppc/translate_init.inc.c index d2fb1974ad..6ded89c817 100644 --- a/qemu/target/ppc/translate_init.inc.c +++ b/qemu/target/ppc/translate_init.inc.c @@ -3377,6 +3377,7 @@ static void init_excp_POWER9(CPUPPCState *env) init_excp_POWER8(env); env->excp_vectors[POWERPC_EXCP_HVIRT] = 0x00000EA0; + env->excp_vectors[POWERPC_EXCP_SYSCALL_VECTORED] = 0x00000000; } static void init_excp_POWER10(CPUPPCState *env) @@ -5145,7 +5146,7 @@ POWERPC_FAMILY(e5500)(CPUClass *oc, void *data) PPC_FLOAT_STFIWX | PPC_WAIT | PPC_MEM_TLBSYNC | PPC_TLBIVAX | PPC_MEM_SYNC | PPC_64B | PPC_POPCNTB | PPC_POPCNTWD; - pcc->insns_flags2 = PPC2_BOOKE206 | PPC2_PRCNTL | PPC2_PERM_ISA206 | \ + pcc->insns_flags2 = PPC2_BOOKE206 | PPC2_PRCNTL | PPC2_PERM_ISA206 | PPC2_FP_CVT_S64; pcc->msr_mask = (1ull << MSR_CM) | (1ull << MSR_GS) | @@ -5191,7 +5192,7 @@ POWERPC_FAMILY(e6500)(CPUClass *oc, void *data) PPC_FLOAT_STFIWX | PPC_WAIT | PPC_MEM_TLBSYNC | PPC_TLBIVAX | PPC_MEM_SYNC | PPC_64B | 
PPC_POPCNTB | PPC_POPCNTWD | PPC_ALTIVEC; - pcc->insns_flags2 = PPC2_BOOKE206 | PPC2_PRCNTL | PPC2_PERM_ISA206 | \ + pcc->insns_flags2 = PPC2_BOOKE206 | PPC2_PRCNTL | PPC2_PERM_ISA206 | PPC2_FP_CVT_S64 | PPC2_ATOMIC_ISA206; pcc->msr_mask = (1ull << MSR_CM) | (1ull << MSR_GS) | @@ -8845,7 +8846,7 @@ POWERPC_FAMILY(POWER9)(CPUClass *oc, void *data) pcc->flags = POWERPC_FLAG_VRE | POWERPC_FLAG_SE | POWERPC_FLAG_BE | POWERPC_FLAG_PMM | POWERPC_FLAG_BUS_CLK | POWERPC_FLAG_CFAR | - POWERPC_FLAG_VSX | POWERPC_FLAG_TM; + POWERPC_FLAG_VSX | POWERPC_FLAG_TM | POWERPC_FLAG_SCV; pcc->l1_dcache_size = 0x8000; pcc->l1_icache_size = 0x8000; pcc->interrupts_big_endian = ppc_cpu_interrupts_big_endian_lpcr; @@ -8898,11 +8899,6 @@ static void init_proc_POWER10(CPUPPCState *env) gen_spr_power8_rpr(env); gen_spr_power9_mmu(env); - /* POWER9 Specific registers */ - spr_register_kvm(env, SPR_TIDR, "TIDR", NULL, NULL, - spr_read_generic, spr_write_generic, - KVM_REG_PPC_TIDR, 0); - /* FIXME: Filter fields properly based on privilege level */ spr_register_kvm_hv(env, SPR_PSSCR, "PSSCR", NULL, NULL, NULL, NULL, spr_read_generic, spr_write_generic, @@ -9680,7 +9676,7 @@ static int gdb_get_float_reg(CPUPPCState *env, GByteArray *buf, int n) { uint8_t *mem_buf; if (n < 32) { - gdb_get_reg64(buf, *cpu_fpr_ptr(env, n)); + gdb_get_float64(buf, *cpu_fpr_ptr(env, n)); mem_buf = gdb_get_reg_ptr(buf, 8); ppc_maybe_bswap_register(env, mem_buf, 8); return 8; diff --git a/qemu/target/riscv/cpu.c b/qemu/target/riscv/cpu.c index 2313cfc6cc..9979df4979 100644 --- a/qemu/target/riscv/cpu.c +++ b/qemu/target/riscv/cpu.c @@ -30,22 +30,20 @@ // static const char riscv_exts[26] = "IEMAFDQCLBJTPVNSUHKORWXYZG"; -const char * const riscv_int_regnames[] = { - "x0/zero", "x1/ra", "x2/sp", "x3/gp", "x4/tp", "x5/t0", "x6/t1", - "x7/t2", "x8/s0", "x9/s1", "x10/a0", "x11/a1", "x12/a2", "x13/a3", - "x14/a4", "x15/a5", "x16/a6", "x17/a7", "x18/s2", "x19/s3", "x20/s4", - "x21/s5", "x22/s6", "x23/s7", "x24/s8", "x25/s9", "x26/s10", "x27/s11", - "x28/t3", "x29/t4", "x30/t5", "x31/t6" -}; - -const char * const riscv_fpr_regnames[] = { - "f0/ft0", "f1/ft1", "f2/ft2", "f3/ft3", "f4/ft4", "f5/ft5", - "f6/ft6", "f7/ft7", "f8/fs0", "f9/fs1", "f10/fa0", "f11/fa1", - "f12/fa2", "f13/fa3", "f14/fa4", "f15/fa5", "f16/fa6", "f17/fa7", - "f18/fs2", "f19/fs3", "f20/fs4", "f21/fs5", "f22/fs6", "f23/fs7", - "f24/fs8", "f25/fs9", "f26/fs10", "f27/fs11", "f28/ft8", "f29/ft9", - "f30/ft10", "f31/ft11" -}; +const char *const riscv_int_regnames[] = { + "x0/zero", "x1/ra", "x2/sp", "x3/gp", "x4/tp", "x5/t0", "x6/t1", + "x7/t2", "x8/s0", "x9/s1", "x10/a0", "x11/a1", "x12/a2", "x13/a3", + "x14/a4", "x15/a5", "x16/a6", "x17/a7", "x18/s2", "x19/s3", "x20/s4", + "x21/s5", "x22/s6", "x23/s7", "x24/s8", "x25/s9", "x26/s10", "x27/s11", + "x28/t3", "x29/t4", "x30/t5", "x31/t6"}; + +const char *const riscv_fpr_regnames[] = { + "f0/ft0", "f1/ft1", "f2/ft2", "f3/ft3", "f4/ft4", "f5/ft5", + "f6/ft6", "f7/ft7", "f8/fs0", "f9/fs1", "f10/fa0", "f11/fa1", + "f12/fa2", "f13/fa3", "f14/fa4", "f15/fa5", "f16/fa6", "f17/fa7", + "f18/fs2", "f19/fs3", "f20/fs4", "f21/fs5", "f22/fs6", "f23/fs7", + "f24/fs8", "f25/fs9", "f26/fs10", "f27/fs11", "f28/ft8", "f29/ft9", + "f30/ft10", "f31/ft11"}; static void set_misa(CPURISCVState *env, target_ulong misa) { @@ -57,6 +55,11 @@ static void set_priv_version(CPURISCVState *env, int priv_ver) env->priv_ver = priv_ver; } +static void set_vext_version(CPURISCVState *env, int vext_ver) +{ + env->vext_ver = vext_ver; +} + static void 
set_feature(CPURISCVState *env, int feature) { env->features |= (1ULL << feature); @@ -75,65 +78,48 @@ static void riscv_any_cpu_init(CPUState *obj) set_resetvec(env, DEFAULT_RSTVEC); } -#if defined(TARGET_RISCV32) -// rv32 -static void riscv_base32_cpu_init(CPUState *obj) +static void riscv_base_cpu_init(CPUState *obj) { CPURISCVState *env = &RISCV_CPU(obj)->env; /* We set this in the realise function */ set_misa(env, 0); + set_resetvec(env, DEFAULT_RSTVEC); } -// sifive-u34 -static void rv32gcsu_priv1_10_0_cpu_init(CPUState *obj) +static void rvxx_sifive_u_cpu_init(CPUState *obj) { CPURISCVState *env = &RISCV_CPU(obj)->env; - set_misa(env, RV32 | RVI | RVM | RVA | RVF | RVD | RVC | RVS | RVU); + set_misa(env, RVXLEN | RVI | RVM | RVA | RVF | RVD | RVC | RVS | RVU); set_priv_version(env, PRIV_VERSION_1_10_0); - set_resetvec(env, DEFAULT_RSTVEC); - set_feature(env, RISCV_FEATURE_MMU); - set_feature(env, RISCV_FEATURE_PMP); + set_resetvec(env, 0x1004); } -// sifive-e31 -static void rv32imacu_nommu_cpu_init(CPUState *obj) +static void rvxx_sifive_e_cpu_init(CPUState *obj) { CPURISCVState *env = &RISCV_CPU(obj)->env; - set_misa(env, RV32 | RVI | RVM | RVA | RVC | RVU); + set_misa(env, RVXLEN | RVI | RVM | RVA | RVC | RVU); set_priv_version(env, PRIV_VERSION_1_10_0); - set_resetvec(env, DEFAULT_RSTVEC); - set_feature(env, RISCV_FEATURE_PMP); + set_resetvec(env, 0x1004); } -#elif defined(TARGET_RISCV64) -// rv64 -static void riscv_base64_cpu_init(CPUState *obj) -{ - CPURISCVState *env = &RISCV_CPU(obj)->env; - /* We set this in the realise function */ - set_misa(env, 0); -} +#if defined(TARGET_RISCV32) -// sifive-u54 -static void rv64gcsu_priv1_10_0_cpu_init(CPUState *obj) +static void rv32_ibex_cpu_init(CPUState *obj) { CPURISCVState *env = &RISCV_CPU(obj)->env; - set_misa(env, RV64 | RVI | RVM | RVA | RVF | RVD | RVC | RVS | RVU); + set_misa(env, RV32 | RVI | RVM | RVC | RVU); set_priv_version(env, PRIV_VERSION_1_10_0); - set_resetvec(env, DEFAULT_RSTVEC); - set_feature(env, RISCV_FEATURE_MMU); - set_feature(env, RISCV_FEATURE_PMP); + set_resetvec(env, 0x8090); } -// sifive-e51 -static void rv64imacu_nommu_cpu_init(CPUState *obj) +static void rv32_imafcu_nommu_cpu_init(CPUState *obj) { CPURISCVState *env = &RISCV_CPU(obj)->env; - set_misa(env, RV64 | RVI | RVM | RVA | RVC | RVU); + set_misa(env, RV32 | RVI | RVM | RVA | RVF | RVC | RVU); set_priv_version(env, PRIV_VERSION_1_10_0); set_resetvec(env, DEFAULT_RSTVEC); - set_feature(env, RISCV_FEATURE_PMP); } + #endif static void riscv_cpu_set_pc(CPUState *cs, vaddr value) @@ -192,6 +178,7 @@ static void riscv_cpu_realize(struct uc_struct *uc, CPUState *dev) RISCVCPU *cpu = RISCV_CPU(dev); CPURISCVState *env = &cpu->env; int priv_version = PRIV_VERSION_1_11_0; + int vext_version = VEXT_VERSION_0_07_1; target_ulong target_misa = 0; cpu_exec_realizefn(cs); @@ -201,16 +188,15 @@ static void riscv_cpu_realize(struct uc_struct *uc, CPUState *dev) priv_version = PRIV_VERSION_1_11_0; } else if (!g_strcmp0(cpu->cfg.priv_spec, "v1.10.0")) { priv_version = PRIV_VERSION_1_10_0; - } else if (!g_strcmp0(cpu->cfg.priv_spec, "v1.9.1")) { - priv_version = PRIV_VERSION_1_09_1; } else { - // error_setg(errp, "Unsupported privilege spec version '%s'", cpu->cfg.priv_spec); + // error_setg(errp, "Unsupported privilege spec version '%s'", + // cpu->cfg.priv_spec); return; } } set_priv_version(env, priv_version); - set_resetvec(env, DEFAULT_RSTVEC); + set_vext_version(env, vext_version); if (cpu->cfg.mmu) { set_feature(env, RISCV_FEATURE_MMU); @@ -224,7 +210,7 @@ 
static void riscv_cpu_realize(struct uc_struct *uc, CPUState *dev) if (!env->misa) { /* Do some ISA extension error checking */ if (cpu->cfg.ext_i && cpu->cfg.ext_e) { - //error_setg(errp, "I and E extensions are incompatible"); + // error_setg(errp, "I and E extensions are incompatible"); return; } @@ -233,8 +219,9 @@ static void riscv_cpu_realize(struct uc_struct *uc, CPUState *dev) return; } - if (cpu->cfg.ext_g && !(cpu->cfg.ext_i & cpu->cfg.ext_m & - cpu->cfg.ext_a & cpu->cfg.ext_f & cpu->cfg.ext_d)) { + if (cpu->cfg.ext_g && + !(cpu->cfg.ext_i & cpu->cfg.ext_m & cpu->cfg.ext_a & + cpu->cfg.ext_f & cpu->cfg.ext_d)) { // warn_report("Setting G will also set IMAFD"); cpu->cfg.ext_i = true; cpu->cfg.ext_m = true; @@ -274,6 +261,45 @@ static void riscv_cpu_realize(struct uc_struct *uc, CPUState *dev) if (cpu->cfg.ext_h) { target_misa |= RVH; } + if (cpu->cfg.ext_v) { + target_misa |= RVV; + if (!is_power_of_2(cpu->cfg.vlen)) { + // error_setg(errp, + // "Vector extension VLEN must be power of 2"); + return; + } + if (cpu->cfg.vlen > RV_VLEN_MAX || cpu->cfg.vlen < 128) { + // error_setg(errp, + // "Vector extension implementation only supports VLEN " + // "in the range [128, %d]", RV_VLEN_MAX); + return; + } + if (!is_power_of_2(cpu->cfg.elen)) { + // error_setg(errp, + // "Vector extension ELEN must be power of 2"); + return; + } + if (cpu->cfg.elen > 64 || cpu->cfg.vlen < 8) { + // error_setg(errp, + // "Vector extension implementation only supports ELEN " + // "in the range [8, 64]"); + return; + } + if (cpu->cfg.vext_spec) { + if (!g_strcmp0(cpu->cfg.vext_spec, "v0.7.1")) { + vext_version = VEXT_VERSION_0_07_1; + } else { + // error_setg(errp, + // "Unsupported vector spec version '%s'", + // cpu->cfg.vext_spec); + return; + } + } else { + // qemu_log("vector verison is not specified, " + // "use the default value v0.7.1\n"); + } + set_vext_version(env, vext_version); + } set_misa(env, RVXLEN | target_misa); } @@ -316,16 +342,17 @@ typedef struct CPUModelInfo { } CPUModelInfo; static const CPUModelInfo cpu_models[] = { - {TYPE_RISCV_CPU_ANY, riscv_any_cpu_init}, -#ifdef TARGET_RISCV32 - {TYPE_RISCV_CPU_BASE32, riscv_base32_cpu_init}, - {TYPE_RISCV_CPU_SIFIVE_E31, rv32imacu_nommu_cpu_init}, - {TYPE_RISCV_CPU_SIFIVE_U34, rv32gcsu_priv1_10_0_cpu_init}, -#endif -#ifdef TARGET_RISCV64 - {TYPE_RISCV_CPU_BASE64, riscv_base64_cpu_init}, - {TYPE_RISCV_CPU_SIFIVE_E51, rv64imacu_nommu_cpu_init}, - {TYPE_RISCV_CPU_SIFIVE_U54, rv64gcsu_priv1_10_0_cpu_init}, + {TYPE_RISCV_CPU_ANY, riscv_any_cpu_init}, +#if defined(TARGET_RISCV32) + {TYPE_RISCV_CPU_BASE32, riscv_base_cpu_init}, + {TYPE_RISCV_CPU_IBEX, rv32_ibex_cpu_init}, + {TYPE_RISCV_CPU_SIFIVE_E31, rvxx_sifive_e_cpu_init}, + {TYPE_RISCV_CPU_SIFIVE_E34, rv32_imafcu_nommu_cpu_init}, + {TYPE_RISCV_CPU_SIFIVE_U34, rvxx_sifive_u_cpu_init}, +#elif defined(TARGET_RISCV64) + {TYPE_RISCV_CPU_BASE64, riscv_base_cpu_init}, + {TYPE_RISCV_CPU_SIFIVE_E51, rvxx_sifive_e_cpu_init}, + {TYPE_RISCV_CPU_SIFIVE_U54, rvxx_sifive_u_cpu_init}, #endif }; @@ -339,7 +366,7 @@ RISCVCPU *cpu_riscv_init(struct uc_struct *uc) if (cpu == NULL) { return NULL; } - memset((void*)cpu, 0, sizeof(*cpu)); + memset((void *)cpu, 0, sizeof(*cpu)); #ifdef TARGET_RISCV32 if (uc->cpu_model == INT_MAX) { diff --git a/qemu/target/riscv/cpu.h b/qemu/target/riscv/cpu.h index b94516eb7c..50c5d5fbf7 100644 --- a/qemu/target/riscv/cpu.h +++ b/qemu/target/riscv/cpu.h @@ -21,6 +21,7 @@ #define RISCV_CPU_H #include "hw/core/cpu.h" +#include "hw/registerfields.h" #include "exec/cpu-defs.h" #include 
"fpu/softfloat-types.h" @@ -35,7 +36,9 @@ typedef struct TCGContext TCGContext; #define TYPE_RISCV_CPU_ANY RISCV_CPU_TYPE_NAME("any") #define TYPE_RISCV_CPU_BASE32 RISCV_CPU_TYPE_NAME("rv32") #define TYPE_RISCV_CPU_BASE64 RISCV_CPU_TYPE_NAME("rv64") +#define TYPE_RISCV_CPU_IBEX RISCV_CPU_TYPE_NAME("lowrisc-ibex") #define TYPE_RISCV_CPU_SIFIVE_E31 RISCV_CPU_TYPE_NAME("sifive-e31") +#define TYPE_RISCV_CPU_SIFIVE_E34 RISCV_CPU_TYPE_NAME("sifive-e34") #define TYPE_RISCV_CPU_SIFIVE_E51 RISCV_CPU_TYPE_NAME("sifive-e51") #define TYPE_RISCV_CPU_SIFIVE_U34 RISCV_CPU_TYPE_NAME("sifive-u34") #define TYPE_RISCV_CPU_SIFIVE_U54 RISCV_CPU_TYPE_NAME("sifive-u54") @@ -57,6 +60,7 @@ typedef struct TCGContext TCGContext; #define RVA RV('A') #define RVF RV('F') #define RVD RV('D') +#define RVV RV('V') #define RVC RV('C') #define RVS RV('S') #define RVU RV('U') @@ -72,10 +76,11 @@ enum { RISCV_FEATURE_MISA }; -#define PRIV_VERSION_1_09_1 0x00010901 #define PRIV_VERSION_1_10_0 0x00011000 #define PRIV_VERSION_1_11_0 0x00011100 +#define VEXT_VERSION_0_07_1 0x00000701 + #define TRANSLATE_PMP_FAIL 2 #define TRANSLATE_FAIL 1 #define TRANSLATE_SUCCESS 0 @@ -87,9 +92,26 @@ typedef struct CPURISCVState CPURISCVState; #include "pmp.h" +#define RV_VLEN_MAX 256 + +FIELD(VTYPE, VLMUL, 0, 2) +FIELD(VTYPE, VSEW, 2, 3) +FIELD(VTYPE, VEDIV, 5, 2) +FIELD(VTYPE, RESERVED, 7, sizeof(target_ulong) * 8 - 9) +FIELD(VTYPE, VILL, sizeof(target_ulong) * 8 - 1, 1) + struct CPURISCVState { target_ulong gpr[32]; uint64_t fpr[32]; /* assume both F and D extensions */ + + /* vector coprocessor state. */ + uint64_t vreg[32 * RV_VLEN_MAX / 64] QEMU_ALIGNED(16); + target_ulong vxrm; + target_ulong vxsat; + target_ulong vl; + target_ulong vstart; + target_ulong vtype; + target_ulong pc; target_ulong load_res; target_ulong load_val; @@ -100,6 +122,7 @@ struct CPURISCVState { target_ulong guest_phys_fault_addr; target_ulong priv_ver; + target_ulong vext_ver; target_ulong misa; target_ulong misa_mask; @@ -245,12 +268,16 @@ typedef struct RISCVCPU { bool ext_s; bool ext_u; bool ext_h; + bool ext_v; bool ext_counters; bool ext_ifencei; bool ext_icsr; char *priv_spec; char *user_spec; + char *vext_spec; + uint16_t vlen; + uint16_t elen; bool mmu; bool pmp; } cfg; @@ -323,15 +350,56 @@ void riscv_cpu_set_fflags(CPURISCVState *env, target_ulong); #define TB_FLAGS_MMU_MASK 3 #define TB_FLAGS_MSTATUS_FS MSTATUS_FS +typedef CPURISCVState CPUArchState; +typedef RISCVCPU ArchCPU; +#include "exec/cpu-all.h" + +FIELD(TB_FLAGS, VL_EQ_VLMAX, 2, 1) +FIELD(TB_FLAGS, LMUL, 3, 2) +FIELD(TB_FLAGS, SEW, 5, 3) +FIELD(TB_FLAGS, VILL, 8, 1) + +/* + * A simplification for VLMAX + * = (1 << LMUL) * VLEN / (8 * (1 << SEW)) + * = (VLEN << LMUL) / (8 << SEW) + * = (VLEN << LMUL) >> (SEW + 3) + * = VLEN >> (SEW + 3 - LMUL) + */ +static inline uint32_t vext_get_vlmax(RISCVCPU *cpu, target_ulong vtype) +{ + uint8_t sew, lmul; + + sew = FIELD_EX64(vtype, VTYPE, VSEW); + lmul = FIELD_EX64(vtype, VTYPE, VLMUL); + return cpu->cfg.vlen >> (sew + 3 - lmul); +} + static inline void cpu_get_tb_cpu_state(CPURISCVState *env, target_ulong *pc, - target_ulong *cs_base, uint32_t *flags) + target_ulong *cs_base, uint32_t *pflags) { + uint32_t flags = 0; + *pc = env->pc; *cs_base = 0; - *flags = cpu_mmu_index(env, 0); + + if (riscv_has_ext(env, RVV)) { + uint32_t vlmax = vext_get_vlmax(env_archcpu(env), env->vtype); + bool vl_eq_vlmax = (env->vstart == 0) && (vlmax == env->vl); + FIELD_DP32(flags, TB_FLAGS, VILL, FIELD_EX64(env->vtype, VTYPE, VILL), flags); + FIELD_DP32(flags, TB_FLAGS, SEW, 
FIELD_EX64(env->vtype, VTYPE, VSEW), flags); + FIELD_DP32(flags, TB_FLAGS, LMUL, FIELD_EX64(env->vtype, VTYPE, VLMUL), flags); + FIELD_DP32(flags, TB_FLAGS, VL_EQ_VLMAX, vl_eq_vlmax, flags); + } else { + FIELD_DP32(flags, TB_FLAGS, VILL, 1, flags); + } + + flags |= cpu_mmu_index(env, 0); if (riscv_cpu_fp_enabled(env)) { - *flags |= env->mstatus & MSTATUS_FS; + flags |= env->mstatus & MSTATUS_FS; } + + *pflags = flags; } int riscv_csrrw(CPURISCVState *env, int csrno, target_ulong *ret_value, @@ -372,9 +440,4 @@ void riscv_set_csr_ops(int csrno, riscv_csr_operations *ops); void riscv_cpu_register_gdb_regs_for_features(CPUState *cs); -typedef CPURISCVState CPUArchState; -typedef RISCVCPU ArchCPU; - -#include "exec/cpu-all.h" - #endif /* RISCV_CPU_H */ diff --git a/qemu/target/riscv/cpu_bits.h b/qemu/target/riscv/cpu_bits.h index ffa73864a9..48625ac2fd 100644 --- a/qemu/target/riscv/cpu_bits.h +++ b/qemu/target/riscv/cpu_bits.h @@ -29,6 +29,14 @@ #define FSR_NXA (FPEXC_NX << FSR_AEXC_SHIFT) #define FSR_AEXC (FSR_NVA | FSR_OFA | FSR_UFA | FSR_DZA | FSR_NXA) +/* Vector Fixed-Point round model */ +#define FSR_VXRM_SHIFT 9 +#define FSR_VXRM (0x3 << FSR_VXRM_SHIFT) + +/* Vector Fixed-Point saturation flag */ +#define FSR_VXSAT_SHIFT 8 +#define FSR_VXSAT (0x1 << FSR_VXSAT_SHIFT) + /* Control and Status Registers */ /* User Trap Setup */ @@ -48,6 +56,13 @@ #define CSR_FRM 0x002 #define CSR_FCSR 0x003 +/* User Vector CSRs */ +#define CSR_VSTART 0x008 +#define CSR_VXSAT 0x009 +#define CSR_VXRM 0x00a +#define CSR_VL 0xc20 +#define CSR_VTYPE 0xc21 + /* User Timers and Counters */ #define CSR_CYCLE 0xc00 #define CSR_TIME 0xc01 diff --git a/qemu/target/riscv/cpu_helper.c b/qemu/target/riscv/cpu_helper.c index bb2c3d869f..bad05e3049 100644 --- a/qemu/target/riscv/cpu_helper.c +++ b/qemu/target/riscv/cpu_helper.c @@ -300,9 +300,6 @@ static int get_physical_address(CPURISCVState *env, hwaddr *physical, MemTxAttrs attrs = MEMTXATTRS_UNSPECIFIED; int mode = mmu_idx; bool use_background = false; - hwaddr base; - int levels = 0, ptidxbits = 0, ptesize = 0, vm, sum, mxr, widened; - /* * Check if we should use the background registers for the two @@ -344,63 +341,45 @@ static int get_physical_address(CPURISCVState *env, hwaddr *physical, *prot = 0; + hwaddr base; + int levels, ptidxbits, ptesize, vm, sum, mxr, widened; + if (first_stage == true) { mxr = get_field(env->mstatus, MSTATUS_MXR); } else { mxr = get_field(env->vsstatus, MSTATUS_MXR); } - if (env->priv_ver >= PRIV_VERSION_1_10_0) { - if (first_stage == true) { - if (use_background) { - base = (hwaddr)get_field(env->vsatp, SATP_PPN) << PGSHIFT; - vm = get_field(env->vsatp, SATP_MODE); - } else { - base = (hwaddr)get_field(env->satp, SATP_PPN) << PGSHIFT; - vm = get_field(env->satp, SATP_MODE); - } - widened = 0; + if (first_stage == true) { + if (use_background) { + base = (hwaddr)get_field(env->vsatp, SATP_PPN) << PGSHIFT; + vm = get_field(env->vsatp, SATP_MODE); } else { - base = (hwaddr)get_field(env->hgatp, HGATP_PPN) << PGSHIFT; - vm = get_field(env->hgatp, HGATP_MODE); - widened = 2; - } - sum = get_field(env->mstatus, MSTATUS_SUM); - switch (vm) { - case VM_1_10_SV32: - levels = 2; ptidxbits = 10; ptesize = 4; break; - case VM_1_10_SV39: - levels = 3; ptidxbits = 9; ptesize = 8; break; - case VM_1_10_SV48: - levels = 4; ptidxbits = 9; ptesize = 8; break; - case VM_1_10_SV57: - levels = 5; ptidxbits = 9; ptesize = 8; break; - case VM_1_10_MBARE: - *physical = addr; - *prot = PAGE_READ | PAGE_WRITE | PAGE_EXEC; - return TRANSLATE_SUCCESS; - default: - 
g_assert_not_reached(); + base = (hwaddr)get_field(env->satp, SATP_PPN) << PGSHIFT; + vm = get_field(env->satp, SATP_MODE); } - } else { widened = 0; - base = (hwaddr)(env->sptbr) << PGSHIFT; - sum = !get_field(env->mstatus, MSTATUS_PUM); - vm = get_field(env->mstatus, MSTATUS_VM); - switch (vm) { - case VM_1_09_SV32: - levels = 2; ptidxbits = 10; ptesize = 4; break; - case VM_1_09_SV39: - levels = 3; ptidxbits = 9; ptesize = 8; break; - case VM_1_09_SV48: - levels = 4; ptidxbits = 9; ptesize = 8; break; - case VM_1_09_MBARE: - *physical = addr; - *prot = PAGE_READ | PAGE_WRITE | PAGE_EXEC; - return TRANSLATE_SUCCESS; - default: - g_assert_not_reached(); - } + } else { + base = (hwaddr)get_field(env->hgatp, HGATP_PPN) << PGSHIFT; + vm = get_field(env->hgatp, HGATP_MODE); + widened = 2; + } + sum = get_field(env->mstatus, MSTATUS_SUM); + switch (vm) { + case VM_1_10_SV32: + levels = 2; ptidxbits = 10; ptesize = 4; break; + case VM_1_10_SV39: + levels = 3; ptidxbits = 9; ptesize = 8; break; + case VM_1_10_SV48: + levels = 4; ptidxbits = 9; ptesize = 8; break; + case VM_1_10_SV57: + levels = 5; ptidxbits = 9; ptesize = 8; break; + case VM_1_10_MBARE: + *physical = addr; + *prot = PAGE_READ | PAGE_WRITE | PAGE_EXEC; + return TRANSLATE_SUCCESS; + default: + g_assert_not_reached(); } CPUState *cs = env_cpu(env); @@ -438,11 +417,17 @@ static int get_physical_address(CPURISCVState *env, hwaddr *physical, hwaddr pte_addr; if (two_stage && first_stage) { + int vbase_prot; hwaddr vbase; /* Do the second stage translation on the base PTE address. */ - get_physical_address(env, &vbase, prot, base, access_type, - mmu_idx, false, true); + int vbase_ret = get_physical_address(env, &vbase, &vbase_prot, + base, MMU_DATA_LOAD, + mmu_idx, false, true); + + if (vbase_ret != TRANSLATE_SUCCESS) { + return vbase_ret; + } pte_addr = vbase + idx * ptesize; } else { @@ -456,17 +441,9 @@ static int get_physical_address(CPURISCVState *env, hwaddr *physical, } #if defined(TARGET_RISCV32) -#ifdef UNICORN_ARCH_POSTFIX target_ulong pte = glue(address_space_ldl, UNICORN_ARCH_POSTFIX)(cs->as->uc, cs->as, pte_addr, attrs, &res); -#else - target_ulong pte = address_space_ldl(cs->as->uc, cs->as, pte_addr, attrs, &res); -#endif #elif defined(TARGET_RISCV64) -#ifdef UNICORN_ARCH_POSTFIX target_ulong pte = glue(address_space_ldq, UNICORN_ARCH_POSTFIX)(cs->as->uc, cs->as, pte_addr, attrs, &res); -#else - target_ulong pte = address_space_ldq(cs->as->uc, cs->as, pte_addr, attrs, &res); -#endif #endif if (res != MEMTX_OK) { return TRANSLATE_FAIL; @@ -528,18 +505,14 @@ static int get_physical_address(CPURISCVState *env, hwaddr *physical, &addr1, &l, false, MEMTXATTRS_UNSPECIFIED); if (memory_region_is_ram(mr)) { target_ulong *pte_pa = - qemu_map_ram_ptr(mr->uc, mr->ram_block, addr1); + qemu_map_ram_ptr(cs->as->uc, mr->ram_block, addr1); #if TCG_OVERSIZED_GUEST /* MTTCG is not enabled on oversized TCG guests so * page table updates do not need to be atomic */ *pte_pa = pte = updated_pte; #else target_ulong old_pte = -#ifdef _MSC_VER - atomic_cmpxchg((long *)pte_pa, pte, updated_pte); -#else atomic_cmpxchg(pte_pa, pte, updated_pte); -#endif if (old_pte != pte) { goto restart; } else { @@ -556,12 +529,7 @@ static int get_physical_address(CPURISCVState *env, hwaddr *physical, /* for superpage mappings, make a fake leaf PTE for the TLB's benefit. 
*/ target_ulong vpn = addr >> PGSHIFT; - if (i == 0) { - *physical = (ppn | (vpn & ((1L << (ptshift + widened)) - 1))) << - PGSHIFT; - } else { - *physical = (ppn | (vpn & ((1L << ptshift) - 1))) << PGSHIFT; - } + *physical = (ppn | (vpn & ((1L << ptshift) - 1))) << PGSHIFT; /* set permissions on the TLB entry */ if ((pte & PTE_R) || ((pte & PTE_X) && mxr)) { @@ -590,7 +558,6 @@ static void raise_mmu_exception(CPURISCVState *env, target_ulong address, int page_fault_exceptions; if (first_stage) { page_fault_exceptions = - (env->priv_ver >= PRIV_VERSION_1_10_0) && get_field(env->satp, SATP_MODE) != VM_1_10_MBARE && !pmp_violation; } else { @@ -702,7 +669,7 @@ bool riscv_cpu_tlb_fill(CPUState *cs, vaddr address, int size, CPURISCVState *env = &cpu->env; vaddr im_address; hwaddr pa = 0; - int prot; + int prot, prot2; bool pmp_violation = false; bool m_mode_two_stage = false; bool hs_mode_two_stage = false; @@ -752,13 +719,13 @@ bool riscv_cpu_tlb_fill(CPUState *cs, vaddr address, int size, /* Second stage lookup */ im_address = pa; - ret = get_physical_address(env, &pa, &prot, im_address, + ret = get_physical_address(env, &pa, &prot2, im_address, access_type, mmu_idx, false, true); qemu_log_mask(CPU_LOG_MMU, "%s 2nd-stage address=%" VADDR_PRIx " ret %d physical " TARGET_FMT_plx " prot %d\n", - __func__, im_address, ret, pa, prot); + __func__, im_address, ret, pa, prot2); if (riscv_feature(env, RISCV_FEATURE_PMP) && (ret == TRANSLATE_SUCCESS) && @@ -916,8 +883,7 @@ void riscv_cpu_do_interrupt(CPUState *cs) } s = env->mstatus; - s = set_field(s, MSTATUS_SPIE, env->priv_ver >= PRIV_VERSION_1_10_0 ? - get_field(s, MSTATUS_SIE) : get_field(s, MSTATUS_UIE << env->priv)); + s = set_field(s, MSTATUS_SPIE, get_field(s, MSTATUS_SIE)); s = set_field(s, MSTATUS_SPP, env->priv); s = set_field(s, MSTATUS_SIE, 0); env->mstatus = s; @@ -954,8 +920,7 @@ void riscv_cpu_do_interrupt(CPUState *cs) } s = env->mstatus; - s = set_field(s, MSTATUS_MPIE, env->priv_ver >= PRIV_VERSION_1_10_0 ? 
- get_field(s, MSTATUS_MIE) : get_field(s, MSTATUS_UIE << env->priv)); + s = set_field(s, MSTATUS_MPIE, get_field(s, MSTATUS_MIE)); s = set_field(s, MSTATUS_MPP, env->priv); s = set_field(s, MSTATUS_MIE, 0); env->mstatus = s; diff --git a/qemu/target/riscv/csr.c b/qemu/target/riscv/csr.c index 785ef26dc4..bd746455d1 100644 --- a/qemu/target/riscv/csr.c +++ b/qemu/target/riscv/csr.c @@ -23,12 +23,21 @@ #include "exec/exec-all.h" static int fs(CPURISCVState *env, int csrno); +static int vs(CPURISCVState *env, int csrno); static int read_fflags(CPURISCVState *env, int csrno, target_ulong *val); static int write_fflags(CPURISCVState *env, int csrno, target_ulong val); static int read_frm(CPURISCVState *env, int csrno, target_ulong *val); static int write_frm(CPURISCVState *env, int csrno, target_ulong val); static int read_fcsr(CPURISCVState *env, int csrno, target_ulong *val); static int write_fcsr(CPURISCVState *env, int csrno, target_ulong val); +static int read_vtype(CPURISCVState *env, int csrno, target_ulong *val); +static int read_vl(CPURISCVState *env, int csrno, target_ulong *val); +static int read_vxrm(CPURISCVState *env, int csrno, target_ulong *val); +static int write_vxrm(CPURISCVState *env, int csrno, target_ulong val); +static int read_vxsat(CPURISCVState *env, int csrno, target_ulong *val); +static int write_vxsat(CPURISCVState *env, int csrno, target_ulong val); +static int read_vstart(CPURISCVState *env, int csrno, target_ulong *val); +static int write_vstart(CPURISCVState *env, int csrno, target_ulong val); static int ctr(CPURISCVState *env, int csrno); static int read_instret(CPURISCVState *env, int csrno, target_ulong *val); static int read_time(CPURISCVState *env, int csrno, target_ulong *val); @@ -49,8 +58,6 @@ static int read_mtvec(CPURISCVState *env, int csrno, target_ulong *val); static int write_mtvec(CPURISCVState *env, int csrno, target_ulong val); static int read_mcounteren(CPURISCVState *env, int csrno, target_ulong *val); static int write_mcounteren(CPURISCVState *env, int csrno, target_ulong val); -static int read_mucounteren(CPURISCVState *env, int csrno, target_ulong *val); -static int write_mucounteren(CPURISCVState *env, int csrno, target_ulong val); static int read_mscounteren(CPURISCVState *env, int csrno, target_ulong *val); static int write_mscounteren(CPURISCVState *env, int csrno, target_ulong val); static int read_mscratch(CPURISCVState *env, int csrno, target_ulong *val); @@ -154,6 +161,12 @@ static riscv_csr_operations csr_ops[CSR_TABLE_SIZE] = { [CSR_FRM] = { fs, read_frm, write_frm }, [CSR_FCSR] = { fs, read_fcsr, write_fcsr }, + /* Vector CSRs */ + [CSR_VSTART] = { vs, read_vstart, write_vstart }, + [CSR_VXSAT] = { vs, read_vxsat, write_vxsat }, + [CSR_VXRM] = { vs, read_vxrm, write_vxrm }, + [CSR_VL] = { vs, read_vl }, + [CSR_VTYPE] = { vs, read_vtype }, /* User Timers and Counters */ [CSR_CYCLE] = { ctr, read_instret }, [CSR_INSTRET] = { ctr, read_instret }, @@ -196,8 +209,6 @@ static riscv_csr_operations csr_ops[CSR_TABLE_SIZE] = { [CSR_MSTATUSH] = { any, read_mstatush, write_mstatush }, #endif - /* Legacy Counter Setup (priv v1.9.1) */ - [CSR_MUCOUNTEREN] = { any, read_mucounteren, write_mucounteren }, [CSR_MSCOUNTEREN] = { any, read_mscounteren, write_mscounteren }, /* Machine Trap Handling */ @@ -441,41 +452,34 @@ void riscv_set_csr_ops(int csrno, riscv_csr_operations *ops) /* Predicates */ static int fs(CPURISCVState *env, int csrno) { + /* loose check condition for fcsr in vector extension */ + if ((csrno == CSR_FCSR) && (env->misa & 
RVV)) { + return 0; + } if (!env->debugger && !riscv_cpu_fp_enabled(env)) { return -1; } return 0; } +static int vs(CPURISCVState *env, int csrno) +{ + if (env->misa & RVV) { + return 0; + } + return -1; +} + static int ctr(CPURISCVState *env, int csrno) { CPUState *cs = env_cpu(env); RISCVCPU *cpu = RISCV_CPU(cs); - uint32_t ctr_en = ~0u; if (!cpu->cfg.ext_counters) { /* The Counters extensions is not enabled */ return -1; } - /* - * The counters are always enabled at run time on newer priv specs, as the - * CSR has changed from controlling that the counters can be read to - * controlling that the counters increment. - */ - if (env->priv_ver > PRIV_VERSION_1_09_1) { - return 0; - } - - if (env->priv < PRV_M) { - ctr_en &= env->mcounteren; - } - if (env->priv < PRV_S) { - ctr_en &= env->scounteren; - } - if (!(ctr_en & (1u << (csrno & 31)))) { - return -1; - } return 0; } @@ -554,6 +558,10 @@ static int read_fcsr(CPURISCVState *env, int csrno, target_ulong *val) } *val = (riscv_cpu_get_fflags(env) << FSR_AEXC_SHIFT) | (env->frm << FSR_RD_SHIFT); + if (vs(env, csrno) >= 0) { + *val |= (env->vxrm << FSR_VXRM_SHIFT) + | (env->vxsat << FSR_VXSAT_SHIFT); + } return 0; } @@ -564,10 +572,62 @@ static int write_fcsr(CPURISCVState *env, int csrno, target_ulong val) } env->mstatus |= MSTATUS_FS; env->frm = (val & FSR_RD) >> FSR_RD_SHIFT; + if (vs(env, csrno) >= 0) { + env->vxrm = (val & FSR_VXRM) >> FSR_VXRM_SHIFT; + env->vxsat = (val & FSR_VXSAT) >> FSR_VXSAT_SHIFT; + } riscv_cpu_set_fflags(env, (val & FSR_AEXC) >> FSR_AEXC_SHIFT); return 0; } +static int read_vtype(CPURISCVState *env, int csrno, target_ulong *val) +{ + *val = env->vtype; + return 0; +} + +static int read_vl(CPURISCVState *env, int csrno, target_ulong *val) +{ + *val = env->vl; + return 0; +} + +static int read_vxrm(CPURISCVState *env, int csrno, target_ulong *val) +{ + *val = env->vxrm; + return 0; +} + +static int write_vxrm(CPURISCVState *env, int csrno, target_ulong val) +{ + env->vxrm = val; + return 0; +} + +static int read_vxsat(CPURISCVState *env, int csrno, target_ulong *val) +{ + *val = env->vxsat; + return 0; +} + +static int write_vxsat(CPURISCVState *env, int csrno, target_ulong val) +{ + env->vxsat = val; + return 0; +} + +static int read_vstart(CPURISCVState *env, int csrno, target_ulong *val) +{ + *val = env->vstart; + return 0; +} + +static int write_vstart(CPURISCVState *env, int csrno, target_ulong val) +{ + env->vstart = val; + return 0; +} + /* User Timers and Counters */ static int read_instret(CPURISCVState *env, int csrno, target_ulong *val) { @@ -640,9 +700,6 @@ static const target_ulong delegable_excps = (1ULL << (RISCV_EXCP_INST_GUEST_PAGE_FAULT)) | (1ULL << (RISCV_EXCP_LOAD_GUEST_ACCESS_FAULT)) | (1ULL << (RISCV_EXCP_STORE_GUEST_AMO_ACCESS_FAULT)); -static const target_ulong sstatus_v1_9_mask = SSTATUS_SIE | SSTATUS_SPIE | - SSTATUS_UIE | SSTATUS_UPIE | SSTATUS_SPP | SSTATUS_FS | SSTATUS_XS | - SSTATUS_SUM | SSTATUS_SD; static const target_ulong sstatus_v1_10_mask = SSTATUS_SIE | SSTATUS_SPIE | SSTATUS_UIE | SSTATUS_UPIE | SSTATUS_SPP | SSTATUS_FS | SSTATUS_XS | SSTATUS_SUM | SSTATUS_MXR | SSTATUS_SD; @@ -651,20 +708,11 @@ static const target_ulong hip_writable_mask = MIP_VSSIP | MIP_VSTIP | MIP_VSEIP; static const target_ulong vsip_writable_mask = MIP_VSSIP; #if defined(TARGET_RISCV32) -static const char valid_vm_1_09[16] = { - [VM_1_09_MBARE] = 1, - [VM_1_09_SV32] = 1, -}; static const char valid_vm_1_10[16] = { [VM_1_10_MBARE] = 1, [VM_1_10_SV32] = 1 }; #elif defined(TARGET_RISCV64) -static const char 
valid_vm_1_09[16] = { - [VM_1_09_MBARE] = 1, - [VM_1_09_SV39] = 1, - [VM_1_09_SV48] = 1, -}; static const char valid_vm_1_10[16] = { [VM_1_10_MBARE] = 1, [VM_1_10_SV39] = 1, @@ -694,8 +742,7 @@ static int read_mstatus(CPURISCVState *env, int csrno, target_ulong *val) static int validate_vm(CPURISCVState *env, target_ulong vm) { - return (env->priv_ver >= PRIV_VERSION_1_10_0) ? - valid_vm_1_10[vm & 0xf] : valid_vm_1_09[vm & 0xf]; + return valid_vm_1_10[vm & 0xf]; } static int write_mstatus(CPURISCVState *env, int csrno, target_ulong val) @@ -705,34 +752,21 @@ static int write_mstatus(CPURISCVState *env, int csrno, target_ulong val) int dirty; /* flush tlb on mstatus fields that affect VM */ - if (env->priv_ver <= PRIV_VERSION_1_09_1) { - if ((val ^ mstatus) & (MSTATUS_MXR | MSTATUS_MPP | - MSTATUS_MPRV | MSTATUS_SUM | MSTATUS_VM)) { - tlb_flush(env_cpu(env)); - } - mask = MSTATUS_SIE | MSTATUS_SPIE | MSTATUS_MIE | MSTATUS_MPIE | - MSTATUS_SPP | MSTATUS_FS | MSTATUS_MPRV | MSTATUS_SUM | - MSTATUS_MPP | MSTATUS_MXR | - (validate_vm(env, get_field(val, MSTATUS_VM)) ? - MSTATUS_VM : 0); + if ((val ^ mstatus) & (MSTATUS_MXR | MSTATUS_MPP | MSTATUS_MPV | + MSTATUS_MPRV | MSTATUS_SUM)) { + tlb_flush(env_cpu(env)); } - if (env->priv_ver >= PRIV_VERSION_1_10_0) { - if ((val ^ mstatus) & (MSTATUS_MXR | MSTATUS_MPP | MSTATUS_MPV | - MSTATUS_MPRV | MSTATUS_SUM)) { - tlb_flush(env_cpu(env)); - } - mask = MSTATUS_SIE | MSTATUS_SPIE | MSTATUS_MIE | MSTATUS_MPIE | - MSTATUS_SPP | MSTATUS_FS | MSTATUS_MPRV | MSTATUS_SUM | - MSTATUS_MPP | MSTATUS_MXR | MSTATUS_TVM | MSTATUS_TSR | - MSTATUS_TW; + mask = MSTATUS_SIE | MSTATUS_SPIE | MSTATUS_MIE | MSTATUS_MPIE | + MSTATUS_SPP | MSTATUS_FS | MSTATUS_MPRV | MSTATUS_SUM | + MSTATUS_MPP | MSTATUS_MXR | MSTATUS_TVM | MSTATUS_TSR | + MSTATUS_TW; #if defined(TARGET_RISCV64) - /* - * RV32: MPV and MTL are not in mstatus. The current plan is to - * add them to mstatush. For now, we just don't support it. - */ - mask |= MSTATUS_MTL | MSTATUS_MPV; + /* + * RV32: MPV and MTL are not in mstatus. The current plan is to + * add them to mstatush. For now, we just don't support it. 
+ */ + mask |= MSTATUS_MTL | MSTATUS_MPV; #endif - } mstatus = (mstatus & ~mask) | (val & mask); @@ -881,18 +915,12 @@ static int write_mtvec(CPURISCVState *env, int csrno, target_ulong val) static int read_mcounteren(CPURISCVState *env, int csrno, target_ulong *val) { - if (env->priv_ver < PRIV_VERSION_1_10_0) { - return -1; - } *val = env->mcounteren; return 0; } static int write_mcounteren(CPURISCVState *env, int csrno, target_ulong val) { - if (env->priv_ver < PRIV_VERSION_1_10_0) { - return -1; - } env->mcounteren = val; return 0; } @@ -900,8 +928,7 @@ static int write_mcounteren(CPURISCVState *env, int csrno, target_ulong val) /* This regiser is replaced with CSR_MCOUNTINHIBIT in 1.11.0 */ static int read_mscounteren(CPURISCVState *env, int csrno, target_ulong *val) { - if (env->priv_ver > PRIV_VERSION_1_09_1 - && env->priv_ver < PRIV_VERSION_1_11_0) { + if (env->priv_ver < PRIV_VERSION_1_11_0) { return -1; } *val = env->mcounteren; @@ -911,32 +938,13 @@ static int read_mscounteren(CPURISCVState *env, int csrno, target_ulong *val) /* This regiser is replaced with CSR_MCOUNTINHIBIT in 1.11.0 */ static int write_mscounteren(CPURISCVState *env, int csrno, target_ulong val) { - if (env->priv_ver > PRIV_VERSION_1_09_1 - && env->priv_ver < PRIV_VERSION_1_11_0) { + if (env->priv_ver < PRIV_VERSION_1_11_0) { return -1; } env->mcounteren = val; return 0; } -static int read_mucounteren(CPURISCVState *env, int csrno, target_ulong *val) -{ - if (env->priv_ver > PRIV_VERSION_1_09_1) { - return -1; - } - *val = env->scounteren; - return 0; -} - -static int write_mucounteren(CPURISCVState *env, int csrno, target_ulong val) -{ - if (env->priv_ver > PRIV_VERSION_1_09_1) { - return -1; - } - env->scounteren = val; - return 0; -} - /* Machine Trap Handling */ static int read_mscratch(CPURISCVState *env, int csrno, target_ulong *val) { @@ -1010,16 +1018,14 @@ static int rmw_mip(CPURISCVState *env, int csrno, target_ulong *ret_value, /* Supervisor Trap Setup */ static int read_sstatus(CPURISCVState *env, int csrno, target_ulong *val) { - target_ulong mask = ((env->priv_ver >= PRIV_VERSION_1_10_0) ? - sstatus_v1_10_mask : sstatus_v1_9_mask); + target_ulong mask = (sstatus_v1_10_mask); *val = env->mstatus & mask; return 0; } static int write_sstatus(CPURISCVState *env, int csrno, target_ulong val) { - target_ulong mask = ((env->priv_ver >= PRIV_VERSION_1_10_0) ? 
- sstatus_v1_10_mask : sstatus_v1_9_mask); + target_ulong mask = (sstatus_v1_10_mask); target_ulong newval = (env->mstatus & ~mask) | (val & mask); return write_mstatus(env, CSR_MSTATUS, newval); } @@ -1069,18 +1075,12 @@ static int write_stvec(CPURISCVState *env, int csrno, target_ulong val) static int read_scounteren(CPURISCVState *env, int csrno, target_ulong *val) { - if (env->priv_ver < PRIV_VERSION_1_10_0) { - return -1; - } *val = env->scounteren; return 0; } static int write_scounteren(CPURISCVState *env, int csrno, target_ulong val) { - if (env->priv_ver < PRIV_VERSION_1_10_0) { - return -1; - } env->scounteren = val; return 0; } @@ -1159,15 +1159,15 @@ static int read_satp(CPURISCVState *env, int csrno, target_ulong *val) { if (!riscv_feature(env, RISCV_FEATURE_MMU)) { *val = 0; - } else if (env->priv_ver >= PRIV_VERSION_1_10_0) { - if (env->priv == PRV_S && get_field(env->mstatus, MSTATUS_TVM)) { - return -1; - } else { - *val = env->satp; - } + return 0; + } + + if (env->priv == PRV_S && get_field(env->mstatus, MSTATUS_TVM)) { + return -1; } else { - *val = env->sptbr; + *val = env->satp; } + return 0; } @@ -1176,13 +1176,7 @@ static int write_satp(CPURISCVState *env, int csrno, target_ulong val) if (!riscv_feature(env, RISCV_FEATURE_MMU)) { return 0; } - if (env->priv_ver <= PRIV_VERSION_1_09_1 && (val ^ env->sptbr)) { - tlb_flush(env_cpu(env)); - env->sptbr = val & (((target_ulong) - 1 << (TARGET_PHYS_ADDR_SPACE_BITS - PGSHIFT)) - 1); - } - if (env->priv_ver >= PRIV_VERSION_1_10_0 && - validate_vm(env, get_field(val, SATP_MODE)) && + if (validate_vm(env, get_field(val, SATP_MODE)) && ((val ^ env->satp) & (SATP_MODE | SATP_ASID | SATP_PPN))) { if (env->priv == PRV_S && get_field(env->mstatus, MSTATUS_TVM)) { diff --git a/qemu/target/riscv/fpu_helper.c b/qemu/target/riscv/fpu_helper.c index 3fb6684b16..4379756dc4 100644 --- a/qemu/target/riscv/fpu_helper.c +++ b/qemu/target/riscv/fpu_helper.c @@ -22,6 +22,7 @@ #include "exec/exec-all.h" #include "exec/helper-proto.h" #include "fpu/softfloat.h" +#include "internals.h" target_ulong riscv_cpu_get_fflags(CPURISCVState *env) { @@ -230,21 +231,7 @@ uint64_t helper_fcvt_s_lu(CPURISCVState *env, uint64_t rs1) target_ulong helper_fclass_s(uint64_t frs1) { - float32 f = frs1; - bool sign = float32_is_neg(f); - - if (float32_is_infinity(f)) { - return sign ? 1 << 0 : 1 << 7; - } else if (float32_is_zero(f)) { - return sign ? 1 << 3 : 1 << 4; - } else if (float32_is_zero_or_denormal(f)) { - return sign ? 1 << 2 : 1 << 5; - } else if (float32_is_any_nan(f)) { - float_status s = { 0 }; /* for snan_bit_is_one */ - return float32_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8; - } else { - return sign ? 1 << 1 : 1 << 6; - } + return fclass_s(frs1); } uint64_t helper_fadd_d(CPURISCVState *env, uint64_t frs1, uint64_t frs2) @@ -353,19 +340,5 @@ uint64_t helper_fcvt_d_lu(CPURISCVState *env, uint64_t rs1) target_ulong helper_fclass_d(uint64_t frs1) { - float64 f = frs1; - bool sign = float64_is_neg(f); - - if (float64_is_infinity(f)) { - return sign ? 1 << 0 : 1 << 7; - } else if (float64_is_zero(f)) { - return sign ? 1 << 3 : 1 << 4; - } else if (float64_is_zero_or_denormal(f)) { - return sign ? 1 << 2 : 1 << 5; - } else if (float64_is_any_nan(f)) { - float_status s = { 0 }; /* for snan_bit_is_one */ - return float64_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8; - } else { - return sign ? 
1 << 1 : 1 << 6; - } + return fclass_d(frs1); } diff --git a/qemu/target/riscv/helper.h b/qemu/target/riscv/helper.h index 32e483860f..11b0f57c14 100644 --- a/qemu/target/riscv/helper.h +++ b/qemu/target/riscv/helper.h @@ -78,3 +78,1077 @@ DEF_HELPER_2(sret, tl, env, tl) DEF_HELPER_2(mret, tl, env, tl) DEF_HELPER_1(wfi, void, env) DEF_HELPER_1(tlb_flush, void, env) + +/* Hypervisor functions */ +#ifndef CONFIG_USER_ONLY +DEF_HELPER_1(hyp_tlb_flush, void, env) +#endif + +/* Vector functions */ +DEF_HELPER_3(vsetvl, tl, env, tl, tl) +DEF_HELPER_5(vlb_v_b, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vlb_v_b_mask, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vlb_v_h, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vlb_v_h_mask, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vlb_v_w, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vlb_v_w_mask, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vlb_v_d, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vlb_v_d_mask, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vlh_v_h, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vlh_v_h_mask, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vlh_v_w, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vlh_v_w_mask, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vlh_v_d, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vlh_v_d_mask, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vlw_v_w, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vlw_v_w_mask, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vlw_v_d, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vlw_v_d_mask, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vle_v_b, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vle_v_b_mask, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vle_v_h, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vle_v_h_mask, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vle_v_w, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vle_v_w_mask, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vle_v_d, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vle_v_d_mask, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vlbu_v_b, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vlbu_v_b_mask, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vlbu_v_h, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vlbu_v_h_mask, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vlbu_v_w, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vlbu_v_w_mask, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vlbu_v_d, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vlbu_v_d_mask, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vlhu_v_h, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vlhu_v_h_mask, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vlhu_v_w, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vlhu_v_w_mask, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vlhu_v_d, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vlhu_v_d_mask, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vlwu_v_w, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vlwu_v_w_mask, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vlwu_v_d, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vlwu_v_d_mask, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vsb_v_b, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vsb_v_b_mask, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vsb_v_h, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vsb_v_h_mask, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vsb_v_w, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vsb_v_w_mask, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vsb_v_d, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vsb_v_d_mask, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vsh_v_h, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vsh_v_h_mask, void, ptr, ptr, tl, env, i32) 
+DEF_HELPER_5(vsh_v_w, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vsh_v_w_mask, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vsh_v_d, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vsh_v_d_mask, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vsw_v_w, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vsw_v_w_mask, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vsw_v_d, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vsw_v_d_mask, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vse_v_b, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vse_v_b_mask, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vse_v_h, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vse_v_h_mask, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vse_v_w, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vse_v_w_mask, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vse_v_d, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vse_v_d_mask, void, ptr, ptr, tl, env, i32) +DEF_HELPER_6(vlsb_v_b, void, ptr, ptr, tl, tl, env, i32) +DEF_HELPER_6(vlsb_v_h, void, ptr, ptr, tl, tl, env, i32) +DEF_HELPER_6(vlsb_v_w, void, ptr, ptr, tl, tl, env, i32) +DEF_HELPER_6(vlsb_v_d, void, ptr, ptr, tl, tl, env, i32) +DEF_HELPER_6(vlsh_v_h, void, ptr, ptr, tl, tl, env, i32) +DEF_HELPER_6(vlsh_v_w, void, ptr, ptr, tl, tl, env, i32) +DEF_HELPER_6(vlsh_v_d, void, ptr, ptr, tl, tl, env, i32) +DEF_HELPER_6(vlsw_v_w, void, ptr, ptr, tl, tl, env, i32) +DEF_HELPER_6(vlsw_v_d, void, ptr, ptr, tl, tl, env, i32) +DEF_HELPER_6(vlse_v_b, void, ptr, ptr, tl, tl, env, i32) +DEF_HELPER_6(vlse_v_h, void, ptr, ptr, tl, tl, env, i32) +DEF_HELPER_6(vlse_v_w, void, ptr, ptr, tl, tl, env, i32) +DEF_HELPER_6(vlse_v_d, void, ptr, ptr, tl, tl, env, i32) +DEF_HELPER_6(vlsbu_v_b, void, ptr, ptr, tl, tl, env, i32) +DEF_HELPER_6(vlsbu_v_h, void, ptr, ptr, tl, tl, env, i32) +DEF_HELPER_6(vlsbu_v_w, void, ptr, ptr, tl, tl, env, i32) +DEF_HELPER_6(vlsbu_v_d, void, ptr, ptr, tl, tl, env, i32) +DEF_HELPER_6(vlshu_v_h, void, ptr, ptr, tl, tl, env, i32) +DEF_HELPER_6(vlshu_v_w, void, ptr, ptr, tl, tl, env, i32) +DEF_HELPER_6(vlshu_v_d, void, ptr, ptr, tl, tl, env, i32) +DEF_HELPER_6(vlswu_v_w, void, ptr, ptr, tl, tl, env, i32) +DEF_HELPER_6(vlswu_v_d, void, ptr, ptr, tl, tl, env, i32) +DEF_HELPER_6(vssb_v_b, void, ptr, ptr, tl, tl, env, i32) +DEF_HELPER_6(vssb_v_h, void, ptr, ptr, tl, tl, env, i32) +DEF_HELPER_6(vssb_v_w, void, ptr, ptr, tl, tl, env, i32) +DEF_HELPER_6(vssb_v_d, void, ptr, ptr, tl, tl, env, i32) +DEF_HELPER_6(vssh_v_h, void, ptr, ptr, tl, tl, env, i32) +DEF_HELPER_6(vssh_v_w, void, ptr, ptr, tl, tl, env, i32) +DEF_HELPER_6(vssh_v_d, void, ptr, ptr, tl, tl, env, i32) +DEF_HELPER_6(vssw_v_w, void, ptr, ptr, tl, tl, env, i32) +DEF_HELPER_6(vssw_v_d, void, ptr, ptr, tl, tl, env, i32) +DEF_HELPER_6(vsse_v_b, void, ptr, ptr, tl, tl, env, i32) +DEF_HELPER_6(vsse_v_h, void, ptr, ptr, tl, tl, env, i32) +DEF_HELPER_6(vsse_v_w, void, ptr, ptr, tl, tl, env, i32) +DEF_HELPER_6(vsse_v_d, void, ptr, ptr, tl, tl, env, i32) +DEF_HELPER_6(vlxb_v_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vlxb_v_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vlxb_v_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vlxb_v_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vlxh_v_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vlxh_v_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vlxh_v_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vlxw_v_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vlxw_v_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vlxe_v_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vlxe_v_h, void, ptr, 
ptr, tl, ptr, env, i32) +DEF_HELPER_6(vlxe_v_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vlxe_v_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vlxbu_v_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vlxbu_v_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vlxbu_v_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vlxbu_v_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vlxhu_v_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vlxhu_v_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vlxhu_v_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vlxwu_v_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vlxwu_v_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vsxb_v_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vsxb_v_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vsxb_v_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vsxb_v_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vsxh_v_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vsxh_v_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vsxh_v_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vsxw_v_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vsxw_v_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vsxe_v_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vsxe_v_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vsxe_v_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vsxe_v_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_5(vlbff_v_b, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vlbff_v_h, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vlbff_v_w, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vlbff_v_d, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vlhff_v_h, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vlhff_v_w, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vlhff_v_d, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vlwff_v_w, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vlwff_v_d, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vleff_v_b, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vleff_v_h, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vleff_v_w, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vleff_v_d, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vlbuff_v_b, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vlbuff_v_h, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vlbuff_v_w, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vlbuff_v_d, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vlhuff_v_h, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vlhuff_v_w, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vlhuff_v_d, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vlwuff_v_w, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vlwuff_v_d, void, ptr, ptr, tl, env, i32) +#ifdef TARGET_RISCV64 +DEF_HELPER_6(vamoswapw_v_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vamoswapd_v_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vamoaddw_v_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vamoaddd_v_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vamoxorw_v_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vamoxord_v_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vamoandw_v_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vamoandd_v_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vamoorw_v_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vamoord_v_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vamominw_v_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vamomind_v_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vamomaxw_v_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vamomaxd_v_d, void, ptr, ptr, 
tl, ptr, env, i32) +DEF_HELPER_6(vamominuw_v_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vamominud_v_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vamomaxuw_v_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vamomaxud_v_d, void, ptr, ptr, tl, ptr, env, i32) +#endif +DEF_HELPER_6(vamoswapw_v_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vamoaddw_v_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vamoxorw_v_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vamoandw_v_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vamoorw_v_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vamominw_v_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vamomaxw_v_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vamominuw_v_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vamomaxuw_v_w, void, ptr, ptr, tl, ptr, env, i32) + +DEF_HELPER_6(vadd_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vadd_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vadd_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vadd_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vsub_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vsub_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vsub_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vsub_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vadd_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vadd_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vadd_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vadd_vx_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vsub_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vsub_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vsub_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vsub_vx_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vrsub_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vrsub_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vrsub_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vrsub_vx_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_FLAGS_4(vec_rsubs8, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) +DEF_HELPER_FLAGS_4(vec_rsubs16, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) +DEF_HELPER_FLAGS_4(vec_rsubs32, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) +DEF_HELPER_FLAGS_4(vec_rsubs64, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) + +DEF_HELPER_6(vwaddu_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwaddu_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwaddu_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwsubu_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwsubu_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwsubu_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwadd_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwadd_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwadd_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwsub_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwsub_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwsub_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwaddu_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vwaddu_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vwaddu_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vwsubu_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vwsubu_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vwsubu_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vwadd_vx_b, void, ptr, ptr, tl, ptr, env, i32) 
+DEF_HELPER_6(vwadd_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vwadd_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vwsub_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vwsub_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vwsub_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vwaddu_wv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwaddu_wv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwaddu_wv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwsubu_wv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwsubu_wv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwsubu_wv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwadd_wv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwadd_wv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwadd_wv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwsub_wv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwsub_wv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwsub_wv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwaddu_wx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vwaddu_wx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vwaddu_wx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vwsubu_wx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vwsubu_wx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vwsubu_wx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vwadd_wx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vwadd_wx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vwadd_wx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vwsub_wx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vwsub_wx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vwsub_wx_w, void, ptr, ptr, tl, ptr, env, i32) + +DEF_HELPER_6(vadc_vvm_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vadc_vvm_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vadc_vvm_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vadc_vvm_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vsbc_vvm_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vsbc_vvm_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vsbc_vvm_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vsbc_vvm_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmadc_vvm_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmadc_vvm_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmadc_vvm_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmadc_vvm_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmsbc_vvm_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmsbc_vvm_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmsbc_vvm_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmsbc_vvm_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vadc_vxm_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vadc_vxm_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vadc_vxm_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vadc_vxm_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vsbc_vxm_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vsbc_vxm_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vsbc_vxm_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vsbc_vxm_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmadc_vxm_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmadc_vxm_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmadc_vxm_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmadc_vxm_d, void, ptr, ptr, tl, ptr, env, i32) 
+DEF_HELPER_6(vmsbc_vxm_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmsbc_vxm_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmsbc_vxm_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmsbc_vxm_d, void, ptr, ptr, tl, ptr, env, i32) + +DEF_HELPER_6(vand_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vand_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vand_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vand_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vor_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vor_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vor_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vor_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vxor_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vxor_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vxor_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vxor_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vand_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vand_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vand_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vand_vx_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vor_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vor_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vor_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vor_vx_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vxor_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vxor_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vxor_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vxor_vx_d, void, ptr, ptr, tl, ptr, env, i32) + +DEF_HELPER_6(vsll_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vsll_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vsll_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vsll_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vsrl_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vsrl_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vsrl_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vsrl_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vsra_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vsra_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vsra_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vsra_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vsll_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vsll_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vsll_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vsll_vx_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vsrl_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vsrl_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vsrl_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vsrl_vx_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vsra_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vsra_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vsra_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vsra_vx_d, void, ptr, ptr, tl, ptr, env, i32) + +DEF_HELPER_6(vnsrl_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vnsrl_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vnsrl_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vnsra_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vnsra_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vnsra_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vnsrl_vx_b, void, 
ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vnsrl_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vnsrl_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vnsra_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vnsra_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vnsra_vx_w, void, ptr, ptr, tl, ptr, env, i32) + +DEF_HELPER_6(vmseq_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmseq_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmseq_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmseq_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmsne_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmsne_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmsne_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmsne_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmsltu_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmsltu_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmsltu_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmsltu_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmslt_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmslt_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmslt_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmslt_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmsleu_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmsleu_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmsleu_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmsleu_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmsle_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmsle_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmsle_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmsle_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmseq_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmseq_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmseq_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmseq_vx_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmsne_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmsne_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmsne_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmsne_vx_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmsltu_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmsltu_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmsltu_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmsltu_vx_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmslt_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmslt_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmslt_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmslt_vx_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmsleu_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmsleu_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmsleu_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmsleu_vx_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmsle_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmsle_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmsle_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmsle_vx_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmsgtu_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmsgtu_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmsgtu_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmsgtu_vx_d, void, ptr, ptr, tl, ptr, env, i32) 
+DEF_HELPER_6(vmsgt_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmsgt_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmsgt_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmsgt_vx_d, void, ptr, ptr, tl, ptr, env, i32) + +DEF_HELPER_6(vminu_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vminu_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vminu_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vminu_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmin_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmin_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmin_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmin_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmaxu_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmaxu_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmaxu_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmaxu_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmax_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmax_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmax_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmax_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vminu_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vminu_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vminu_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vminu_vx_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmin_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmin_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmin_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmin_vx_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmaxu_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmaxu_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmaxu_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmaxu_vx_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmax_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmax_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmax_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmax_vx_d, void, ptr, ptr, tl, ptr, env, i32) + +DEF_HELPER_6(vmul_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmul_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmul_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmul_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmulh_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmulh_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmulh_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmulh_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmulhu_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmulhu_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmulhu_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmulhu_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmulhsu_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmulhsu_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmulhsu_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmulhsu_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmul_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmul_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmul_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmul_vx_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmulh_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmulh_vx_h, void, ptr, ptr, tl, ptr, 
env, i32) +DEF_HELPER_6(vmulh_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmulh_vx_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmulhu_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmulhu_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmulhu_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmulhu_vx_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmulhsu_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmulhsu_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmulhsu_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmulhsu_vx_d, void, ptr, ptr, tl, ptr, env, i32) + +DEF_HELPER_6(vdivu_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vdivu_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vdivu_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vdivu_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vdiv_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vdiv_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vdiv_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vdiv_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vremu_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vremu_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vremu_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vremu_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vrem_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vrem_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vrem_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vrem_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vdivu_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vdivu_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vdivu_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vdivu_vx_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vdiv_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vdiv_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vdiv_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vdiv_vx_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vremu_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vremu_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vremu_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vremu_vx_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vrem_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vrem_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vrem_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vrem_vx_d, void, ptr, ptr, tl, ptr, env, i32) + +DEF_HELPER_6(vwmul_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwmul_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwmul_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwmulu_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwmulu_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwmulu_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwmulsu_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwmulsu_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwmulsu_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwmul_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vwmul_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vwmul_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vwmulu_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vwmulu_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vwmulu_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vwmulsu_vx_b, 
void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vwmulsu_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vwmulsu_vx_w, void, ptr, ptr, tl, ptr, env, i32) + +DEF_HELPER_6(vmacc_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmacc_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmacc_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmacc_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vnmsac_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vnmsac_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vnmsac_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vnmsac_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmadd_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmadd_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmadd_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmadd_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vnmsub_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vnmsub_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vnmsub_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vnmsub_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmacc_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmacc_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmacc_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmacc_vx_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vnmsac_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vnmsac_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vnmsac_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vnmsac_vx_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmadd_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmadd_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmadd_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmadd_vx_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vnmsub_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vnmsub_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vnmsub_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vnmsub_vx_d, void, ptr, ptr, tl, ptr, env, i32) + +DEF_HELPER_6(vwmaccu_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwmaccu_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwmaccu_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwmacc_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwmacc_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwmacc_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwmaccsu_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwmaccsu_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwmaccsu_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwmaccu_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vwmaccu_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vwmaccu_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vwmacc_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vwmacc_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vwmacc_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vwmaccsu_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vwmaccsu_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vwmaccsu_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vwmaccus_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vwmaccus_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vwmaccus_vx_w, void, ptr, ptr, tl, ptr, env, i32) + +DEF_HELPER_6(vmerge_vvm_b, void, ptr, ptr, ptr, ptr, env, i32) 
+DEF_HELPER_6(vmerge_vvm_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmerge_vvm_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmerge_vvm_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmerge_vxm_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmerge_vxm_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmerge_vxm_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmerge_vxm_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_4(vmv_v_v_b, void, ptr, ptr, env, i32) +DEF_HELPER_4(vmv_v_v_h, void, ptr, ptr, env, i32) +DEF_HELPER_4(vmv_v_v_w, void, ptr, ptr, env, i32) +DEF_HELPER_4(vmv_v_v_d, void, ptr, ptr, env, i32) +DEF_HELPER_4(vmv_v_x_b, void, ptr, i64, env, i32) +DEF_HELPER_4(vmv_v_x_h, void, ptr, i64, env, i32) +DEF_HELPER_4(vmv_v_x_w, void, ptr, i64, env, i32) +DEF_HELPER_4(vmv_v_x_d, void, ptr, i64, env, i32) + +DEF_HELPER_6(vsaddu_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vsaddu_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vsaddu_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vsaddu_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vsadd_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vsadd_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vsadd_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vsadd_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vssubu_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vssubu_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vssubu_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vssubu_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vssub_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vssub_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vssub_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vssub_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vsaddu_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vsaddu_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vsaddu_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vsaddu_vx_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vsadd_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vsadd_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vsadd_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vsadd_vx_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vssubu_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vssubu_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vssubu_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vssubu_vx_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vssub_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vssub_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vssub_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vssub_vx_d, void, ptr, ptr, tl, ptr, env, i32) + +DEF_HELPER_6(vaadd_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vaadd_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vaadd_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vaadd_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vasub_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vasub_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vasub_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vasub_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vaadd_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vaadd_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vaadd_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vaadd_vx_d, void, 
ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vasub_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vasub_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vasub_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vasub_vx_d, void, ptr, ptr, tl, ptr, env, i32) + +DEF_HELPER_6(vsmul_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vsmul_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vsmul_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vsmul_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vsmul_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vsmul_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vsmul_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vsmul_vx_d, void, ptr, ptr, tl, ptr, env, i32) + +DEF_HELPER_6(vwsmaccu_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwsmaccu_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwsmaccu_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwsmacc_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwsmacc_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwsmacc_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwsmaccsu_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwsmaccsu_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwsmaccsu_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwsmaccu_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vwsmaccu_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vwsmaccu_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vwsmacc_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vwsmacc_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vwsmacc_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vwsmaccsu_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vwsmaccsu_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vwsmaccsu_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vwsmaccus_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vwsmaccus_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vwsmaccus_vx_w, void, ptr, ptr, tl, ptr, env, i32) + +DEF_HELPER_6(vssrl_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vssrl_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vssrl_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vssrl_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vssra_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vssra_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vssra_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vssra_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vssrl_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vssrl_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vssrl_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vssrl_vx_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vssra_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vssra_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vssra_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vssra_vx_d, void, ptr, ptr, tl, ptr, env, i32) + +DEF_HELPER_6(vnclip_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vnclip_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vnclip_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vnclipu_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vnclipu_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vnclipu_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vnclipu_vx_b, void, ptr, ptr, tl, ptr, env, 
i32) +DEF_HELPER_6(vnclipu_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vnclipu_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vnclip_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vnclip_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vnclip_vx_w, void, ptr, ptr, tl, ptr, env, i32) + +DEF_HELPER_6(vfadd_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfadd_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfadd_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfsub_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfsub_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfsub_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfadd_vf_h, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfadd_vf_w, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfadd_vf_d, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfsub_vf_h, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfsub_vf_w, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfsub_vf_d, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfrsub_vf_h, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfrsub_vf_w, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfrsub_vf_d, void, ptr, ptr, i64, ptr, env, i32) + +DEF_HELPER_6(vfwadd_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfwadd_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfwsub_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfwsub_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfwadd_wv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfwadd_wv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfwsub_wv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfwsub_wv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfwadd_vf_h, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfwadd_vf_w, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfwsub_vf_h, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfwsub_vf_w, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfwadd_wf_h, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfwadd_wf_w, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfwsub_wf_h, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfwsub_wf_w, void, ptr, ptr, i64, ptr, env, i32) + +DEF_HELPER_6(vfmul_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfmul_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfmul_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfdiv_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfdiv_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfdiv_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfmul_vf_h, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfmul_vf_w, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfmul_vf_d, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfdiv_vf_h, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfdiv_vf_w, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfdiv_vf_d, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfrdiv_vf_h, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfrdiv_vf_w, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfrdiv_vf_d, void, ptr, ptr, i64, ptr, env, i32) + +DEF_HELPER_6(vfwmul_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfwmul_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfwmul_vf_h, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfwmul_vf_w, void, ptr, ptr, i64, ptr, env, i32) + +DEF_HELPER_6(vfmacc_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfmacc_vv_w, void, ptr, 
ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfmacc_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfnmacc_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfnmacc_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfnmacc_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfmsac_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfmsac_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfmsac_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfnmsac_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfnmsac_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfnmsac_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfmadd_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfmadd_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfmadd_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfnmadd_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfnmadd_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfnmadd_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfmsub_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfmsub_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfmsub_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfnmsub_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfnmsub_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfnmsub_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfmacc_vf_h, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfmacc_vf_w, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfmacc_vf_d, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfnmacc_vf_h, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfnmacc_vf_w, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfnmacc_vf_d, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfmsac_vf_h, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfmsac_vf_w, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfmsac_vf_d, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfnmsac_vf_h, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfnmsac_vf_w, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfnmsac_vf_d, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfmadd_vf_h, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfmadd_vf_w, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfmadd_vf_d, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfnmadd_vf_h, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfnmadd_vf_w, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfnmadd_vf_d, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfmsub_vf_h, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfmsub_vf_w, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfmsub_vf_d, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfnmsub_vf_h, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfnmsub_vf_w, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfnmsub_vf_d, void, ptr, ptr, i64, ptr, env, i32) + +DEF_HELPER_6(vfwmacc_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfwmacc_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfwnmacc_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfwnmacc_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfwmsac_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfwmsac_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfwnmsac_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfwnmsac_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfwmacc_vf_h, void, ptr, ptr, i64, ptr, env, i32) 
+DEF_HELPER_6(vfwmacc_vf_w, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfwnmacc_vf_h, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfwnmacc_vf_w, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfwmsac_vf_h, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfwmsac_vf_w, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfwnmsac_vf_h, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfwnmsac_vf_w, void, ptr, ptr, i64, ptr, env, i32) + +DEF_HELPER_5(vfsqrt_v_h, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_5(vfsqrt_v_w, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_5(vfsqrt_v_d, void, ptr, ptr, ptr, env, i32) + +DEF_HELPER_6(vfmin_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfmin_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfmin_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfmax_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfmax_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfmax_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfmin_vf_h, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfmin_vf_w, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfmin_vf_d, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfmax_vf_h, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfmax_vf_w, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfmax_vf_d, void, ptr, ptr, i64, ptr, env, i32) + +DEF_HELPER_6(vfsgnj_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfsgnj_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfsgnj_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfsgnjn_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfsgnjn_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfsgnjn_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfsgnjx_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfsgnjx_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfsgnjx_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfsgnj_vf_h, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfsgnj_vf_w, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfsgnj_vf_d, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfsgnjn_vf_h, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfsgnjn_vf_w, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfsgnjn_vf_d, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfsgnjx_vf_h, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfsgnjx_vf_w, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfsgnjx_vf_d, void, ptr, ptr, i64, ptr, env, i32) + +DEF_HELPER_6(vmfeq_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmfeq_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmfeq_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmfne_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmfne_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmfne_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmflt_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmflt_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmflt_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmfle_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmfle_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmfle_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmfeq_vf_h, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vmfeq_vf_w, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vmfeq_vf_d, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vmfne_vf_h, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vmfne_vf_w, void, ptr, ptr, 
i64, ptr, env, i32) +DEF_HELPER_6(vmfne_vf_d, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vmflt_vf_h, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vmflt_vf_w, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vmflt_vf_d, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vmfle_vf_h, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vmfle_vf_w, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vmfle_vf_d, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vmfgt_vf_h, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vmfgt_vf_w, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vmfgt_vf_d, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vmfge_vf_h, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vmfge_vf_w, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vmfge_vf_d, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vmford_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmford_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmford_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmford_vf_h, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vmford_vf_w, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vmford_vf_d, void, ptr, ptr, i64, ptr, env, i32) + +DEF_HELPER_5(vfclass_v_h, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_5(vfclass_v_w, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_5(vfclass_v_d, void, ptr, ptr, ptr, env, i32) + +DEF_HELPER_6(vfmerge_vfm_h, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfmerge_vfm_w, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfmerge_vfm_d, void, ptr, ptr, i64, ptr, env, i32) + +DEF_HELPER_5(vfcvt_xu_f_v_h, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_5(vfcvt_xu_f_v_w, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_5(vfcvt_xu_f_v_d, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_5(vfcvt_x_f_v_h, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_5(vfcvt_x_f_v_w, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_5(vfcvt_x_f_v_d, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_5(vfcvt_f_xu_v_h, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_5(vfcvt_f_xu_v_w, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_5(vfcvt_f_xu_v_d, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_5(vfcvt_f_x_v_h, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_5(vfcvt_f_x_v_w, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_5(vfcvt_f_x_v_d, void, ptr, ptr, ptr, env, i32) + +DEF_HELPER_5(vfwcvt_xu_f_v_h, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_5(vfwcvt_xu_f_v_w, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_5(vfwcvt_x_f_v_h, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_5(vfwcvt_x_f_v_w, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_5(vfwcvt_f_xu_v_h, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_5(vfwcvt_f_xu_v_w, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_5(vfwcvt_f_x_v_h, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_5(vfwcvt_f_x_v_w, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_5(vfwcvt_f_f_v_h, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_5(vfwcvt_f_f_v_w, void, ptr, ptr, ptr, env, i32) + +DEF_HELPER_5(vfncvt_xu_f_v_h, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_5(vfncvt_xu_f_v_w, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_5(vfncvt_x_f_v_h, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_5(vfncvt_x_f_v_w, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_5(vfncvt_f_xu_v_h, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_5(vfncvt_f_xu_v_w, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_5(vfncvt_f_x_v_h, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_5(vfncvt_f_x_v_w, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_5(vfncvt_f_f_v_h, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_5(vfncvt_f_f_v_w, void, ptr, ptr, ptr, env, i32) + 
+DEF_HELPER_6(vredsum_vs_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vredsum_vs_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vredsum_vs_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vredsum_vs_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vredmaxu_vs_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vredmaxu_vs_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vredmaxu_vs_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vredmaxu_vs_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vredmax_vs_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vredmax_vs_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vredmax_vs_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vredmax_vs_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vredminu_vs_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vredminu_vs_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vredminu_vs_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vredminu_vs_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vredmin_vs_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vredmin_vs_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vredmin_vs_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vredmin_vs_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vredand_vs_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vredand_vs_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vredand_vs_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vredand_vs_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vredor_vs_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vredor_vs_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vredor_vs_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vredor_vs_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vredxor_vs_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vredxor_vs_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vredxor_vs_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vredxor_vs_d, void, ptr, ptr, ptr, ptr, env, i32) + +DEF_HELPER_6(vwredsumu_vs_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwredsumu_vs_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwredsumu_vs_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwredsum_vs_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwredsum_vs_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwredsum_vs_w, void, ptr, ptr, ptr, ptr, env, i32) + +DEF_HELPER_6(vfredsum_vs_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfredsum_vs_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfredsum_vs_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfredmax_vs_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfredmax_vs_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfredmax_vs_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfredmin_vs_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfredmin_vs_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfredmin_vs_d, void, ptr, ptr, ptr, ptr, env, i32) + +DEF_HELPER_6(vfwredsum_vs_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfwredsum_vs_w, void, ptr, ptr, ptr, ptr, env, i32) + +DEF_HELPER_6(vmand_mm, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmnand_mm, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmandnot_mm, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmxor_mm, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmor_mm, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmnor_mm, void, ptr, ptr, ptr, ptr, env, i32) 
+DEF_HELPER_6(vmornot_mm, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmxnor_mm, void, ptr, ptr, ptr, ptr, env, i32) + +DEF_HELPER_4(vmpopc_m, tl, ptr, ptr, env, i32) + +DEF_HELPER_4(vmfirst_m, tl, ptr, ptr, env, i32) + +DEF_HELPER_5(vmsbf_m, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_5(vmsif_m, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_5(vmsof_m, void, ptr, ptr, ptr, env, i32) + +DEF_HELPER_5(viota_m_b, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_5(viota_m_h, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_5(viota_m_w, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_5(viota_m_d, void, ptr, ptr, ptr, env, i32) + +DEF_HELPER_4(vid_v_b, void, ptr, ptr, env, i32) +DEF_HELPER_4(vid_v_h, void, ptr, ptr, env, i32) +DEF_HELPER_4(vid_v_w, void, ptr, ptr, env, i32) +DEF_HELPER_4(vid_v_d, void, ptr, ptr, env, i32) + +DEF_HELPER_6(vslideup_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vslideup_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vslideup_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vslideup_vx_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vslidedown_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vslidedown_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vslidedown_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vslidedown_vx_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vslide1up_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vslide1up_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vslide1up_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vslide1up_vx_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vslide1down_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vslide1down_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vslide1down_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vslide1down_vx_d, void, ptr, ptr, tl, ptr, env, i32) + +DEF_HELPER_6(vrgather_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vrgather_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vrgather_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vrgather_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vrgather_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vrgather_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vrgather_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vrgather_vx_d, void, ptr, ptr, tl, ptr, env, i32) + +DEF_HELPER_6(vcompress_vm_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vcompress_vm_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vcompress_vm_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vcompress_vm_d, void, ptr, ptr, ptr, ptr, env, i32) diff --git a/qemu/target/riscv/insn_trans/trans_privileged.inc.c b/qemu/target/riscv/insn_trans/trans_privileged.inc.c index 05662b21e6..7bfb889d35 100644 --- a/qemu/target/riscv/insn_trans/trans_privileged.inc.c +++ b/qemu/target/riscv/insn_trans/trans_privileged.inc.c @@ -77,57 +77,11 @@ static bool trans_wfi(DisasContext *ctx, arg_wfi *a) static bool trans_sfence_vma(DisasContext *ctx, arg_sfence_vma *a) { TCGContext *tcg_ctx = ctx->uc->tcg_ctx; - if (ctx->priv_ver >= PRIV_VERSION_1_10_0) { - gen_helper_tlb_flush(tcg_ctx, tcg_ctx->cpu_env); - return true; - } - return false; + gen_helper_tlb_flush(tcg_ctx, tcg_ctx->cpu_env); + return true; } static bool trans_sfence_vm(DisasContext *ctx, arg_sfence_vm *a) { - TCGContext *tcg_ctx = ctx->uc->tcg_ctx; - if (ctx->priv_ver <= PRIV_VERSION_1_09_1) { - gen_helper_tlb_flush(tcg_ctx, tcg_ctx->cpu_env); - return true; - } - return false; -} - -static 
bool trans_hfence_gvma(DisasContext *ctx, arg_sfence_vma *a) -{ - TCGContext *tcg_ctx = ctx->uc->tcg_ctx; - if (ctx->priv_ver >= PRIV_VERSION_1_10_0 && - has_ext(ctx, RVH)) { - /* Hpervisor extensions exist */ - /* - * if (env->priv == PRV_M || - * (env->priv == PRV_S && - * !riscv_cpu_virt_enabled(env) && - * get_field(ctx->mstatus_fs, MSTATUS_TVM))) { - */ - gen_helper_tlb_flush(tcg_ctx, tcg_ctx->cpu_env); - return true; - /* } */ - } - return false; -} - -static bool trans_hfence_bvma(DisasContext *ctx, arg_sfence_vma *a) -{ - TCGContext *tcg_ctx = ctx->uc->tcg_ctx; - if (ctx->priv_ver >= PRIV_VERSION_1_10_0 && - has_ext(ctx, RVH)) { - /* Hpervisor extensions exist */ - /* - * if (env->priv == PRV_M || - * (env->priv == PRV_S && - * !riscv_cpu_virt_enabled(env) && - * get_field(ctx->mstatus_fs, MSTATUS_TVM))) { - */ - gen_helper_tlb_flush(tcg_ctx, tcg_ctx->cpu_env); - return true; - /* } */ - } return false; } diff --git a/qemu/target/riscv/insn_trans/trans_rvd.inc.c b/qemu/target/riscv/insn_trans/trans_rvd.inc.c index 2e643d5168..e461146e23 100644 --- a/qemu/target/riscv/insn_trans/trans_rvd.inc.c +++ b/qemu/target/riscv/insn_trans/trans_rvd.inc.c @@ -314,7 +314,7 @@ static bool trans_fclass_d(DisasContext *ctx, arg_fclass_d *a) TCGContext *tcg_ctx = ctx->uc->tcg_ctx; TCGv t0 = tcg_temp_new(tcg_ctx); - gen_helper_fclass_d(tcg_ctx, t0, tcg_ctx->cpu_fpr[a->rs1]); + glue(gen_helper_fclass_d, UNICORN_ARCH_POSTFIX)(tcg_ctx, t0, tcg_ctx->cpu_fpr[a->rs1]); gen_set_gpr(tcg_ctx, a->rd, t0); tcg_temp_free(tcg_ctx, t0); return true; diff --git a/qemu/target/riscv/insn_trans/trans_rvf.inc.c b/qemu/target/riscv/insn_trans/trans_rvf.inc.c index de044bfeb9..b4fd677b23 100644 --- a/qemu/target/riscv/insn_trans/trans_rvf.inc.c +++ b/qemu/target/riscv/insn_trans/trans_rvf.inc.c @@ -23,6 +23,21 @@ return false; \ } while (0) +/* + * RISC-V requires NaN-boxing of narrower width floating + * point values. This applies when a 32-bit value is + * assigned to a 64-bit FP register. Thus this does not + * apply when the RVD extension is not present. + */ +static void gen_nanbox_fpr(DisasContext *ctx, int regno) +{ + TCGContext *tcg_ctx = ctx->uc->tcg_ctx; + if (has_ext(ctx, RVD)) { + tcg_gen_ori_i64(tcg_ctx, tcg_ctx->cpu_fpr[regno], tcg_ctx->cpu_fpr[regno], + MAKE_64BIT_MASK(32, 32)); + } +} + static bool trans_flw(DisasContext *ctx, arg_flw *a) { TCGContext *tcg_ctx = ctx->uc->tcg_ctx; @@ -33,8 +48,7 @@ static bool trans_flw(DisasContext *ctx, arg_flw *a) tcg_gen_addi_tl(tcg_ctx, t0, t0, a->imm); tcg_gen_qemu_ld_i64(tcg_ctx, tcg_ctx->cpu_fpr[a->rd], t0, ctx->mem_idx, MO_TEUL); - /* RISC-V requires NaN-boxing of narrower width floating point values */ - tcg_gen_ori_i64(tcg_ctx, tcg_ctx->cpu_fpr[a->rd], tcg_ctx->cpu_fpr[a->rd], 0xffffffff00000000ULL); + gen_nanbox_fpr(ctx, a->rd); tcg_temp_free(tcg_ctx, t0); mark_fs_dirty(ctx); @@ -343,7 +357,7 @@ static bool trans_fclass_s(DisasContext *ctx, arg_fclass_s *a) TCGv t0 = tcg_temp_new(tcg_ctx); - gen_helper_fclass_s(tcg_ctx, t0, tcg_ctx->cpu_fpr[a->rs1]); + glue(gen_helper_fclass_s, UNICORN_ARCH_POSTFIX)(tcg_ctx, t0, tcg_ctx->cpu_fpr[a->rs1]); gen_set_gpr(tcg_ctx, a->rd, t0); tcg_temp_free(tcg_ctx, t0); diff --git a/qemu/target/riscv/insn_trans/trans_rvh.inc.c b/qemu/target/riscv/insn_trans/trans_rvh.inc.c new file mode 100644 index 0000000000..c238510e4f --- /dev/null +++ b/qemu/target/riscv/insn_trans/trans_rvh.inc.c @@ -0,0 +1,33 @@ +/* + * RISC-V translation routines for the RVXI Base Integer Instruction Set. 
+ * + * Copyright (c) 2020 Western Digital + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2 or later, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see <http://www.gnu.org/licenses/>. + */ + +static bool trans_hfence_gvma(DisasContext *ctx, arg_sfence_vma *a) +{ + TCGContext *tcg_ctx = ctx->uc->tcg_ctx; + REQUIRE_EXT(ctx, RVH); + gen_helper_hyp_tlb_flush(tcg_ctx, tcg_ctx->cpu_env); + return true; +} + +static bool trans_hfence_vvma(DisasContext *ctx, arg_sfence_vma *a) +{ + TCGContext *tcg_ctx = ctx->uc->tcg_ctx; + REQUIRE_EXT(ctx, RVH); + gen_helper_hyp_tlb_flush(tcg_ctx, tcg_ctx->cpu_env); + return true; +} diff --git a/qemu/target/riscv/insn_trans/trans_rvv.inc.c b/qemu/target/riscv/insn_trans/trans_rvv.inc.c new file mode 100644 index 0000000000..40b74f11ce --- /dev/null +++ b/qemu/target/riscv/insn_trans/trans_rvv.inc.c @@ -0,0 +1,2954 @@ +/* + * RISC-V translation routines for the RVV Standard Extension. + * + * Copyright (c) 2020 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2 or later, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see <http://www.gnu.org/licenses/>.
+ */ +#include "tcg/tcg-op-gvec.h" +#include "tcg/tcg-gvec-desc.h" +#include "internals.h" + +static bool trans_vsetvl(DisasContext *ctx, arg_vsetvl *a) +{ + TCGContext *tcg_ctx = ctx->uc->tcg_ctx; + TCGv s1, s2, dst; + + if (!has_ext(ctx, RVV)) { + return false; + } + + s2 = tcg_temp_new(tcg_ctx); + dst = tcg_temp_new(tcg_ctx); + + /* Using x0 as the rs1 register specifier encodes an infinite AVL */ + if (a->rs1 == 0) { + /* As the mask is at least one bit, RV_VLEN_MAX is >= VLMAX */ + s1 = tcg_const_tl(tcg_ctx, RV_VLEN_MAX); + } else { + s1 = tcg_temp_new(tcg_ctx); + gen_get_gpr(tcg_ctx, s1, a->rs1); + } + gen_get_gpr(tcg_ctx, s2, a->rs2); + gen_helper_vsetvl(tcg_ctx, dst, tcg_ctx->cpu_env, s1, s2); + gen_set_gpr(tcg_ctx, a->rd, dst); + tcg_gen_movi_tl(tcg_ctx, tcg_ctx->cpu_pc, ctx->pc_succ_insn); + lookup_and_goto_ptr(ctx); + ctx->base.is_jmp = DISAS_NORETURN; + + tcg_temp_free(tcg_ctx, s1); + tcg_temp_free(tcg_ctx, s2); + tcg_temp_free(tcg_ctx, dst); + return true; +} + +static bool trans_vsetvli(DisasContext *ctx, arg_vsetvli *a) +{ + TCGContext *tcg_ctx = ctx->uc->tcg_ctx; + TCGv s1, s2, dst; + + if (!has_ext(ctx, RVV)) { + return false; + } + + s2 = tcg_const_tl(tcg_ctx, a->zimm); + dst = tcg_temp_new(tcg_ctx); + + /* Using x0 as the rs1 register specifier encodes an infinite AVL */ + if (a->rs1 == 0) { + /* As the mask is at least one bit, RV_VLEN_MAX is >= VLMAX */ + s1 = tcg_const_tl(tcg_ctx, RV_VLEN_MAX); + } else { + s1 = tcg_temp_new(tcg_ctx); + gen_get_gpr(tcg_ctx, s1, a->rs1); + } + gen_helper_vsetvl(tcg_ctx, dst, tcg_ctx->cpu_env, s1, s2); + gen_set_gpr(tcg_ctx, a->rd, dst); + gen_goto_tb(ctx, 0, ctx->pc_succ_insn); + ctx->base.is_jmp = DISAS_NORETURN; + + tcg_temp_free(tcg_ctx, s1); + tcg_temp_free(tcg_ctx, s2); + tcg_temp_free(tcg_ctx, dst); + return true; +} + +/* vector register offset from env */ +static uint32_t vreg_ofs(DisasContext *s, int reg) +{ + return offsetof(CPURISCVState, vreg) + reg * s->vlen / 8; +} + +/* check functions */ + +/* + * In cpu_get_tb_cpu_state(), VILL is set if RVV was not present. + * So RVV is also checked in this function. + */ +static bool vext_check_isa_ill(DisasContext *s) +{ + return !s->vill; +} + +/* + * There are two rules checked here. + * + * 1. Vector register numbers are multiples of LMUL. (Section 3.2) + * + * 2. For all widening instructions, the destination LMUL value must also be + * a supported LMUL value. (Section 11.2) + */ +static bool vext_check_reg(DisasContext *s, uint32_t reg, bool widen) +{ + /* + * The destination vector register group results are arranged as if both + * SEW and LMUL were at twice their current settings. (Section 11.2). + */ + int legal = widen ? 2 << s->lmul : 1 << s->lmul; + + return !((s->lmul == 0x3 && widen) || (reg % legal)); +} + +/* + * There are two rules checked here. + * + * 1. The destination vector register group for a masked vector instruction can + * only overlap the source mask register (v0) when LMUL=1. (Section 5.3) + * + * 2. In widening instructions and some other instructions, like vslideup.vx, + * there is no need to check whether LMUL=1. + */ +static bool vext_check_overlap_mask(DisasContext *s, uint32_t vd, bool vm, + bool force) +{ + return (vm != 0 || vd != 0) || (!force && (s->lmul == 0)); +} + +/* The LMUL setting must be such that LMUL * NFIELDS <= 8.
(Section 7.8) */ +static bool vext_check_nf(DisasContext *s, uint32_t nf) +{ + return (1 << s->lmul) * nf <= 8; +} + +/* + * The destination vector register group cannot overlap a source vector register + * group of a different element width. (Section 11.2) + */ +static inline bool vext_check_overlap_group(int rd, int dlen, int rs, int slen) +{ + return ((rd >= rs + slen) || (rs >= rd + dlen)); +} +/* common translation macro */ +#define GEN_VEXT_TRANS(NAME, SEQ, ARGTYPE, OP, CHECK) \ +static bool trans_##NAME(DisasContext *s, arg_##ARGTYPE *a)\ +{ \ + if (CHECK(s, a)) { \ + return OP(s, a, SEQ); \ + } \ + return false; \ +} + +/* + *** unit stride load and store + */ +typedef void gen_helper_ldst_us(TCGContext *, TCGv_ptr, TCGv_ptr, TCGv, + TCGv_env, TCGv_i32); + +static bool ldst_us_trans(TCGContext *tcg_ctx, uint32_t vd, uint32_t rs1, uint32_t data, + gen_helper_ldst_us *fn, DisasContext *s) +{ + TCGv_ptr dest, mask; + TCGv base; + TCGv_i32 desc; + + TCGLabel *over = gen_new_label(tcg_ctx); + tcg_gen_brcondi_tl(tcg_ctx, TCG_COND_EQ, tcg_ctx->cpu_vl, 0, over); + + dest = tcg_temp_new_ptr(tcg_ctx); + mask = tcg_temp_new_ptr(tcg_ctx); + base = tcg_temp_new(tcg_ctx); + + /* + * As simd_desc supports at most 256 bytes, and in this implementation, + * the max vector group length is 2048 bytes. So split it into two parts. + * + * The first part is vlen in bytes, encoded in maxsz of simd_desc. + * The second part is lmul, encoded in data of simd_desc. + */ + desc = tcg_const_i32(tcg_ctx, simd_desc(0, s->vlen / 8, data)); + + gen_get_gpr(tcg_ctx, base, rs1); + tcg_gen_addi_ptr(tcg_ctx, dest, tcg_ctx->cpu_env, vreg_ofs(s, vd)); + tcg_gen_addi_ptr(tcg_ctx, mask, tcg_ctx->cpu_env, vreg_ofs(s, 0)); + + fn(tcg_ctx, dest, mask, base, tcg_ctx->cpu_env, desc); + + tcg_temp_free_ptr(tcg_ctx, dest); + tcg_temp_free_ptr(tcg_ctx, mask); + tcg_temp_free(tcg_ctx, base); + tcg_temp_free_i32(tcg_ctx, desc); + gen_set_label(tcg_ctx, over); + return true; +} + +static bool ld_us_op(DisasContext *s, arg_r2nfvm *a, uint8_t seq) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + uint32_t data = 0; + gen_helper_ldst_us *fn; + static gen_helper_ldst_us * const fns[2][7][4] = { + /* masked unit stride load */ + { { gen_helper_vlb_v_b_mask, gen_helper_vlb_v_h_mask, + gen_helper_vlb_v_w_mask, gen_helper_vlb_v_d_mask }, + { NULL, gen_helper_vlh_v_h_mask, + gen_helper_vlh_v_w_mask, gen_helper_vlh_v_d_mask }, + { NULL, NULL, + gen_helper_vlw_v_w_mask, gen_helper_vlw_v_d_mask }, + { gen_helper_vle_v_b_mask, gen_helper_vle_v_h_mask, + gen_helper_vle_v_w_mask, gen_helper_vle_v_d_mask }, + { gen_helper_vlbu_v_b_mask, gen_helper_vlbu_v_h_mask, + gen_helper_vlbu_v_w_mask, gen_helper_vlbu_v_d_mask }, + { NULL, gen_helper_vlhu_v_h_mask, + gen_helper_vlhu_v_w_mask, gen_helper_vlhu_v_d_mask }, + { NULL, NULL, + gen_helper_vlwu_v_w_mask, gen_helper_vlwu_v_d_mask } }, + /* unmasked unit stride load */ + { { gen_helper_vlb_v_b, gen_helper_vlb_v_h, + gen_helper_vlb_v_w, gen_helper_vlb_v_d }, + { NULL, gen_helper_vlh_v_h, + gen_helper_vlh_v_w, gen_helper_vlh_v_d }, + { NULL, NULL, + gen_helper_vlw_v_w, gen_helper_vlw_v_d }, + { gen_helper_vle_v_b, gen_helper_vle_v_h, + gen_helper_vle_v_w, gen_helper_vle_v_d }, + { gen_helper_vlbu_v_b, gen_helper_vlbu_v_h, + gen_helper_vlbu_v_w, gen_helper_vlbu_v_d }, + { NULL, gen_helper_vlhu_v_h, + gen_helper_vlhu_v_w, gen_helper_vlhu_v_d }, + { NULL, NULL, + gen_helper_vlwu_v_w, gen_helper_vlwu_v_d } } + }; + + fn = fns[a->vm][seq][s->sew]; + if (fn == NULL) { + return false; + } + + FIELD_DP32(data, VDATA, 
MLEN, s->mlen, data); + FIELD_DP32(data, VDATA, VM, a->vm, data); + FIELD_DP32(data, VDATA, LMUL, s->lmul, data); + FIELD_DP32(data, VDATA, NF, a->nf, data); + return ldst_us_trans(tcg_ctx, a->rd, a->rs1, data, fn, s); +} + +static bool ld_us_check(DisasContext *s, arg_r2nfvm* a) +{ + return (vext_check_isa_ill(s) && + vext_check_overlap_mask(s, a->rd, a->vm, false) && + vext_check_reg(s, a->rd, false) && + vext_check_nf(s, a->nf)); +} + +GEN_VEXT_TRANS(vlb_v, 0, r2nfvm, ld_us_op, ld_us_check) +GEN_VEXT_TRANS(vlh_v, 1, r2nfvm, ld_us_op, ld_us_check) +GEN_VEXT_TRANS(vlw_v, 2, r2nfvm, ld_us_op, ld_us_check) +GEN_VEXT_TRANS(vle_v, 3, r2nfvm, ld_us_op, ld_us_check) +GEN_VEXT_TRANS(vlbu_v, 4, r2nfvm, ld_us_op, ld_us_check) +GEN_VEXT_TRANS(vlhu_v, 5, r2nfvm, ld_us_op, ld_us_check) +GEN_VEXT_TRANS(vlwu_v, 6, r2nfvm, ld_us_op, ld_us_check) + +static bool st_us_op(DisasContext *s, arg_r2nfvm *a, uint8_t seq) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + uint32_t data = 0; + gen_helper_ldst_us *fn; + static gen_helper_ldst_us * const fns[2][4][4] = { + /* masked unit stride load and store */ + { { gen_helper_vsb_v_b_mask, gen_helper_vsb_v_h_mask, + gen_helper_vsb_v_w_mask, gen_helper_vsb_v_d_mask }, + { NULL, gen_helper_vsh_v_h_mask, + gen_helper_vsh_v_w_mask, gen_helper_vsh_v_d_mask }, + { NULL, NULL, + gen_helper_vsw_v_w_mask, gen_helper_vsw_v_d_mask }, + { gen_helper_vse_v_b_mask, gen_helper_vse_v_h_mask, + gen_helper_vse_v_w_mask, gen_helper_vse_v_d_mask } }, + /* unmasked unit stride store */ + { { gen_helper_vsb_v_b, gen_helper_vsb_v_h, + gen_helper_vsb_v_w, gen_helper_vsb_v_d }, + { NULL, gen_helper_vsh_v_h, + gen_helper_vsh_v_w, gen_helper_vsh_v_d }, + { NULL, NULL, + gen_helper_vsw_v_w, gen_helper_vsw_v_d }, + { gen_helper_vse_v_b, gen_helper_vse_v_h, + gen_helper_vse_v_w, gen_helper_vse_v_d } } + }; + + fn = fns[a->vm][seq][s->sew]; + if (fn == NULL) { + return false; + } + + FIELD_DP32(data, VDATA, MLEN, s->mlen, data); + FIELD_DP32(data, VDATA, VM, a->vm, data); + FIELD_DP32(data, VDATA, LMUL, s->lmul, data); + FIELD_DP32(data, VDATA, NF, a->nf, data); + return ldst_us_trans(tcg_ctx, a->rd, a->rs1, data, fn, s); +} + +static bool st_us_check(DisasContext *s, arg_r2nfvm* a) +{ + return (vext_check_isa_ill(s) && + vext_check_reg(s, a->rd, false) && + vext_check_nf(s, a->nf)); +} + +GEN_VEXT_TRANS(vsb_v, 0, r2nfvm, st_us_op, st_us_check) +GEN_VEXT_TRANS(vsh_v, 1, r2nfvm, st_us_op, st_us_check) +GEN_VEXT_TRANS(vsw_v, 2, r2nfvm, st_us_op, st_us_check) +GEN_VEXT_TRANS(vse_v, 3, r2nfvm, st_us_op, st_us_check) + +/* + *** stride load and store + */ +typedef void gen_helper_ldst_stride(TCGContext *, TCGv_ptr, TCGv_ptr, TCGv, + TCGv, TCGv_env, TCGv_i32); + +static bool ldst_stride_trans(uint32_t vd, uint32_t rs1, uint32_t rs2, + uint32_t data, gen_helper_ldst_stride *fn, + DisasContext *s) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + TCGv_ptr dest, mask; + TCGv base, stride; + TCGv_i32 desc; + + TCGLabel *over = gen_new_label(tcg_ctx); + tcg_gen_brcondi_tl(tcg_ctx, TCG_COND_EQ, tcg_ctx->cpu_vl, 0, over); + + dest = tcg_temp_new_ptr(tcg_ctx); + mask = tcg_temp_new_ptr(tcg_ctx); + base = tcg_temp_new(tcg_ctx); + stride = tcg_temp_new(tcg_ctx); + desc = tcg_const_i32(tcg_ctx, simd_desc(0, s->vlen / 8, data)); + + gen_get_gpr(tcg_ctx, base, rs1); + gen_get_gpr(tcg_ctx, stride, rs2); + tcg_gen_addi_ptr(tcg_ctx, dest, tcg_ctx->cpu_env, vreg_ofs(s, vd)); + tcg_gen_addi_ptr(tcg_ctx, mask, tcg_ctx->cpu_env, vreg_ofs(s, 0)); + + fn(tcg_ctx, dest, mask, base, stride, tcg_ctx->cpu_env, desc); + + 
tcg_temp_free_ptr(tcg_ctx, dest); + tcg_temp_free_ptr(tcg_ctx, mask); + tcg_temp_free(tcg_ctx, base); + tcg_temp_free(tcg_ctx, stride); + tcg_temp_free_i32(tcg_ctx, desc); + gen_set_label(tcg_ctx, over); + return true; +} + +static bool ld_stride_op(DisasContext *s, arg_rnfvm *a, uint8_t seq) +{ + uint32_t data = 0; + gen_helper_ldst_stride *fn; + static gen_helper_ldst_stride * const fns[7][4] = { + { gen_helper_vlsb_v_b, gen_helper_vlsb_v_h, + gen_helper_vlsb_v_w, gen_helper_vlsb_v_d }, + { NULL, gen_helper_vlsh_v_h, + gen_helper_vlsh_v_w, gen_helper_vlsh_v_d }, + { NULL, NULL, + gen_helper_vlsw_v_w, gen_helper_vlsw_v_d }, + { gen_helper_vlse_v_b, gen_helper_vlse_v_h, + gen_helper_vlse_v_w, gen_helper_vlse_v_d }, + { gen_helper_vlsbu_v_b, gen_helper_vlsbu_v_h, + gen_helper_vlsbu_v_w, gen_helper_vlsbu_v_d }, + { NULL, gen_helper_vlshu_v_h, + gen_helper_vlshu_v_w, gen_helper_vlshu_v_d }, + { NULL, NULL, + gen_helper_vlswu_v_w, gen_helper_vlswu_v_d }, + }; + + fn = fns[seq][s->sew]; + if (fn == NULL) { + return false; + } + + FIELD_DP32(data, VDATA, MLEN, s->mlen, data); + FIELD_DP32(data, VDATA, VM, a->vm, data); + FIELD_DP32(data, VDATA, LMUL, s->lmul, data); + FIELD_DP32(data, VDATA, NF, a->nf, data); + return ldst_stride_trans(a->rd, a->rs1, a->rs2, data, fn, s); +} + +static bool ld_stride_check(DisasContext *s, arg_rnfvm* a) +{ + return (vext_check_isa_ill(s) && + vext_check_overlap_mask(s, a->rd, a->vm, false) && + vext_check_reg(s, a->rd, false) && + vext_check_nf(s, a->nf)); +} + +GEN_VEXT_TRANS(vlsb_v, 0, rnfvm, ld_stride_op, ld_stride_check) +GEN_VEXT_TRANS(vlsh_v, 1, rnfvm, ld_stride_op, ld_stride_check) +GEN_VEXT_TRANS(vlsw_v, 2, rnfvm, ld_stride_op, ld_stride_check) +GEN_VEXT_TRANS(vlse_v, 3, rnfvm, ld_stride_op, ld_stride_check) +GEN_VEXT_TRANS(vlsbu_v, 4, rnfvm, ld_stride_op, ld_stride_check) +GEN_VEXT_TRANS(vlshu_v, 5, rnfvm, ld_stride_op, ld_stride_check) +GEN_VEXT_TRANS(vlswu_v, 6, rnfvm, ld_stride_op, ld_stride_check) + +static bool st_stride_op(DisasContext *s, arg_rnfvm *a, uint8_t seq) +{ + uint32_t data = 0; + gen_helper_ldst_stride *fn; + static gen_helper_ldst_stride * const fns[4][4] = { + /* masked stride store */ + { gen_helper_vssb_v_b, gen_helper_vssb_v_h, + gen_helper_vssb_v_w, gen_helper_vssb_v_d }, + { NULL, gen_helper_vssh_v_h, + gen_helper_vssh_v_w, gen_helper_vssh_v_d }, + { NULL, NULL, + gen_helper_vssw_v_w, gen_helper_vssw_v_d }, + { gen_helper_vsse_v_b, gen_helper_vsse_v_h, + gen_helper_vsse_v_w, gen_helper_vsse_v_d } + }; + + FIELD_DP32(data, VDATA, MLEN, s->mlen, data); + FIELD_DP32(data, VDATA, VM, a->vm, data); + FIELD_DP32(data, VDATA, LMUL, s->lmul, data); + FIELD_DP32(data, VDATA, NF, a->nf, data); + fn = fns[seq][s->sew]; + if (fn == NULL) { + return false; + } + + return ldst_stride_trans(a->rd, a->rs1, a->rs2, data, fn, s); +} + +static bool st_stride_check(DisasContext *s, arg_rnfvm* a) +{ + return (vext_check_isa_ill(s) && + vext_check_reg(s, a->rd, false) && + vext_check_nf(s, a->nf)); +} + +GEN_VEXT_TRANS(vssb_v, 0, rnfvm, st_stride_op, st_stride_check) +GEN_VEXT_TRANS(vssh_v, 1, rnfvm, st_stride_op, st_stride_check) +GEN_VEXT_TRANS(vssw_v, 2, rnfvm, st_stride_op, st_stride_check) +GEN_VEXT_TRANS(vsse_v, 3, rnfvm, st_stride_op, st_stride_check) + +/* + *** index load and store + */ +typedef void gen_helper_ldst_index(TCGContext *, TCGv_ptr, TCGv_ptr, TCGv, + TCGv_ptr, TCGv_env, TCGv_i32); + +static bool ldst_index_trans(uint32_t vd, uint32_t rs1, uint32_t vs2, + uint32_t data, gen_helper_ldst_index *fn, + DisasContext *s) +{ + TCGContext 
*tcg_ctx = s->uc->tcg_ctx; + TCGv_ptr dest, mask, index; + TCGv base; + TCGv_i32 desc; + + TCGLabel *over = gen_new_label(tcg_ctx); + tcg_gen_brcondi_tl(tcg_ctx, TCG_COND_EQ, tcg_ctx->cpu_vl, 0, over); + + dest = tcg_temp_new_ptr(tcg_ctx); + mask = tcg_temp_new_ptr(tcg_ctx); + index = tcg_temp_new_ptr(tcg_ctx); + base = tcg_temp_new(tcg_ctx); + desc = tcg_const_i32(tcg_ctx, simd_desc(0, s->vlen / 8, data)); + + gen_get_gpr(tcg_ctx, base, rs1); + tcg_gen_addi_ptr(tcg_ctx, dest, tcg_ctx->cpu_env, vreg_ofs(s, vd)); + tcg_gen_addi_ptr(tcg_ctx, index, tcg_ctx->cpu_env, vreg_ofs(s, vs2)); + tcg_gen_addi_ptr(tcg_ctx, mask, tcg_ctx->cpu_env, vreg_ofs(s, 0)); + + fn(tcg_ctx, dest, mask, base, index, tcg_ctx->cpu_env, desc); + + tcg_temp_free_ptr(tcg_ctx, dest); + tcg_temp_free_ptr(tcg_ctx, mask); + tcg_temp_free_ptr(tcg_ctx, index); + tcg_temp_free(tcg_ctx, base); + tcg_temp_free_i32(tcg_ctx, desc); + gen_set_label(tcg_ctx, over); + return true; +} + +static bool ld_index_op(DisasContext *s, arg_rnfvm *a, uint8_t seq) +{ + uint32_t data = 0; + gen_helper_ldst_index *fn; + static gen_helper_ldst_index * const fns[7][4] = { + { gen_helper_vlxb_v_b, gen_helper_vlxb_v_h, + gen_helper_vlxb_v_w, gen_helper_vlxb_v_d }, + { NULL, gen_helper_vlxh_v_h, + gen_helper_vlxh_v_w, gen_helper_vlxh_v_d }, + { NULL, NULL, + gen_helper_vlxw_v_w, gen_helper_vlxw_v_d }, + { gen_helper_vlxe_v_b, gen_helper_vlxe_v_h, + gen_helper_vlxe_v_w, gen_helper_vlxe_v_d }, + { gen_helper_vlxbu_v_b, gen_helper_vlxbu_v_h, + gen_helper_vlxbu_v_w, gen_helper_vlxbu_v_d }, + { NULL, gen_helper_vlxhu_v_h, + gen_helper_vlxhu_v_w, gen_helper_vlxhu_v_d }, + { NULL, NULL, + gen_helper_vlxwu_v_w, gen_helper_vlxwu_v_d }, + }; + + fn = fns[seq][s->sew]; + if (fn == NULL) { + return false; + } + + FIELD_DP32(data, VDATA, MLEN, s->mlen, data); + FIELD_DP32(data, VDATA, VM, a->vm, data); + FIELD_DP32(data, VDATA, LMUL, s->lmul, data); + FIELD_DP32(data, VDATA, NF, a->nf, data); + return ldst_index_trans(a->rd, a->rs1, a->rs2, data, fn, s); +} + +/* + * For vector indexed segment loads, the destination vector register + * groups cannot overlap the source vector register group (specified by + * `vs2`), else an illegal instruction exception is raised. 
+ */ +static bool ld_index_check(DisasContext *s, arg_rnfvm* a) +{ + return (vext_check_isa_ill(s) && + vext_check_overlap_mask(s, a->rd, a->vm, false) && + vext_check_reg(s, a->rd, false) && + vext_check_reg(s, a->rs2, false) && + vext_check_nf(s, a->nf) && + ((a->nf == 1) || + vext_check_overlap_group(a->rd, a->nf << s->lmul, + a->rs2, 1 << s->lmul))); +} + +GEN_VEXT_TRANS(vlxb_v, 0, rnfvm, ld_index_op, ld_index_check) +GEN_VEXT_TRANS(vlxh_v, 1, rnfvm, ld_index_op, ld_index_check) +GEN_VEXT_TRANS(vlxw_v, 2, rnfvm, ld_index_op, ld_index_check) +GEN_VEXT_TRANS(vlxe_v, 3, rnfvm, ld_index_op, ld_index_check) +GEN_VEXT_TRANS(vlxbu_v, 4, rnfvm, ld_index_op, ld_index_check) +GEN_VEXT_TRANS(vlxhu_v, 5, rnfvm, ld_index_op, ld_index_check) +GEN_VEXT_TRANS(vlxwu_v, 6, rnfvm, ld_index_op, ld_index_check) + +static bool st_index_op(DisasContext *s, arg_rnfvm *a, uint8_t seq) +{ + uint32_t data = 0; + gen_helper_ldst_index *fn; + static gen_helper_ldst_index * const fns[4][4] = { + { gen_helper_vsxb_v_b, gen_helper_vsxb_v_h, + gen_helper_vsxb_v_w, gen_helper_vsxb_v_d }, + { NULL, gen_helper_vsxh_v_h, + gen_helper_vsxh_v_w, gen_helper_vsxh_v_d }, + { NULL, NULL, + gen_helper_vsxw_v_w, gen_helper_vsxw_v_d }, + { gen_helper_vsxe_v_b, gen_helper_vsxe_v_h, + gen_helper_vsxe_v_w, gen_helper_vsxe_v_d } + }; + + fn = fns[seq][s->sew]; + if (fn == NULL) { + return false; + } + + FIELD_DP32(data, VDATA, MLEN, s->mlen, data); + FIELD_DP32(data, VDATA, VM, a->vm, data); + FIELD_DP32(data, VDATA, LMUL, s->lmul, data); + FIELD_DP32(data, VDATA, NF, a->nf, data); + return ldst_index_trans(a->rd, a->rs1, a->rs2, data, fn, s); +} + +static bool st_index_check(DisasContext *s, arg_rnfvm* a) +{ + return (vext_check_isa_ill(s) && + vext_check_reg(s, a->rd, false) && + vext_check_reg(s, a->rs2, false) && + vext_check_nf(s, a->nf)); +} + +GEN_VEXT_TRANS(vsxb_v, 0, rnfvm, st_index_op, st_index_check) +GEN_VEXT_TRANS(vsxh_v, 1, rnfvm, st_index_op, st_index_check) +GEN_VEXT_TRANS(vsxw_v, 2, rnfvm, st_index_op, st_index_check) +GEN_VEXT_TRANS(vsxe_v, 3, rnfvm, st_index_op, st_index_check) + +/* + *** unit stride fault-only-first load + */ +static bool ldff_trans(uint32_t vd, uint32_t rs1, uint32_t data, + gen_helper_ldst_us *fn, DisasContext *s) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + TCGv_ptr dest, mask; + TCGv base; + TCGv_i32 desc; + + TCGLabel *over = gen_new_label(tcg_ctx); + tcg_gen_brcondi_tl(tcg_ctx, TCG_COND_EQ, tcg_ctx->cpu_vl, 0, over); + + dest = tcg_temp_new_ptr(tcg_ctx); + mask = tcg_temp_new_ptr(tcg_ctx); + base = tcg_temp_new(tcg_ctx); + desc = tcg_const_i32(tcg_ctx, simd_desc(0, s->vlen / 8, data)); + + gen_get_gpr(tcg_ctx, base, rs1); + tcg_gen_addi_ptr(tcg_ctx, dest, tcg_ctx->cpu_env, vreg_ofs(s, vd)); + tcg_gen_addi_ptr(tcg_ctx, mask, tcg_ctx->cpu_env, vreg_ofs(s, 0)); + + fn(tcg_ctx, dest, mask, base, tcg_ctx->cpu_env, desc); + + tcg_temp_free_ptr(tcg_ctx, dest); + tcg_temp_free_ptr(tcg_ctx, mask); + tcg_temp_free(tcg_ctx, base); + tcg_temp_free_i32(tcg_ctx, desc); + gen_set_label(tcg_ctx, over); + return true; +} + +static bool ldff_op(DisasContext *s, arg_r2nfvm *a, uint8_t seq) +{ + uint32_t data = 0; + gen_helper_ldst_us *fn; + static gen_helper_ldst_us * const fns[7][4] = { + { gen_helper_vlbff_v_b, gen_helper_vlbff_v_h, + gen_helper_vlbff_v_w, gen_helper_vlbff_v_d }, + { NULL, gen_helper_vlhff_v_h, + gen_helper_vlhff_v_w, gen_helper_vlhff_v_d }, + { NULL, NULL, + gen_helper_vlwff_v_w, gen_helper_vlwff_v_d }, + { gen_helper_vleff_v_b, gen_helper_vleff_v_h, + gen_helper_vleff_v_w, 
gen_helper_vleff_v_d }, + { gen_helper_vlbuff_v_b, gen_helper_vlbuff_v_h, + gen_helper_vlbuff_v_w, gen_helper_vlbuff_v_d }, + { NULL, gen_helper_vlhuff_v_h, + gen_helper_vlhuff_v_w, gen_helper_vlhuff_v_d }, + { NULL, NULL, + gen_helper_vlwuff_v_w, gen_helper_vlwuff_v_d } + }; + + fn = fns[seq][s->sew]; + if (fn == NULL) { + return false; + } + + FIELD_DP32(data, VDATA, MLEN, s->mlen, data); + FIELD_DP32(data, VDATA, VM, a->vm, data); + FIELD_DP32(data, VDATA, LMUL, s->lmul, data); + FIELD_DP32(data, VDATA, NF, a->nf, data); + return ldff_trans(a->rd, a->rs1, data, fn, s); +} + +GEN_VEXT_TRANS(vlbff_v, 0, r2nfvm, ldff_op, ld_us_check) +GEN_VEXT_TRANS(vlhff_v, 1, r2nfvm, ldff_op, ld_us_check) +GEN_VEXT_TRANS(vlwff_v, 2, r2nfvm, ldff_op, ld_us_check) +GEN_VEXT_TRANS(vleff_v, 3, r2nfvm, ldff_op, ld_us_check) +GEN_VEXT_TRANS(vlbuff_v, 4, r2nfvm, ldff_op, ld_us_check) +GEN_VEXT_TRANS(vlhuff_v, 5, r2nfvm, ldff_op, ld_us_check) +GEN_VEXT_TRANS(vlwuff_v, 6, r2nfvm, ldff_op, ld_us_check) + +/* + *** vector atomic operation + */ +typedef void gen_helper_amo(TCGContext *, TCGv_ptr, TCGv_ptr, TCGv, TCGv_ptr, + TCGv_env, TCGv_i32); + +static bool amo_trans(uint32_t vd, uint32_t rs1, uint32_t vs2, + uint32_t data, gen_helper_amo *fn, DisasContext *s) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + TCGv_ptr dest, mask, index; + TCGv base; + TCGv_i32 desc; + + TCGLabel *over = gen_new_label(tcg_ctx); + tcg_gen_brcondi_tl(tcg_ctx, TCG_COND_EQ, tcg_ctx->cpu_vl, 0, over); + + dest = tcg_temp_new_ptr(tcg_ctx); + mask = tcg_temp_new_ptr(tcg_ctx); + index = tcg_temp_new_ptr(tcg_ctx); + base = tcg_temp_new(tcg_ctx); + desc = tcg_const_i32(tcg_ctx, simd_desc(0, s->vlen / 8, data)); + + gen_get_gpr(tcg_ctx, base, rs1); + tcg_gen_addi_ptr(tcg_ctx, dest, tcg_ctx->cpu_env, vreg_ofs(s, vd)); + tcg_gen_addi_ptr(tcg_ctx, index, tcg_ctx->cpu_env, vreg_ofs(s, vs2)); + tcg_gen_addi_ptr(tcg_ctx, mask, tcg_ctx->cpu_env, vreg_ofs(s, 0)); + + fn(tcg_ctx, dest, mask, base, index, tcg_ctx->cpu_env, desc); + + tcg_temp_free_ptr(tcg_ctx, dest); + tcg_temp_free_ptr(tcg_ctx, mask); + tcg_temp_free_ptr(tcg_ctx, index); + tcg_temp_free(tcg_ctx, base); + tcg_temp_free_i32(tcg_ctx, desc); + gen_set_label(tcg_ctx, over); + return true; +} + +static bool amo_op(DisasContext *s, arg_rwdvm *a, uint8_t seq) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + uint32_t data = 0; + gen_helper_amo *fn; + static gen_helper_amo *const fnsw[9] = { + /* no atomic operation */ + gen_helper_vamoswapw_v_w, + gen_helper_vamoaddw_v_w, + gen_helper_vamoxorw_v_w, + gen_helper_vamoandw_v_w, + gen_helper_vamoorw_v_w, + gen_helper_vamominw_v_w, + gen_helper_vamomaxw_v_w, + gen_helper_vamominuw_v_w, + gen_helper_vamomaxuw_v_w + }; +#ifdef TARGET_RISCV64 + static gen_helper_amo *const fnsd[18] = { + gen_helper_vamoswapw_v_d, + gen_helper_vamoaddw_v_d, + gen_helper_vamoxorw_v_d, + gen_helper_vamoandw_v_d, + gen_helper_vamoorw_v_d, + gen_helper_vamominw_v_d, + gen_helper_vamomaxw_v_d, + gen_helper_vamominuw_v_d, + gen_helper_vamomaxuw_v_d, + gen_helper_vamoswapd_v_d, + gen_helper_vamoaddd_v_d, + gen_helper_vamoxord_v_d, + gen_helper_vamoandd_v_d, + gen_helper_vamoord_v_d, + gen_helper_vamomind_v_d, + gen_helper_vamomaxd_v_d, + gen_helper_vamominud_v_d, + gen_helper_vamomaxud_v_d + }; +#endif + + if (tb_cflags(s->base.tb) & CF_PARALLEL) { + gen_helper_exit_atomic(tcg_ctx, tcg_ctx->cpu_env); + s->base.is_jmp = DISAS_NORETURN; + return true; + } else { + if (s->sew == 3) { +#ifdef TARGET_RISCV64 + fn = fnsd[seq]; +#else + /* Check done in amo_check(). 
*/ + g_assert_not_reached(); +#endif + } else { + assert(seq < ARRAY_SIZE(fnsw)); + fn = fnsw[seq]; + } + } + + FIELD_DP32(data, VDATA, MLEN, s->mlen, data); + FIELD_DP32(data, VDATA, VM, a->vm, data); + FIELD_DP32(data, VDATA, LMUL, s->lmul, data); + FIELD_DP32(data, VDATA, WD, a->wd, data); + return amo_trans(a->rd, a->rs1, a->rs2, data, fn, s); +} +/* + * There are two rules checked here. + * + * 1. SEW must be at least as wide as the AMO memory element size. + * + * 2. If SEW is greater than XLEN, an illegal instruction exception is raised. + */ +static bool amo_check(DisasContext *s, arg_rwdvm* a) +{ + return (!s->vill && has_ext(s, RVA) && + (!a->wd || vext_check_overlap_mask(s, a->rd, a->vm, false)) && + vext_check_reg(s, a->rd, false) && + vext_check_reg(s, a->rs2, false) && + ((1 << s->sew) <= sizeof(target_ulong)) && + ((1 << s->sew) >= 4)); +} + +GEN_VEXT_TRANS(vamoswapw_v, 0, rwdvm, amo_op, amo_check) +GEN_VEXT_TRANS(vamoaddw_v, 1, rwdvm, amo_op, amo_check) +GEN_VEXT_TRANS(vamoxorw_v, 2, rwdvm, amo_op, amo_check) +GEN_VEXT_TRANS(vamoandw_v, 3, rwdvm, amo_op, amo_check) +GEN_VEXT_TRANS(vamoorw_v, 4, rwdvm, amo_op, amo_check) +GEN_VEXT_TRANS(vamominw_v, 5, rwdvm, amo_op, amo_check) +GEN_VEXT_TRANS(vamomaxw_v, 6, rwdvm, amo_op, amo_check) +GEN_VEXT_TRANS(vamominuw_v, 7, rwdvm, amo_op, amo_check) +GEN_VEXT_TRANS(vamomaxuw_v, 8, rwdvm, amo_op, amo_check) +#ifdef TARGET_RISCV64 +GEN_VEXT_TRANS(vamoswapd_v, 9, rwdvm, amo_op, amo_check) +GEN_VEXT_TRANS(vamoaddd_v, 10, rwdvm, amo_op, amo_check) +GEN_VEXT_TRANS(vamoxord_v, 11, rwdvm, amo_op, amo_check) +GEN_VEXT_TRANS(vamoandd_v, 12, rwdvm, amo_op, amo_check) +GEN_VEXT_TRANS(vamoord_v, 13, rwdvm, amo_op, amo_check) +GEN_VEXT_TRANS(vamomind_v, 14, rwdvm, amo_op, amo_check) +GEN_VEXT_TRANS(vamomaxd_v, 15, rwdvm, amo_op, amo_check) +GEN_VEXT_TRANS(vamominud_v, 16, rwdvm, amo_op, amo_check) +GEN_VEXT_TRANS(vamomaxud_v, 17, rwdvm, amo_op, amo_check) +#endif + +/* + *** Vector Integer Arithmetic Instructions + */ +#define MAXSZ(s) (s->vlen >> (3 - s->lmul)) + +static bool opivv_check(DisasContext *s, arg_rmrr *a) +{ + return (vext_check_isa_ill(s) && + vext_check_overlap_mask(s, a->rd, a->vm, false) && + vext_check_reg(s, a->rd, false) && + vext_check_reg(s, a->rs2, false) && + vext_check_reg(s, a->rs1, false)); +} + +typedef void GVecGen3Fn(TCGContext *, unsigned, uint32_t, uint32_t, + uint32_t, uint32_t, uint32_t); + +static inline bool +do_opivv_gvec(DisasContext *s, arg_rmrr *a, GVecGen3Fn *gvec_fn, + gen_helper_gvec_4_ptr *fn) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + TCGLabel *over = gen_new_label(tcg_ctx); + if (!opivv_check(s, a)) { + return false; + } + + tcg_gen_brcondi_tl(tcg_ctx, TCG_COND_EQ, tcg_ctx->cpu_vl, 0, over); + + if (a->vm && s->vl_eq_vlmax) { + gvec_fn(tcg_ctx, s->sew, vreg_ofs(s, a->rd), + vreg_ofs(s, a->rs2), vreg_ofs(s, a->rs1), + MAXSZ(s), MAXSZ(s)); + } else { + uint32_t data = 0; + + FIELD_DP32(data, VDATA, MLEN, s->mlen, data); + FIELD_DP32(data, VDATA, VM, a->vm, data); + FIELD_DP32(data, VDATA, LMUL, s->lmul, data); + tcg_gen_gvec_4_ptr(tcg_ctx, vreg_ofs(s, a->rd), vreg_ofs(s, 0), + vreg_ofs(s, a->rs1), vreg_ofs(s, a->rs2), + tcg_ctx->cpu_env, 0, s->vlen / 8, data, fn); + } + gen_set_label(tcg_ctx, over); + return true; +} + +/* OPIVV with GVEC IR */ +#define GEN_OPIVV_GVEC_TRANS(NAME, SUF) \ +static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ +{ \ + static gen_helper_gvec_4_ptr * const fns[4] = { \ + gen_helper_##NAME##_b, gen_helper_##NAME##_h, \ + gen_helper_##NAME##_w, gen_helper_##NAME##_d, \ + }; \ 
+ return do_opivv_gvec(s, a, tcg_gen_gvec_##SUF, fns[s->sew]); \ +} + +GEN_OPIVV_GVEC_TRANS(vadd_vv, add) +GEN_OPIVV_GVEC_TRANS(vsub_vv, sub) + +typedef void gen_helper_opivx(TCGContext *, TCGv_ptr, TCGv_ptr, TCGv, TCGv_ptr, + TCGv_env, TCGv_i32); + +static bool opivx_trans(uint32_t vd, uint32_t rs1, uint32_t vs2, uint32_t vm, + gen_helper_opivx *fn, DisasContext *s) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + TCGv_ptr dest, src2, mask; + TCGv src1; + TCGv_i32 desc; + uint32_t data = 0; + + TCGLabel *over = gen_new_label(tcg_ctx); + tcg_gen_brcondi_tl(tcg_ctx, TCG_COND_EQ, tcg_ctx->cpu_vl, 0, over); + + dest = tcg_temp_new_ptr(tcg_ctx); + mask = tcg_temp_new_ptr(tcg_ctx); + src2 = tcg_temp_new_ptr(tcg_ctx); + src1 = tcg_temp_new(tcg_ctx); + gen_get_gpr(tcg_ctx, src1, rs1); + + FIELD_DP32(data, VDATA, MLEN, s->mlen, data); + FIELD_DP32(data, VDATA, VM, vm, data); + FIELD_DP32(data, VDATA, LMUL, s->lmul, data); + desc = tcg_const_i32(tcg_ctx, simd_desc(0, s->vlen / 8, data)); + + tcg_gen_addi_ptr(tcg_ctx, dest, tcg_ctx->cpu_env, vreg_ofs(s, vd)); + tcg_gen_addi_ptr(tcg_ctx, src2, tcg_ctx->cpu_env, vreg_ofs(s, vs2)); + tcg_gen_addi_ptr(tcg_ctx, mask, tcg_ctx->cpu_env, vreg_ofs(s, 0)); + + fn(tcg_ctx, dest, mask, src1, src2, tcg_ctx->cpu_env, desc); + + tcg_temp_free_ptr(tcg_ctx, dest); + tcg_temp_free_ptr(tcg_ctx, mask); + tcg_temp_free_ptr(tcg_ctx, src2); + tcg_temp_free(tcg_ctx, src1); + tcg_temp_free_i32(tcg_ctx, desc); + gen_set_label(tcg_ctx, over); + return true; +} + +static bool opivx_check(DisasContext *s, arg_rmrr *a) +{ + return (vext_check_isa_ill(s) && + vext_check_overlap_mask(s, a->rd, a->vm, false) && + vext_check_reg(s, a->rd, false) && + vext_check_reg(s, a->rs2, false)); +} + +typedef void GVecGen2sFn(TCGContext *, unsigned, uint32_t, uint32_t, TCGv_i64, + uint32_t, uint32_t); + +static inline bool +do_opivx_gvec(DisasContext *s, arg_rmrr *a, GVecGen2sFn *gvec_fn, + gen_helper_opivx *fn) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + if (!opivx_check(s, a)) { + return false; + } + + if (a->vm && s->vl_eq_vlmax) { + TCGv_i64 src1 = tcg_temp_new_i64(tcg_ctx); + TCGv tmp = tcg_temp_new(tcg_ctx); + + gen_get_gpr(tcg_ctx, tmp, a->rs1); + tcg_gen_ext_tl_i64(tcg_ctx, src1, tmp); + gvec_fn(tcg_ctx, s->sew, vreg_ofs(s, a->rd), vreg_ofs(s, a->rs2), + src1, MAXSZ(s), MAXSZ(s)); + + tcg_temp_free_i64(tcg_ctx, src1); + tcg_temp_free(tcg_ctx, tmp); + return true; + } + return opivx_trans(a->rd, a->rs1, a->rs2, a->vm, fn, s); +} + +/* OPIVX with GVEC IR */ +#define GEN_OPIVX_GVEC_TRANS(NAME, SUF) \ +static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ +{ \ + static gen_helper_opivx * const fns[4] = { \ + gen_helper_##NAME##_b, gen_helper_##NAME##_h, \ + gen_helper_##NAME##_w, gen_helper_##NAME##_d, \ + }; \ + return do_opivx_gvec(s, a, tcg_gen_gvec_##SUF, fns[s->sew]); \ +} + +GEN_OPIVX_GVEC_TRANS(vadd_vx, adds) +GEN_OPIVX_GVEC_TRANS(vsub_vx, subs) + +static void gen_vec_rsub8_i64(TCGContext *tcg_ctx, TCGv_i64 d, TCGv_i64 a, TCGv_i64 b) +{ + tcg_gen_vec_sub8_i64(tcg_ctx, d, b, a); +} + +static void gen_vec_rsub16_i64(TCGContext *tcg_ctx, TCGv_i64 d, TCGv_i64 a, TCGv_i64 b) +{ + tcg_gen_vec_sub16_i64(tcg_ctx, d, b, a); +} + +static void gen_rsub_i32(TCGContext *tcg_ctx, TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) +{ + tcg_gen_sub_i32(tcg_ctx, ret, arg2, arg1); +} + +static void gen_rsub_i64(TCGContext *tcg_ctx, TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) +{ + tcg_gen_sub_i64(tcg_ctx, ret, arg2, arg1); +} + +static void gen_rsub_vec(TCGContext *tcg_ctx, unsigned vece, TCGv_vec r, 
TCGv_vec a, TCGv_vec b) +{ + tcg_gen_sub_vec(tcg_ctx, vece, r, b, a); +} + +static void tcg_gen_gvec_rsubs(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, uint32_t aofs, + TCGv_i64 c, uint32_t oprsz, uint32_t maxsz) +{ + static const TCGOpcode vecop_list[] = { INDEX_op_sub_vec, 0 }; + static const GVecGen2s rsub_op[4] = { + { .fni8 = gen_vec_rsub8_i64, + .fniv = gen_rsub_vec, + .fno = gen_helper_vec_rsubs8, + .opt_opc = vecop_list, + .vece = MO_8 }, + { .fni8 = gen_vec_rsub16_i64, + .fniv = gen_rsub_vec, + .fno = gen_helper_vec_rsubs16, + .opt_opc = vecop_list, + .vece = MO_16 }, + { .fni4 = gen_rsub_i32, + .fniv = gen_rsub_vec, + .fno = gen_helper_vec_rsubs32, + .opt_opc = vecop_list, + .vece = MO_32 }, + { .fni8 = gen_rsub_i64, + .fniv = gen_rsub_vec, + .fno = gen_helper_vec_rsubs64, + .opt_opc = vecop_list, + .prefer_i64 = TCG_TARGET_REG_BITS == 64, + .vece = MO_64 }, + }; + + tcg_debug_assert(vece <= MO_64); + tcg_gen_gvec_2s(tcg_ctx, dofs, aofs, oprsz, maxsz, c, &rsub_op[vece]); +} + +GEN_OPIVX_GVEC_TRANS(vrsub_vx, rsubs) + +static bool opivi_trans(uint32_t vd, uint32_t imm, uint32_t vs2, uint32_t vm, + gen_helper_opivx *fn, DisasContext *s, int zx) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + TCGv_ptr dest, src2, mask; + TCGv src1; + TCGv_i32 desc; + uint32_t data = 0; + + TCGLabel *over = gen_new_label(tcg_ctx); + tcg_gen_brcondi_tl(tcg_ctx, TCG_COND_EQ, tcg_ctx->cpu_vl, 0, over); + + dest = tcg_temp_new_ptr(tcg_ctx); + mask = tcg_temp_new_ptr(tcg_ctx); + src2 = tcg_temp_new_ptr(tcg_ctx); + if (zx) { + src1 = tcg_const_tl(tcg_ctx, imm); + } else { + src1 = tcg_const_tl(tcg_ctx, sextract64(imm, 0, 5)); + } + FIELD_DP32(data, VDATA, MLEN, s->mlen, data); + FIELD_DP32(data, VDATA, VM, vm, data); + FIELD_DP32(data, VDATA, LMUL, s->lmul, data); + desc = tcg_const_i32(tcg_ctx, simd_desc(0, s->vlen / 8, data)); + + tcg_gen_addi_ptr(tcg_ctx, dest, tcg_ctx->cpu_env, vreg_ofs(s, vd)); + tcg_gen_addi_ptr(tcg_ctx, src2, tcg_ctx->cpu_env, vreg_ofs(s, vs2)); + tcg_gen_addi_ptr(tcg_ctx, mask, tcg_ctx->cpu_env, vreg_ofs(s, 0)); + + fn(tcg_ctx, dest, mask, src1, src2, tcg_ctx->cpu_env, desc); + + tcg_temp_free_ptr(tcg_ctx, dest); + tcg_temp_free_ptr(tcg_ctx, mask); + tcg_temp_free_ptr(tcg_ctx, src2); + tcg_temp_free(tcg_ctx, src1); + tcg_temp_free_i32(tcg_ctx, desc); + gen_set_label(tcg_ctx, over); + return true; +} + +typedef void GVecGen2iFn(TCGContext *, unsigned, uint32_t, uint32_t, int64_t, + uint32_t, uint32_t); + +static inline bool +do_opivi_gvec(DisasContext *s, arg_rmrr *a, GVecGen2iFn *gvec_fn, + gen_helper_opivx *fn, int zx) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + if (!opivx_check(s, a)) { + return false; + } + + if (a->vm && s->vl_eq_vlmax) { + if (zx) { + gvec_fn(tcg_ctx, s->sew, vreg_ofs(s, a->rd), vreg_ofs(s, a->rs2), + extract64(a->rs1, 0, 5), MAXSZ(s), MAXSZ(s)); + } else { + gvec_fn(tcg_ctx, s->sew, vreg_ofs(s, a->rd), vreg_ofs(s, a->rs2), + sextract64(a->rs1, 0, 5), MAXSZ(s), MAXSZ(s)); + } + } else { + return opivi_trans(a->rd, a->rs1, a->rs2, a->vm, fn, s, zx); + } + return true; +} + +/* OPIVI with GVEC IR */ +#define GEN_OPIVI_GVEC_TRANS(NAME, ZX, OPIVX, SUF) \ +static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ +{ \ + static gen_helper_opivx * const fns[4] = { \ + gen_helper_##OPIVX##_b, gen_helper_##OPIVX##_h, \ + gen_helper_##OPIVX##_w, gen_helper_##OPIVX##_d, \ + }; \ + return do_opivi_gvec(s, a, tcg_gen_gvec_##SUF, \ + fns[s->sew], ZX); \ +} + +GEN_OPIVI_GVEC_TRANS(vadd_vi, 0, vadd_vx, addi) + +static void tcg_gen_gvec_rsubi(TCGContext *tcg_ctx, unsigned 
vece, uint32_t dofs, uint32_t aofs, + int64_t c, uint32_t oprsz, uint32_t maxsz) +{ + TCGv_i64 tmp = tcg_const_i64(tcg_ctx, c); + tcg_gen_gvec_rsubs(tcg_ctx, vece, dofs, aofs, tmp, oprsz, maxsz); + tcg_temp_free_i64(tcg_ctx, tmp); +} + +GEN_OPIVI_GVEC_TRANS(vrsub_vi, 0, vrsub_vx, rsubi) + +/* Vector Widening Integer Add/Subtract */ + +/* OPIVV with WIDEN */ +static bool opivv_widen_check(DisasContext *s, arg_rmrr *a) +{ + return (vext_check_isa_ill(s) && + vext_check_overlap_mask(s, a->rd, a->vm, true) && + vext_check_reg(s, a->rd, true) && + vext_check_reg(s, a->rs2, false) && + vext_check_reg(s, a->rs1, false) && + vext_check_overlap_group(a->rd, 2 << s->lmul, a->rs2, + 1 << s->lmul) && + vext_check_overlap_group(a->rd, 2 << s->lmul, a->rs1, + 1 << s->lmul) && + (s->lmul < 0x3) && (s->sew < 0x3)); +} + +static bool do_opivv_widen(DisasContext *s, arg_rmrr *a, + gen_helper_gvec_4_ptr *fn, + bool (*checkfn)(DisasContext *, arg_rmrr *)) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + if (checkfn(s, a)) { + uint32_t data = 0; + TCGLabel *over = gen_new_label(tcg_ctx); + tcg_gen_brcondi_tl(tcg_ctx, TCG_COND_EQ, tcg_ctx->cpu_vl, 0, over); + + FIELD_DP32(data, VDATA, MLEN, s->mlen, data); + FIELD_DP32(data, VDATA, VM, a->vm, data); + FIELD_DP32(data, VDATA, LMUL, s->lmul, data); + tcg_gen_gvec_4_ptr(tcg_ctx, vreg_ofs(s, a->rd), vreg_ofs(s, 0), + vreg_ofs(s, a->rs1), + vreg_ofs(s, a->rs2), + tcg_ctx->cpu_env, 0, s->vlen / 8, + data, fn); + gen_set_label(tcg_ctx, over); + return true; + } + return false; +} + +#define GEN_OPIVV_WIDEN_TRANS(NAME, CHECK) \ +static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ +{ \ + static gen_helper_gvec_4_ptr * const fns[3] = { \ + gen_helper_##NAME##_b, \ + gen_helper_##NAME##_h, \ + gen_helper_##NAME##_w \ + }; \ + return do_opivv_widen(s, a, fns[s->sew], CHECK); \ +} + +GEN_OPIVV_WIDEN_TRANS(vwaddu_vv, opivv_widen_check) +GEN_OPIVV_WIDEN_TRANS(vwadd_vv, opivv_widen_check) +GEN_OPIVV_WIDEN_TRANS(vwsubu_vv, opivv_widen_check) +GEN_OPIVV_WIDEN_TRANS(vwsub_vv, opivv_widen_check) + +/* OPIVX with WIDEN */ +static bool opivx_widen_check(DisasContext *s, arg_rmrr *a) +{ + return (vext_check_isa_ill(s) && + vext_check_overlap_mask(s, a->rd, a->vm, true) && + vext_check_reg(s, a->rd, true) && + vext_check_reg(s, a->rs2, false) && + vext_check_overlap_group(a->rd, 2 << s->lmul, a->rs2, + 1 << s->lmul) && + (s->lmul < 0x3) && (s->sew < 0x3)); +} + +static bool do_opivx_widen(DisasContext *s, arg_rmrr *a, + gen_helper_opivx *fn) +{ + if (opivx_widen_check(s, a)) { + return opivx_trans(a->rd, a->rs1, a->rs2, a->vm, fn, s); + } + return false; +} + +#define GEN_OPIVX_WIDEN_TRANS(NAME) \ +static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ +{ \ + static gen_helper_opivx * const fns[3] = { \ + gen_helper_##NAME##_b, \ + gen_helper_##NAME##_h, \ + gen_helper_##NAME##_w \ + }; \ + return do_opivx_widen(s, a, fns[s->sew]); \ +} + +GEN_OPIVX_WIDEN_TRANS(vwaddu_vx) +GEN_OPIVX_WIDEN_TRANS(vwadd_vx) +GEN_OPIVX_WIDEN_TRANS(vwsubu_vx) +GEN_OPIVX_WIDEN_TRANS(vwsub_vx) + +/* WIDEN OPIVV with WIDEN */ +static bool opiwv_widen_check(DisasContext *s, arg_rmrr *a) +{ + return (vext_check_isa_ill(s) && + vext_check_overlap_mask(s, a->rd, a->vm, true) && + vext_check_reg(s, a->rd, true) && + vext_check_reg(s, a->rs2, true) && + vext_check_reg(s, a->rs1, false) && + vext_check_overlap_group(a->rd, 2 << s->lmul, a->rs1, + 1 << s->lmul) && + (s->lmul < 0x3) && (s->sew < 0x3)); +} + +static bool do_opiwv_widen(DisasContext *s, arg_rmrr *a, + gen_helper_gvec_4_ptr *fn) +{ + TCGContext 
*tcg_ctx = s->uc->tcg_ctx; + if (opiwv_widen_check(s, a)) { + uint32_t data = 0; + TCGLabel *over = gen_new_label(tcg_ctx); + tcg_gen_brcondi_tl(tcg_ctx, TCG_COND_EQ, tcg_ctx->cpu_vl, 0, over); + + FIELD_DP32(data, VDATA, MLEN, s->mlen, data); + FIELD_DP32(data, VDATA, VM, a->vm, data); + FIELD_DP32(data, VDATA, LMUL, s->lmul, data); + tcg_gen_gvec_4_ptr(tcg_ctx, vreg_ofs(s, a->rd), vreg_ofs(s, 0), + vreg_ofs(s, a->rs1), + vreg_ofs(s, a->rs2), + tcg_ctx->cpu_env, 0, s->vlen / 8, data, fn); + gen_set_label(tcg_ctx, over); + return true; + } + return false; +} + +#define GEN_OPIWV_WIDEN_TRANS(NAME) \ +static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ +{ \ + static gen_helper_gvec_4_ptr * const fns[3] = { \ + gen_helper_##NAME##_b, \ + gen_helper_##NAME##_h, \ + gen_helper_##NAME##_w \ + }; \ + return do_opiwv_widen(s, a, fns[s->sew]); \ +} + +GEN_OPIWV_WIDEN_TRANS(vwaddu_wv) +GEN_OPIWV_WIDEN_TRANS(vwadd_wv) +GEN_OPIWV_WIDEN_TRANS(vwsubu_wv) +GEN_OPIWV_WIDEN_TRANS(vwsub_wv) + +/* WIDEN OPIVX with WIDEN */ +static bool opiwx_widen_check(DisasContext *s, arg_rmrr *a) +{ + return (vext_check_isa_ill(s) && + vext_check_overlap_mask(s, a->rd, a->vm, true) && + vext_check_reg(s, a->rd, true) && + vext_check_reg(s, a->rs2, true) && + (s->lmul < 0x3) && (s->sew < 0x3)); +} + +static bool do_opiwx_widen(DisasContext *s, arg_rmrr *a, + gen_helper_opivx *fn) +{ + if (opiwx_widen_check(s, a)) { + return opivx_trans(a->rd, a->rs1, a->rs2, a->vm, fn, s); + } + return false; +} + +#define GEN_OPIWX_WIDEN_TRANS(NAME) \ +static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ +{ \ + static gen_helper_opivx * const fns[3] = { \ + gen_helper_##NAME##_b, \ + gen_helper_##NAME##_h, \ + gen_helper_##NAME##_w \ + }; \ + return do_opiwx_widen(s, a, fns[s->sew]); \ +} + +GEN_OPIWX_WIDEN_TRANS(vwaddu_wx) +GEN_OPIWX_WIDEN_TRANS(vwadd_wx) +GEN_OPIWX_WIDEN_TRANS(vwsubu_wx) +GEN_OPIWX_WIDEN_TRANS(vwsub_wx) + +/* Vector Integer Add-with-Carry / Subtract-with-Borrow Instructions */ +/* OPIVV without GVEC IR */ +#define GEN_OPIVV_TRANS(NAME, CHECK) \ +static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ +{ \ + TCGContext *tcg_ctx = s->uc->tcg_ctx; \ + if (CHECK(s, a)) { \ + uint32_t data = 0; \ + static gen_helper_gvec_4_ptr * const fns[4] = { \ + gen_helper_##NAME##_b, gen_helper_##NAME##_h, \ + gen_helper_##NAME##_w, gen_helper_##NAME##_d, \ + }; \ + TCGLabel *over = gen_new_label(tcg_ctx); \ + tcg_gen_brcondi_tl(tcg_ctx, TCG_COND_EQ, tcg_ctx->cpu_vl, 0, over); \ + \ + FIELD_DP32(data, VDATA, MLEN, s->mlen, data); \ + FIELD_DP32(data, VDATA, VM, a->vm, data); \ + FIELD_DP32(data, VDATA, LMUL, s->lmul, data); \ + tcg_gen_gvec_4_ptr(tcg_ctx, vreg_ofs(s, a->rd), vreg_ofs(s, 0), \ + vreg_ofs(s, a->rs1), \ + vreg_ofs(s, a->rs2), tcg_ctx->cpu_env, 0, \ + s->vlen / 8, data, fns[s->sew]); \ + gen_set_label(tcg_ctx, over); \ + return true; \ + } \ + return false; \ +} + +/* + * For vadc and vsbc, an illegal instruction exception is raised if the + * destination vector register is v0 and LMUL > 1. (Section 12.3) + */ +static bool opivv_vadc_check(DisasContext *s, arg_rmrr *a) +{ + return (vext_check_isa_ill(s) && + vext_check_reg(s, a->rd, false) && + vext_check_reg(s, a->rs2, false) && + vext_check_reg(s, a->rs1, false) && + ((a->rd != 0) || (s->lmul == 0))); +} + +GEN_OPIVV_TRANS(vadc_vvm, opivv_vadc_check) +GEN_OPIVV_TRANS(vsbc_vvm, opivv_vadc_check) + +/* + * For vmadc and vmsbc, an illegal instruction exception is raised if the + * destination vector register overlaps a source vector register group. 
+ */ +static bool opivv_vmadc_check(DisasContext *s, arg_rmrr *a) +{ + return (vext_check_isa_ill(s) && + vext_check_reg(s, a->rs2, false) && + vext_check_reg(s, a->rs1, false) && + vext_check_overlap_group(a->rd, 1, a->rs1, 1 << s->lmul) && + vext_check_overlap_group(a->rd, 1, a->rs2, 1 << s->lmul)); +} + +GEN_OPIVV_TRANS(vmadc_vvm, opivv_vmadc_check) +GEN_OPIVV_TRANS(vmsbc_vvm, opivv_vmadc_check) + +static bool opivx_vadc_check(DisasContext *s, arg_rmrr *a) +{ + return (vext_check_isa_ill(s) && + vext_check_reg(s, a->rd, false) && + vext_check_reg(s, a->rs2, false) && + ((a->rd != 0) || (s->lmul == 0))); +} + +/* OPIVX without GVEC IR */ +#define GEN_OPIVX_TRANS(NAME, CHECK) \ +static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ +{ \ + if (CHECK(s, a)) { \ + static gen_helper_opivx * const fns[4] = { \ + gen_helper_##NAME##_b, gen_helper_##NAME##_h, \ + gen_helper_##NAME##_w, gen_helper_##NAME##_d, \ + }; \ + \ + return opivx_trans(a->rd, a->rs1, a->rs2, a->vm, fns[s->sew], s);\ + } \ + return false; \ +} + +GEN_OPIVX_TRANS(vadc_vxm, opivx_vadc_check) +GEN_OPIVX_TRANS(vsbc_vxm, opivx_vadc_check) + +static bool opivx_vmadc_check(DisasContext *s, arg_rmrr *a) +{ + return (vext_check_isa_ill(s) && + vext_check_reg(s, a->rs2, false) && + vext_check_overlap_group(a->rd, 1, a->rs2, 1 << s->lmul)); +} + +GEN_OPIVX_TRANS(vmadc_vxm, opivx_vmadc_check) +GEN_OPIVX_TRANS(vmsbc_vxm, opivx_vmadc_check) + +/* OPIVI without GVEC IR */ +#define GEN_OPIVI_TRANS(NAME, ZX, OPIVX, CHECK) \ +static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ +{ \ + if (CHECK(s, a)) { \ + static gen_helper_opivx * const fns[4] = { \ + gen_helper_##OPIVX##_b, gen_helper_##OPIVX##_h, \ + gen_helper_##OPIVX##_w, gen_helper_##OPIVX##_d, \ + }; \ + return opivi_trans(a->rd, a->rs1, a->rs2, a->vm, \ + fns[s->sew], s, ZX); \ + } \ + return false; \ +} + +GEN_OPIVI_TRANS(vadc_vim, 0, vadc_vxm, opivx_vadc_check) +GEN_OPIVI_TRANS(vmadc_vim, 0, vmadc_vxm, opivx_vmadc_check) + +/* Vector Bitwise Logical Instructions */ +GEN_OPIVV_GVEC_TRANS(vand_vv, and) +GEN_OPIVV_GVEC_TRANS(vor_vv, or) +GEN_OPIVV_GVEC_TRANS(vxor_vv, xor) +GEN_OPIVX_GVEC_TRANS(vand_vx, ands) +GEN_OPIVX_GVEC_TRANS(vor_vx, ors) +GEN_OPIVX_GVEC_TRANS(vxor_vx, xors) +GEN_OPIVI_GVEC_TRANS(vand_vi, 0, vand_vx, andi) +GEN_OPIVI_GVEC_TRANS(vor_vi, 0, vor_vx, ori) +GEN_OPIVI_GVEC_TRANS(vxor_vi, 0, vxor_vx, xori) + +/* Vector Single-Width Bit Shift Instructions */ +GEN_OPIVV_GVEC_TRANS(vsll_vv, shlv) +GEN_OPIVV_GVEC_TRANS(vsrl_vv, shrv) +GEN_OPIVV_GVEC_TRANS(vsra_vv, sarv) + +typedef void GVecGen2sFn32(TCGContext *, unsigned, uint32_t, uint32_t, TCGv_i32, + uint32_t, uint32_t); + +static inline bool +do_opivx_gvec_shift(DisasContext *s, arg_rmrr *a, GVecGen2sFn32 *gvec_fn, + gen_helper_opivx *fn) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + if (!opivx_check(s, a)) { + return false; + } + + if (a->vm && s->vl_eq_vlmax) { + TCGv_i32 src1 = tcg_temp_new_i32(tcg_ctx); + TCGv tmp = tcg_temp_new(tcg_ctx); + + gen_get_gpr(tcg_ctx, tmp, a->rs1); + tcg_gen_trunc_tl_i32(tcg_ctx, src1, tmp); + tcg_gen_extract_i32(tcg_ctx, src1, src1, 0, s->sew + 3); + gvec_fn(tcg_ctx, s->sew, vreg_ofs(s, a->rd), vreg_ofs(s, a->rs2), + src1, MAXSZ(s), MAXSZ(s)); + + tcg_temp_free_i32(tcg_ctx, src1); + tcg_temp_free(tcg_ctx, tmp); + return true; + } + return opivx_trans(a->rd, a->rs1, a->rs2, a->vm, fn, s); +} + +#define GEN_OPIVX_GVEC_SHIFT_TRANS(NAME, SUF) \ +static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ +{ \ + static gen_helper_opivx * const fns[4] = { \ + gen_helper_##NAME##_b, 
gen_helper_##NAME##_h, \ + gen_helper_##NAME##_w, gen_helper_##NAME##_d, \ + }; \ + \ + return do_opivx_gvec_shift(s, a, tcg_gen_gvec_##SUF, fns[s->sew]); \ +} + +GEN_OPIVX_GVEC_SHIFT_TRANS(vsll_vx, shls) +GEN_OPIVX_GVEC_SHIFT_TRANS(vsrl_vx, shrs) +GEN_OPIVX_GVEC_SHIFT_TRANS(vsra_vx, sars) + +GEN_OPIVI_GVEC_TRANS(vsll_vi, 1, vsll_vx, shli) +GEN_OPIVI_GVEC_TRANS(vsrl_vi, 1, vsrl_vx, shri) +GEN_OPIVI_GVEC_TRANS(vsra_vi, 1, vsra_vx, sari) + +/* Vector Narrowing Integer Right Shift Instructions */ +static bool opivv_narrow_check(DisasContext *s, arg_rmrr *a) +{ + return (vext_check_isa_ill(s) && + vext_check_overlap_mask(s, a->rd, a->vm, false) && + vext_check_reg(s, a->rd, false) && + vext_check_reg(s, a->rs2, true) && + vext_check_reg(s, a->rs1, false) && + vext_check_overlap_group(a->rd, 1 << s->lmul, a->rs2, + 2 << s->lmul) && + (s->lmul < 0x3) && (s->sew < 0x3)); +} + +/* OPIVV with NARROW */ +#define GEN_OPIVV_NARROW_TRANS(NAME) \ +static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ +{ \ + TCGContext *tcg_ctx = s->uc->tcg_ctx; \ + if (opivv_narrow_check(s, a)) { \ + uint32_t data = 0; \ + static gen_helper_gvec_4_ptr * const fns[3] = { \ + gen_helper_##NAME##_b, \ + gen_helper_##NAME##_h, \ + gen_helper_##NAME##_w, \ + }; \ + TCGLabel *over = gen_new_label(tcg_ctx); \ + tcg_gen_brcondi_tl(tcg_ctx, TCG_COND_EQ, tcg_ctx->cpu_vl, 0, over); \ + \ + FIELD_DP32(data, VDATA, MLEN, s->mlen, data); \ + FIELD_DP32(data, VDATA, VM, a->vm, data); \ + FIELD_DP32(data, VDATA, LMUL, s->lmul, data); \ + tcg_gen_gvec_4_ptr(tcg_ctx, vreg_ofs(s, a->rd), vreg_ofs(s, 0), \ + vreg_ofs(s, a->rs1), \ + vreg_ofs(s, a->rs2), tcg_ctx->cpu_env, 0, \ + s->vlen / 8, data, fns[s->sew]); \ + gen_set_label(tcg_ctx, over); \ + return true; \ + } \ + return false; \ +} +GEN_OPIVV_NARROW_TRANS(vnsra_vv) +GEN_OPIVV_NARROW_TRANS(vnsrl_vv) + +static bool opivx_narrow_check(DisasContext *s, arg_rmrr *a) +{ + return (vext_check_isa_ill(s) && + vext_check_overlap_mask(s, a->rd, a->vm, false) && + vext_check_reg(s, a->rd, false) && + vext_check_reg(s, a->rs2, true) && + vext_check_overlap_group(a->rd, 1 << s->lmul, a->rs2, + 2 << s->lmul) && + (s->lmul < 0x3) && (s->sew < 0x3)); +} + +/* OPIVX with NARROW */ +#define GEN_OPIVX_NARROW_TRANS(NAME) \ +static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ +{ \ + if (opivx_narrow_check(s, a)) { \ + static gen_helper_opivx * const fns[3] = { \ + gen_helper_##NAME##_b, \ + gen_helper_##NAME##_h, \ + gen_helper_##NAME##_w, \ + }; \ + return opivx_trans(a->rd, a->rs1, a->rs2, a->vm, fns[s->sew], s);\ + } \ + return false; \ +} + +GEN_OPIVX_NARROW_TRANS(vnsra_vx) +GEN_OPIVX_NARROW_TRANS(vnsrl_vx) + +/* OPIVI with NARROW */ +#define GEN_OPIVI_NARROW_TRANS(NAME, ZX, OPIVX) \ +static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ +{ \ + if (opivx_narrow_check(s, a)) { \ + static gen_helper_opivx * const fns[3] = { \ + gen_helper_##OPIVX##_b, \ + gen_helper_##OPIVX##_h, \ + gen_helper_##OPIVX##_w, \ + }; \ + return opivi_trans(a->rd, a->rs1, a->rs2, a->vm, \ + fns[s->sew], s, ZX); \ + } \ + return false; \ +} + +GEN_OPIVI_NARROW_TRANS(vnsra_vi, 1, vnsra_vx) +GEN_OPIVI_NARROW_TRANS(vnsrl_vi, 1, vnsrl_vx) + +/* Vector Integer Comparison Instructions */ +/* + * For all comparison instructions, an illegal instruction exception is raised + * if the destination vector register overlaps a source vector register group + * and LMUL > 1. 
+ */ +static bool opivv_cmp_check(DisasContext *s, arg_rmrr *a) +{ + return (vext_check_isa_ill(s) && + vext_check_reg(s, a->rs2, false) && + vext_check_reg(s, a->rs1, false) && + ((vext_check_overlap_group(a->rd, 1, a->rs1, 1 << s->lmul) && + vext_check_overlap_group(a->rd, 1, a->rs2, 1 << s->lmul)) || + (s->lmul == 0))); +} +GEN_OPIVV_TRANS(vmseq_vv, opivv_cmp_check) +GEN_OPIVV_TRANS(vmsne_vv, opivv_cmp_check) +GEN_OPIVV_TRANS(vmsltu_vv, opivv_cmp_check) +GEN_OPIVV_TRANS(vmslt_vv, opivv_cmp_check) +GEN_OPIVV_TRANS(vmsleu_vv, opivv_cmp_check) +GEN_OPIVV_TRANS(vmsle_vv, opivv_cmp_check) + +static bool opivx_cmp_check(DisasContext *s, arg_rmrr *a) +{ + return (vext_check_isa_ill(s) && + vext_check_reg(s, a->rs2, false) && + (vext_check_overlap_group(a->rd, 1, a->rs2, 1 << s->lmul) || + (s->lmul == 0))); +} + +GEN_OPIVX_TRANS(vmseq_vx, opivx_cmp_check) +GEN_OPIVX_TRANS(vmsne_vx, opivx_cmp_check) +GEN_OPIVX_TRANS(vmsltu_vx, opivx_cmp_check) +GEN_OPIVX_TRANS(vmslt_vx, opivx_cmp_check) +GEN_OPIVX_TRANS(vmsleu_vx, opivx_cmp_check) +GEN_OPIVX_TRANS(vmsle_vx, opivx_cmp_check) +GEN_OPIVX_TRANS(vmsgtu_vx, opivx_cmp_check) +GEN_OPIVX_TRANS(vmsgt_vx, opivx_cmp_check) + +GEN_OPIVI_TRANS(vmseq_vi, 0, vmseq_vx, opivx_cmp_check) +GEN_OPIVI_TRANS(vmsne_vi, 0, vmsne_vx, opivx_cmp_check) +GEN_OPIVI_TRANS(vmsleu_vi, 1, vmsleu_vx, opivx_cmp_check) +GEN_OPIVI_TRANS(vmsle_vi, 0, vmsle_vx, opivx_cmp_check) +GEN_OPIVI_TRANS(vmsgtu_vi, 1, vmsgtu_vx, opivx_cmp_check) +GEN_OPIVI_TRANS(vmsgt_vi, 0, vmsgt_vx, opivx_cmp_check) + +/* Vector Integer Min/Max Instructions */ +GEN_OPIVV_GVEC_TRANS(vminu_vv, umin) +GEN_OPIVV_GVEC_TRANS(vmin_vv, smin) +GEN_OPIVV_GVEC_TRANS(vmaxu_vv, umax) +GEN_OPIVV_GVEC_TRANS(vmax_vv, smax) +GEN_OPIVX_TRANS(vminu_vx, opivx_check) +GEN_OPIVX_TRANS(vmin_vx, opivx_check) +GEN_OPIVX_TRANS(vmaxu_vx, opivx_check) +GEN_OPIVX_TRANS(vmax_vx, opivx_check) + +/* Vector Single-Width Integer Multiply Instructions */ +GEN_OPIVV_GVEC_TRANS(vmul_vv, mul) +GEN_OPIVV_TRANS(vmulh_vv, opivv_check) +GEN_OPIVV_TRANS(vmulhu_vv, opivv_check) +GEN_OPIVV_TRANS(vmulhsu_vv, opivv_check) +GEN_OPIVX_GVEC_TRANS(vmul_vx, muls) +GEN_OPIVX_TRANS(vmulh_vx, opivx_check) +GEN_OPIVX_TRANS(vmulhu_vx, opivx_check) +GEN_OPIVX_TRANS(vmulhsu_vx, opivx_check) + +/* Vector Integer Divide Instructions */ +GEN_OPIVV_TRANS(vdivu_vv, opivv_check) +GEN_OPIVV_TRANS(vdiv_vv, opivv_check) +GEN_OPIVV_TRANS(vremu_vv, opivv_check) +GEN_OPIVV_TRANS(vrem_vv, opivv_check) +GEN_OPIVX_TRANS(vdivu_vx, opivx_check) +GEN_OPIVX_TRANS(vdiv_vx, opivx_check) +GEN_OPIVX_TRANS(vremu_vx, opivx_check) +GEN_OPIVX_TRANS(vrem_vx, opivx_check) + +/* Vector Widening Integer Multiply Instructions */ +GEN_OPIVV_WIDEN_TRANS(vwmul_vv, opivv_widen_check) +GEN_OPIVV_WIDEN_TRANS(vwmulu_vv, opivv_widen_check) +GEN_OPIVV_WIDEN_TRANS(vwmulsu_vv, opivv_widen_check) +GEN_OPIVX_WIDEN_TRANS(vwmul_vx) +GEN_OPIVX_WIDEN_TRANS(vwmulu_vx) +GEN_OPIVX_WIDEN_TRANS(vwmulsu_vx) + +/* Vector Single-Width Integer Multiply-Add Instructions */ +GEN_OPIVV_TRANS(vmacc_vv, opivv_check) +GEN_OPIVV_TRANS(vnmsac_vv, opivv_check) +GEN_OPIVV_TRANS(vmadd_vv, opivv_check) +GEN_OPIVV_TRANS(vnmsub_vv, opivv_check) +GEN_OPIVX_TRANS(vmacc_vx, opivx_check) +GEN_OPIVX_TRANS(vnmsac_vx, opivx_check) +GEN_OPIVX_TRANS(vmadd_vx, opivx_check) +GEN_OPIVX_TRANS(vnmsub_vx, opivx_check) + +/* Vector Widening Integer Multiply-Add Instructions */ +GEN_OPIVV_WIDEN_TRANS(vwmaccu_vv, opivv_widen_check) +GEN_OPIVV_WIDEN_TRANS(vwmacc_vv, opivv_widen_check) +GEN_OPIVV_WIDEN_TRANS(vwmaccsu_vv, opivv_widen_check) 
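+/*
+ * Note: the _vx widening multiply-add forms below take a scalar rs1 operand
+ * and are routed through do_opivx_widen(), i.e. they always use the
+ * out-of-line helpers rather than a gvec expansion.
+ */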
+GEN_OPIVX_WIDEN_TRANS(vwmaccu_vx) +GEN_OPIVX_WIDEN_TRANS(vwmacc_vx) +GEN_OPIVX_WIDEN_TRANS(vwmaccsu_vx) +GEN_OPIVX_WIDEN_TRANS(vwmaccus_vx) + +/* Vector Integer Merge and Move Instructions */ +static bool trans_vmv_v_v(DisasContext *s, arg_vmv_v_v *a) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + if (vext_check_isa_ill(s) && + vext_check_reg(s, a->rd, false) && + vext_check_reg(s, a->rs1, false)) { + + if (s->vl_eq_vlmax) { + tcg_gen_gvec_mov(tcg_ctx, s->sew, vreg_ofs(s, a->rd), + vreg_ofs(s, a->rs1), + MAXSZ(s), MAXSZ(s)); + } else { + uint32_t data; + FIELD_DP32(0, VDATA, LMUL, s->lmul, data); + static gen_helper_gvec_2_ptr * const fns[4] = { + gen_helper_vmv_v_v_b, gen_helper_vmv_v_v_h, + gen_helper_vmv_v_v_w, gen_helper_vmv_v_v_d, + }; + TCGLabel *over = gen_new_label(tcg_ctx); + tcg_gen_brcondi_tl(tcg_ctx, TCG_COND_EQ, tcg_ctx->cpu_vl, 0, over); + + tcg_gen_gvec_2_ptr(tcg_ctx, vreg_ofs(s, a->rd), vreg_ofs(s, a->rs1), + tcg_ctx->cpu_env, 0, s->vlen / 8, data, fns[s->sew]); + gen_set_label(tcg_ctx, over); + } + return true; + } + return false; +} + +typedef void gen_helper_vmv_vx(TCGContext *, TCGv_ptr, TCGv_i64, TCGv_env, TCGv_i32); +static bool trans_vmv_v_x(DisasContext *s, arg_vmv_v_x *a) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + + if (vext_check_isa_ill(s) && + vext_check_reg(s, a->rd, false)) { + + TCGv s1; + TCGLabel *over = gen_new_label(tcg_ctx); + tcg_gen_brcondi_tl(tcg_ctx, TCG_COND_EQ, tcg_ctx->cpu_vl, 0, over); + + s1 = tcg_temp_new(tcg_ctx); + gen_get_gpr(tcg_ctx, s1, a->rs1); + + if (s->vl_eq_vlmax) { + tcg_gen_gvec_dup_tl(tcg_ctx, s->sew, vreg_ofs(s, a->rd), + MAXSZ(s), MAXSZ(s), s1); + } else { + TCGv_i32 desc ; + TCGv_i64 s1_i64 = tcg_temp_new_i64(tcg_ctx); + TCGv_ptr dest = tcg_temp_new_ptr(tcg_ctx); + uint32_t data; + FIELD_DP32(0, VDATA, LMUL, s->lmul, data); + static gen_helper_vmv_vx * const fns[4] = { + gen_helper_vmv_v_x_b, gen_helper_vmv_v_x_h, + gen_helper_vmv_v_x_w, gen_helper_vmv_v_x_d, + }; + + tcg_gen_ext_tl_i64(tcg_ctx, s1_i64, s1); + desc = tcg_const_i32(tcg_ctx, simd_desc(0, s->vlen / 8, data)); + tcg_gen_addi_ptr(tcg_ctx, dest, tcg_ctx->cpu_env, vreg_ofs(s, a->rd)); + fns[s->sew](tcg_ctx, dest, s1_i64, tcg_ctx->cpu_env, desc); + + tcg_temp_free_ptr(tcg_ctx, dest); + tcg_temp_free_i32(tcg_ctx, desc); + tcg_temp_free_i64(tcg_ctx, s1_i64); + } + + tcg_temp_free(tcg_ctx, s1); + gen_set_label(tcg_ctx, over); + return true; + } + return false; +} + +static bool trans_vmv_v_i(DisasContext *s, arg_vmv_v_i *a) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + + if (vext_check_isa_ill(s) && + vext_check_reg(s, a->rd, false)) { + + int64_t simm = sextract64(a->rs1, 0, 5); + if (s->vl_eq_vlmax) { + tcg_gen_gvec_dup_imm(tcg_ctx, s->sew, vreg_ofs(s, a->rd), + MAXSZ(s), MAXSZ(s), simm); + } else { + TCGv_i32 desc; + TCGv_i64 s1; + TCGv_ptr dest; + uint32_t data; + FIELD_DP32(0, VDATA, LMUL, s->lmul, data); + static gen_helper_vmv_vx * const fns[4] = { + gen_helper_vmv_v_x_b, gen_helper_vmv_v_x_h, + gen_helper_vmv_v_x_w, gen_helper_vmv_v_x_d, + }; + TCGLabel *over = gen_new_label(tcg_ctx); + tcg_gen_brcondi_tl(tcg_ctx, TCG_COND_EQ, tcg_ctx->cpu_vl, 0, over); + + s1 = tcg_const_i64(tcg_ctx, simm); + dest = tcg_temp_new_ptr(tcg_ctx); + desc = tcg_const_i32(tcg_ctx, simd_desc(0, s->vlen / 8, data)); + tcg_gen_addi_ptr(tcg_ctx, dest, tcg_ctx->cpu_env, vreg_ofs(s, a->rd)); + fns[s->sew](tcg_ctx, dest, s1, tcg_ctx->cpu_env, desc); + + tcg_temp_free_ptr(tcg_ctx, dest); + tcg_temp_free_i32(tcg_ctx, desc); + tcg_temp_free_i64(tcg_ctx, s1); + gen_set_label(tcg_ctx, over); + } + 
return true; + } + return false; +} + +GEN_OPIVV_TRANS(vmerge_vvm, opivv_vadc_check) +GEN_OPIVX_TRANS(vmerge_vxm, opivx_vadc_check) +GEN_OPIVI_TRANS(vmerge_vim, 0, vmerge_vxm, opivx_vadc_check) + +/* + *** Vector Fixed-Point Arithmetic Instructions + */ + +/* Vector Single-Width Saturating Add and Subtract */ +GEN_OPIVV_TRANS(vsaddu_vv, opivv_check) +GEN_OPIVV_TRANS(vsadd_vv, opivv_check) +GEN_OPIVV_TRANS(vssubu_vv, opivv_check) +GEN_OPIVV_TRANS(vssub_vv, opivv_check) +GEN_OPIVX_TRANS(vsaddu_vx, opivx_check) +GEN_OPIVX_TRANS(vsadd_vx, opivx_check) +GEN_OPIVX_TRANS(vssubu_vx, opivx_check) +GEN_OPIVX_TRANS(vssub_vx, opivx_check) +GEN_OPIVI_TRANS(vsaddu_vi, 1, vsaddu_vx, opivx_check) +GEN_OPIVI_TRANS(vsadd_vi, 0, vsadd_vx, opivx_check) + +/* Vector Single-Width Averaging Add and Subtract */ +GEN_OPIVV_TRANS(vaadd_vv, opivv_check) +GEN_OPIVV_TRANS(vasub_vv, opivv_check) +GEN_OPIVX_TRANS(vaadd_vx, opivx_check) +GEN_OPIVX_TRANS(vasub_vx, opivx_check) +GEN_OPIVI_TRANS(vaadd_vi, 0, vaadd_vx, opivx_check) + +/* Vector Single-Width Fractional Multiply with Rounding and Saturation */ +GEN_OPIVV_TRANS(vsmul_vv, opivv_check) +GEN_OPIVX_TRANS(vsmul_vx, opivx_check) + +/* Vector Widening Saturating Scaled Multiply-Add */ +GEN_OPIVV_WIDEN_TRANS(vwsmaccu_vv, opivv_widen_check) +GEN_OPIVV_WIDEN_TRANS(vwsmacc_vv, opivv_widen_check) +GEN_OPIVV_WIDEN_TRANS(vwsmaccsu_vv, opivv_widen_check) +GEN_OPIVX_WIDEN_TRANS(vwsmaccu_vx) +GEN_OPIVX_WIDEN_TRANS(vwsmacc_vx) +GEN_OPIVX_WIDEN_TRANS(vwsmaccsu_vx) +GEN_OPIVX_WIDEN_TRANS(vwsmaccus_vx) + +/* Vector Single-Width Scaling Shift Instructions */ +GEN_OPIVV_TRANS(vssrl_vv, opivv_check) +GEN_OPIVV_TRANS(vssra_vv, opivv_check) +GEN_OPIVX_TRANS(vssrl_vx, opivx_check) +GEN_OPIVX_TRANS(vssra_vx, opivx_check) +GEN_OPIVI_TRANS(vssrl_vi, 1, vssrl_vx, opivx_check) +GEN_OPIVI_TRANS(vssra_vi, 0, vssra_vx, opivx_check) + +/* Vector Narrowing Fixed-Point Clip Instructions */ +GEN_OPIVV_NARROW_TRANS(vnclipu_vv) +GEN_OPIVV_NARROW_TRANS(vnclip_vv) +GEN_OPIVX_NARROW_TRANS(vnclipu_vx) +GEN_OPIVX_NARROW_TRANS(vnclip_vx) +GEN_OPIVI_NARROW_TRANS(vnclipu_vi, 1, vnclipu_vx) +GEN_OPIVI_NARROW_TRANS(vnclip_vi, 1, vnclip_vx) + +/* + *** Vector Floating-Point Arithmetic Instructions + */ +/* Vector Single-Width Floating-Point Add/Subtract Instructions */ + +/* + * If the current SEW does not correspond to a supported IEEE floating-point + * type, an illegal instruction exception is raised. 
+ */ +static bool opfvv_check(DisasContext *s, arg_rmrr *a) +{ + return (vext_check_isa_ill(s) && + vext_check_overlap_mask(s, a->rd, a->vm, false) && + vext_check_reg(s, a->rd, false) && + vext_check_reg(s, a->rs2, false) && + vext_check_reg(s, a->rs1, false) && + (s->sew != 0)); +} + +/* OPFVV without GVEC IR */ +#define GEN_OPFVV_TRANS(NAME, CHECK) \ +static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ +{ \ + TCGContext *tcg_ctx = s->uc->tcg_ctx; \ + if (CHECK(s, a)) { \ + uint32_t data = 0; \ + static gen_helper_gvec_4_ptr * const fns[3] = { \ + gen_helper_##NAME##_h, \ + gen_helper_##NAME##_w, \ + gen_helper_##NAME##_d, \ + }; \ + TCGLabel *over = gen_new_label(tcg_ctx); \ + gen_set_rm(s, 7); \ + tcg_gen_brcondi_tl(tcg_ctx, TCG_COND_EQ, tcg_ctx->cpu_vl, 0, over); \ + \ + FIELD_DP32(data, VDATA, MLEN, s->mlen, data); \ + FIELD_DP32(data, VDATA, VM, a->vm, data); \ + FIELD_DP32(data, VDATA, LMUL, s->lmul, data); \ + tcg_gen_gvec_4_ptr(tcg_ctx, vreg_ofs(s, a->rd), vreg_ofs(s, 0), \ + vreg_ofs(s, a->rs1), \ + vreg_ofs(s, a->rs2), tcg_ctx->cpu_env, 0, \ + s->vlen / 8, data, fns[s->sew - 1]); \ + gen_set_label(tcg_ctx, over); \ + return true; \ + } \ + return false; \ +} +GEN_OPFVV_TRANS(vfadd_vv, opfvv_check) +GEN_OPFVV_TRANS(vfsub_vv, opfvv_check) + +typedef void gen_helper_opfvf(TCGContext* tcg_ctx, TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv_ptr, + TCGv_env, TCGv_i32); + +static bool opfvf_trans(uint32_t vd, uint32_t rs1, uint32_t vs2, + uint32_t data, gen_helper_opfvf *fn, DisasContext *s) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + TCGv_ptr dest, src2, mask; + TCGv_i32 desc; + + TCGLabel *over = gen_new_label(tcg_ctx); + tcg_gen_brcondi_tl(tcg_ctx, TCG_COND_EQ, tcg_ctx->cpu_vl, 0, over); + + dest = tcg_temp_new_ptr(tcg_ctx); + mask = tcg_temp_new_ptr(tcg_ctx); + src2 = tcg_temp_new_ptr(tcg_ctx); + desc = tcg_const_i32(tcg_ctx, simd_desc(0, s->vlen / 8, data)); + + tcg_gen_addi_ptr(tcg_ctx, dest, tcg_ctx->cpu_env, vreg_ofs(s, vd)); + tcg_gen_addi_ptr(tcg_ctx, src2, tcg_ctx->cpu_env, vreg_ofs(s, vs2)); + tcg_gen_addi_ptr(tcg_ctx, mask, tcg_ctx->cpu_env, vreg_ofs(s, 0)); + + fn(tcg_ctx, dest, mask, tcg_ctx->cpu_fpr[rs1], src2, tcg_ctx->cpu_env, desc); + + tcg_temp_free_ptr(tcg_ctx, dest); + tcg_temp_free_ptr(tcg_ctx, mask); + tcg_temp_free_ptr(tcg_ctx, src2); + tcg_temp_free_i32(tcg_ctx, desc); + gen_set_label(tcg_ctx, over); + return true; +} + +static bool opfvf_check(DisasContext *s, arg_rmrr *a) +{ +/* + * If the current SEW does not correspond to a supported IEEE floating-point + * type, an illegal instruction exception is raised + */ + return (vext_check_isa_ill(s) && + vext_check_overlap_mask(s, a->rd, a->vm, false) && + vext_check_reg(s, a->rd, false) && + vext_check_reg(s, a->rs2, false) && + (s->sew != 0)); +} + +/* OPFVF without GVEC IR */ +#define GEN_OPFVF_TRANS(NAME, CHECK) \ +static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ +{ \ + if (CHECK(s, a)) { \ + uint32_t data = 0; \ + static gen_helper_opfvf *const fns[3] = { \ + gen_helper_##NAME##_h, \ + gen_helper_##NAME##_w, \ + gen_helper_##NAME##_d, \ + }; \ + gen_set_rm(s, 7); \ + FIELD_DP32(data, VDATA, MLEN, s->mlen, data); \ + FIELD_DP32(data, VDATA, VM, a->vm, data); \ + FIELD_DP32(data, VDATA, LMUL, s->lmul, data); \ + return opfvf_trans(a->rd, a->rs1, a->rs2, data, \ + fns[s->sew - 1], s); \ + } \ + return false; \ +} + +GEN_OPFVF_TRANS(vfadd_vf, opfvf_check) +GEN_OPFVF_TRANS(vfsub_vf, opfvf_check) +GEN_OPFVF_TRANS(vfrsub_vf, opfvf_check) + +/* Vector Widening Floating-Point Add/Subtract Instructions */ +static 
bool opfvv_widen_check(DisasContext *s, arg_rmrr *a) +{ + return (vext_check_isa_ill(s) && + vext_check_overlap_mask(s, a->rd, a->vm, true) && + vext_check_reg(s, a->rd, true) && + vext_check_reg(s, a->rs2, false) && + vext_check_reg(s, a->rs1, false) && + vext_check_overlap_group(a->rd, 2 << s->lmul, a->rs2, + 1 << s->lmul) && + vext_check_overlap_group(a->rd, 2 << s->lmul, a->rs1, + 1 << s->lmul) && + (s->lmul < 0x3) && (s->sew < 0x3) && (s->sew != 0)); +} + +/* OPFVV with WIDEN */ +#define GEN_OPFVV_WIDEN_TRANS(NAME, CHECK) \ +static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ +{ \ + TCGContext *tcg_ctx = s->uc->tcg_ctx; \ + if (CHECK(s, a)) { \ + uint32_t data = 0; \ + static gen_helper_gvec_4_ptr * const fns[2] = { \ + gen_helper_##NAME##_h, gen_helper_##NAME##_w, \ + }; \ + TCGLabel *over = gen_new_label(tcg_ctx); \ + gen_set_rm(s, 7); \ + tcg_gen_brcondi_tl(tcg_ctx, TCG_COND_EQ, tcg_ctx->cpu_vl, 0, over); \ + \ + FIELD_DP32(data, VDATA, MLEN, s->mlen, data); \ + FIELD_DP32(data, VDATA, VM, a->vm, data); \ + FIELD_DP32(data, VDATA, LMUL, s->lmul, data); \ + tcg_gen_gvec_4_ptr(tcg_ctx, vreg_ofs(s, a->rd), vreg_ofs(s, 0), \ + vreg_ofs(s, a->rs1), \ + vreg_ofs(s, a->rs2), tcg_ctx->cpu_env, 0, \ + s->vlen / 8, data, fns[s->sew - 1]); \ + gen_set_label(tcg_ctx, over); \ + return true; \ + } \ + return false; \ +} + +GEN_OPFVV_WIDEN_TRANS(vfwadd_vv, opfvv_widen_check) +GEN_OPFVV_WIDEN_TRANS(vfwsub_vv, opfvv_widen_check) + +static bool opfvf_widen_check(DisasContext *s, arg_rmrr *a) +{ + return (vext_check_isa_ill(s) && + vext_check_overlap_mask(s, a->rd, a->vm, true) && + vext_check_reg(s, a->rd, true) && + vext_check_reg(s, a->rs2, false) && + vext_check_overlap_group(a->rd, 2 << s->lmul, a->rs2, + 1 << s->lmul) && + (s->lmul < 0x3) && (s->sew < 0x3) && (s->sew != 0)); +} + +/* OPFVF with WIDEN */ +#define GEN_OPFVF_WIDEN_TRANS(NAME) \ +static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ +{ \ + if (opfvf_widen_check(s, a)) { \ + uint32_t data = 0; \ + static gen_helper_opfvf *const fns[2] = { \ + gen_helper_##NAME##_h, gen_helper_##NAME##_w, \ + }; \ + gen_set_rm(s, 7); \ + FIELD_DP32(data, VDATA, MLEN, s->mlen, data); \ + FIELD_DP32(data, VDATA, VM, a->vm, data); \ + FIELD_DP32(data, VDATA, LMUL, s->lmul, data); \ + return opfvf_trans(a->rd, a->rs1, a->rs2, data, \ + fns[s->sew - 1], s); \ + } \ + return false; \ +} + +GEN_OPFVF_WIDEN_TRANS(vfwadd_vf) +GEN_OPFVF_WIDEN_TRANS(vfwsub_vf) + +static bool opfwv_widen_check(DisasContext *s, arg_rmrr *a) +{ + return (vext_check_isa_ill(s) && + vext_check_overlap_mask(s, a->rd, a->vm, true) && + vext_check_reg(s, a->rd, true) && + vext_check_reg(s, a->rs2, true) && + vext_check_reg(s, a->rs1, false) && + vext_check_overlap_group(a->rd, 2 << s->lmul, a->rs1, + 1 << s->lmul) && + (s->lmul < 0x3) && (s->sew < 0x3) && (s->sew != 0)); +} + +/* WIDEN OPFVV with WIDEN */ +#define GEN_OPFWV_WIDEN_TRANS(NAME) \ +static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ +{ \ + TCGContext *tcg_ctx = s->uc->tcg_ctx; \ + if (opfwv_widen_check(s, a)) { \ + uint32_t data = 0; \ + static gen_helper_gvec_4_ptr * const fns[2] = { \ + gen_helper_##NAME##_h, gen_helper_##NAME##_w, \ + }; \ + TCGLabel *over = gen_new_label(tcg_ctx); \ + gen_set_rm(s, 7); \ + tcg_gen_brcondi_tl(tcg_ctx, TCG_COND_EQ, tcg_ctx->cpu_vl, 0, over); \ + \ + FIELD_DP32(data, VDATA, MLEN, s->mlen, data); \ + FIELD_DP32(data, VDATA, VM, a->vm, data); \ + FIELD_DP32(data, VDATA, LMUL, s->lmul, data); \ + tcg_gen_gvec_4_ptr(tcg_ctx, vreg_ofs(s, a->rd), vreg_ofs(s, 0), \ + vreg_ofs(s, 
a->rs1), \ + vreg_ofs(s, a->rs2), tcg_ctx->cpu_env, 0, \ + s->vlen / 8, data, fns[s->sew - 1]); \ + gen_set_label(tcg_ctx, over); \ + return true; \ + } \ + return false; \ +} + +GEN_OPFWV_WIDEN_TRANS(vfwadd_wv) +GEN_OPFWV_WIDEN_TRANS(vfwsub_wv) + +static bool opfwf_widen_check(DisasContext *s, arg_rmrr *a) +{ + return (vext_check_isa_ill(s) && + vext_check_overlap_mask(s, a->rd, a->vm, true) && + vext_check_reg(s, a->rd, true) && + vext_check_reg(s, a->rs2, true) && + (s->lmul < 0x3) && (s->sew < 0x3) && (s->sew != 0)); +} + +/* WIDEN OPFVF with WIDEN */ +#define GEN_OPFWF_WIDEN_TRANS(NAME) \ +static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ +{ \ + if (opfwf_widen_check(s, a)) { \ + uint32_t data = 0; \ + static gen_helper_opfvf *const fns[2] = { \ + gen_helper_##NAME##_h, gen_helper_##NAME##_w, \ + }; \ + gen_set_rm(s, 7); \ + FIELD_DP32(data, VDATA, MLEN, s->mlen, data); \ + FIELD_DP32(data, VDATA, VM, a->vm, data); \ + FIELD_DP32(data, VDATA, LMUL, s->lmul, data); \ + return opfvf_trans(a->rd, a->rs1, a->rs2, data, \ + fns[s->sew - 1], s); \ + } \ + return false; \ +} + +GEN_OPFWF_WIDEN_TRANS(vfwadd_wf) +GEN_OPFWF_WIDEN_TRANS(vfwsub_wf) + +/* Vector Single-Width Floating-Point Multiply/Divide Instructions */ +GEN_OPFVV_TRANS(vfmul_vv, opfvv_check) +GEN_OPFVV_TRANS(vfdiv_vv, opfvv_check) +GEN_OPFVF_TRANS(vfmul_vf, opfvf_check) +GEN_OPFVF_TRANS(vfdiv_vf, opfvf_check) +GEN_OPFVF_TRANS(vfrdiv_vf, opfvf_check) + +/* Vector Widening Floating-Point Multiply */ +GEN_OPFVV_WIDEN_TRANS(vfwmul_vv, opfvv_widen_check) +GEN_OPFVF_WIDEN_TRANS(vfwmul_vf) + +/* Vector Single-Width Floating-Point Fused Multiply-Add Instructions */ +GEN_OPFVV_TRANS(vfmacc_vv, opfvv_check) +GEN_OPFVV_TRANS(vfnmacc_vv, opfvv_check) +GEN_OPFVV_TRANS(vfmsac_vv, opfvv_check) +GEN_OPFVV_TRANS(vfnmsac_vv, opfvv_check) +GEN_OPFVV_TRANS(vfmadd_vv, opfvv_check) +GEN_OPFVV_TRANS(vfnmadd_vv, opfvv_check) +GEN_OPFVV_TRANS(vfmsub_vv, opfvv_check) +GEN_OPFVV_TRANS(vfnmsub_vv, opfvv_check) +GEN_OPFVF_TRANS(vfmacc_vf, opfvf_check) +GEN_OPFVF_TRANS(vfnmacc_vf, opfvf_check) +GEN_OPFVF_TRANS(vfmsac_vf, opfvf_check) +GEN_OPFVF_TRANS(vfnmsac_vf, opfvf_check) +GEN_OPFVF_TRANS(vfmadd_vf, opfvf_check) +GEN_OPFVF_TRANS(vfnmadd_vf, opfvf_check) +GEN_OPFVF_TRANS(vfmsub_vf, opfvf_check) +GEN_OPFVF_TRANS(vfnmsub_vf, opfvf_check) + +/* Vector Widening Floating-Point Fused Multiply-Add Instructions */ +GEN_OPFVV_WIDEN_TRANS(vfwmacc_vv, opfvv_widen_check) +GEN_OPFVV_WIDEN_TRANS(vfwnmacc_vv, opfvv_widen_check) +GEN_OPFVV_WIDEN_TRANS(vfwmsac_vv, opfvv_widen_check) +GEN_OPFVV_WIDEN_TRANS(vfwnmsac_vv, opfvv_widen_check) +GEN_OPFVF_WIDEN_TRANS(vfwmacc_vf) +GEN_OPFVF_WIDEN_TRANS(vfwnmacc_vf) +GEN_OPFVF_WIDEN_TRANS(vfwmsac_vf) +GEN_OPFVF_WIDEN_TRANS(vfwnmsac_vf) + +/* Vector Floating-Point Square-Root Instruction */ + +/* + * If the current SEW does not correspond to a supported IEEE floating-point + * type, an illegal instruction exception is raised + */ +static bool opfv_check(DisasContext *s, arg_rmr *a) +{ + return (vext_check_isa_ill(s) && + vext_check_overlap_mask(s, a->rd, a->vm, false) && + vext_check_reg(s, a->rd, false) && + vext_check_reg(s, a->rs2, false) && + (s->sew != 0)); +} + +#define GEN_OPFV_TRANS(NAME, CHECK) \ +static bool trans_##NAME(DisasContext *s, arg_rmr *a) \ +{ \ + TCGContext *tcg_ctx = s->uc->tcg_ctx; \ + if (CHECK(s, a)) { \ + uint32_t data = 0; \ + static gen_helper_gvec_3_ptr * const fns[3] = { \ + gen_helper_##NAME##_h, \ + gen_helper_##NAME##_w, \ + gen_helper_##NAME##_d, \ + }; \ + TCGLabel *over = 
gen_new_label(tcg_ctx); \ + gen_set_rm(s, 7); \ + tcg_gen_brcondi_tl(tcg_ctx, TCG_COND_EQ, tcg_ctx->cpu_vl, 0, over); \ + \ + FIELD_DP32(data, VDATA, MLEN, s->mlen, data); \ + FIELD_DP32(data, VDATA, VM, a->vm, data); \ + FIELD_DP32(data, VDATA, LMUL, s->lmul, data); \ + tcg_gen_gvec_3_ptr(tcg_ctx, vreg_ofs(s, a->rd), vreg_ofs(s, 0), \ + vreg_ofs(s, a->rs2), tcg_ctx->cpu_env, 0, \ + s->vlen / 8, data, fns[s->sew - 1]); \ + gen_set_label(tcg_ctx, over); \ + return true; \ + } \ + return false; \ +} + +GEN_OPFV_TRANS(vfsqrt_v, opfv_check) + +/* Vector Floating-Point MIN/MAX Instructions */ +GEN_OPFVV_TRANS(vfmin_vv, opfvv_check) +GEN_OPFVV_TRANS(vfmax_vv, opfvv_check) +GEN_OPFVF_TRANS(vfmin_vf, opfvf_check) +GEN_OPFVF_TRANS(vfmax_vf, opfvf_check) + +/* Vector Floating-Point Sign-Injection Instructions */ +GEN_OPFVV_TRANS(vfsgnj_vv, opfvv_check) +GEN_OPFVV_TRANS(vfsgnjn_vv, opfvv_check) +GEN_OPFVV_TRANS(vfsgnjx_vv, opfvv_check) +GEN_OPFVF_TRANS(vfsgnj_vf, opfvf_check) +GEN_OPFVF_TRANS(vfsgnjn_vf, opfvf_check) +GEN_OPFVF_TRANS(vfsgnjx_vf, opfvf_check) + +/* Vector Floating-Point Compare Instructions */ +static bool opfvv_cmp_check(DisasContext *s, arg_rmrr *a) +{ + return (vext_check_isa_ill(s) && + vext_check_reg(s, a->rs2, false) && + vext_check_reg(s, a->rs1, false) && + (s->sew != 0) && + ((vext_check_overlap_group(a->rd, 1, a->rs1, 1 << s->lmul) && + vext_check_overlap_group(a->rd, 1, a->rs2, 1 << s->lmul)) || + (s->lmul == 0))); +} + +GEN_OPFVV_TRANS(vmfeq_vv, opfvv_cmp_check) +GEN_OPFVV_TRANS(vmfne_vv, opfvv_cmp_check) +GEN_OPFVV_TRANS(vmflt_vv, opfvv_cmp_check) +GEN_OPFVV_TRANS(vmfle_vv, opfvv_cmp_check) +GEN_OPFVV_TRANS(vmford_vv, opfvv_cmp_check) + +static bool opfvf_cmp_check(DisasContext *s, arg_rmrr *a) +{ + return (vext_check_isa_ill(s) && + vext_check_reg(s, a->rs2, false) && + (s->sew != 0) && + (vext_check_overlap_group(a->rd, 1, a->rs2, 1 << s->lmul) || + (s->lmul == 0))); +} + +GEN_OPFVF_TRANS(vmfeq_vf, opfvf_cmp_check) +GEN_OPFVF_TRANS(vmfne_vf, opfvf_cmp_check) +GEN_OPFVF_TRANS(vmflt_vf, opfvf_cmp_check) +GEN_OPFVF_TRANS(vmfle_vf, opfvf_cmp_check) +GEN_OPFVF_TRANS(vmfgt_vf, opfvf_cmp_check) +GEN_OPFVF_TRANS(vmfge_vf, opfvf_cmp_check) +GEN_OPFVF_TRANS(vmford_vf, opfvf_cmp_check) + +/* Vector Floating-Point Classify Instruction */ +GEN_OPFV_TRANS(vfclass_v, opfv_check) + +/* Vector Floating-Point Merge Instruction */ +GEN_OPFVF_TRANS(vfmerge_vfm, opfvf_check) + +static bool trans_vfmv_v_f(DisasContext *s, arg_vfmv_v_f *a) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + if (vext_check_isa_ill(s) && + vext_check_reg(s, a->rd, false) && + (s->sew != 0)) { + + if (s->vl_eq_vlmax) { + tcg_gen_gvec_dup_i64(tcg_ctx, s->sew, vreg_ofs(s, a->rd), + MAXSZ(s), MAXSZ(s), tcg_ctx->cpu_fpr[a->rs1]); + } else { + TCGv_ptr dest; + TCGv_i32 desc; + uint32_t data; + FIELD_DP32(0, VDATA, LMUL, s->lmul, data); + static gen_helper_vmv_vx * const fns[3] = { + gen_helper_vmv_v_x_h, + gen_helper_vmv_v_x_w, + gen_helper_vmv_v_x_d, + }; + TCGLabel *over = gen_new_label(tcg_ctx); + tcg_gen_brcondi_tl(tcg_ctx, TCG_COND_EQ, tcg_ctx->cpu_vl, 0, over); + + dest = tcg_temp_new_ptr(tcg_ctx); + desc = tcg_const_i32(tcg_ctx, simd_desc(0, s->vlen / 8, data)); + tcg_gen_addi_ptr(tcg_ctx, dest, tcg_ctx->cpu_env, vreg_ofs(s, a->rd)); + fns[s->sew - 1](tcg_ctx, dest, tcg_ctx->cpu_fpr[a->rs1], tcg_ctx->cpu_env, desc); + + tcg_temp_free_ptr(tcg_ctx, dest); + tcg_temp_free_i32(tcg_ctx, desc); + gen_set_label(tcg_ctx, over); + } + return true; + } + return false; +} + +/* Single-Width Floating-Point/Integer Type-Convert 
Instructions */ +GEN_OPFV_TRANS(vfcvt_xu_f_v, opfv_check) +GEN_OPFV_TRANS(vfcvt_x_f_v, opfv_check) +GEN_OPFV_TRANS(vfcvt_f_xu_v, opfv_check) +GEN_OPFV_TRANS(vfcvt_f_x_v, opfv_check) + +/* Widening Floating-Point/Integer Type-Convert Instructions */ + +/* + * If the current SEW does not correspond to a supported IEEE floating-point + * type, an illegal instruction exception is raised + */ +static bool opfv_widen_check(DisasContext *s, arg_rmr *a) +{ + return (vext_check_isa_ill(s) && + vext_check_overlap_mask(s, a->rd, a->vm, true) && + vext_check_reg(s, a->rd, true) && + vext_check_reg(s, a->rs2, false) && + vext_check_overlap_group(a->rd, 2 << s->lmul, a->rs2, + 1 << s->lmul) && + (s->lmul < 0x3) && (s->sew < 0x3) && (s->sew != 0)); +} + +#define GEN_OPFV_WIDEN_TRANS(NAME) \ +static bool trans_##NAME(DisasContext *s, arg_rmr *a) \ +{ \ + TCGContext *tcg_ctx = s->uc->tcg_ctx; \ + if (opfv_widen_check(s, a)) { \ + uint32_t data = 0; \ + static gen_helper_gvec_3_ptr * const fns[2] = { \ + gen_helper_##NAME##_h, \ + gen_helper_##NAME##_w, \ + }; \ + TCGLabel *over = gen_new_label(tcg_ctx); \ + gen_set_rm(s, 7); \ + tcg_gen_brcondi_tl(tcg_ctx, TCG_COND_EQ, tcg_ctx->cpu_vl, 0, over); \ + \ + FIELD_DP32(data, VDATA, MLEN, s->mlen, data); \ + FIELD_DP32(data, VDATA, VM, a->vm, data); \ + FIELD_DP32(data, VDATA, LMUL, s->lmul, data); \ + tcg_gen_gvec_3_ptr(tcg_ctx, vreg_ofs(s, a->rd), vreg_ofs(s, 0), \ + vreg_ofs(s, a->rs2), tcg_ctx->cpu_env, 0, \ + s->vlen / 8, data, fns[s->sew - 1]); \ + gen_set_label(tcg_ctx, over); \ + return true; \ + } \ + return false; \ +} + +GEN_OPFV_WIDEN_TRANS(vfwcvt_xu_f_v) +GEN_OPFV_WIDEN_TRANS(vfwcvt_x_f_v) +GEN_OPFV_WIDEN_TRANS(vfwcvt_f_xu_v) +GEN_OPFV_WIDEN_TRANS(vfwcvt_f_x_v) +GEN_OPFV_WIDEN_TRANS(vfwcvt_f_f_v) + +/* Narrowing Floating-Point/Integer Type-Convert Instructions */ + +/* + * If the current SEW does not correspond to a supported IEEE floating-point + * type, an illegal instruction exception is raised + */ +static bool opfv_narrow_check(DisasContext *s, arg_rmr *a) +{ + return (vext_check_isa_ill(s) && + vext_check_overlap_mask(s, a->rd, a->vm, false) && + vext_check_reg(s, a->rd, false) && + vext_check_reg(s, a->rs2, true) && + vext_check_overlap_group(a->rd, 1 << s->lmul, a->rs2, + 2 << s->lmul) && + (s->lmul < 0x3) && (s->sew < 0x3) && (s->sew != 0)); +} + +#define GEN_OPFV_NARROW_TRANS(NAME) \ +static bool trans_##NAME(DisasContext *s, arg_rmr *a) \ +{ \ + TCGContext *tcg_ctx = s->uc->tcg_ctx; \ + if (opfv_narrow_check(s, a)) { \ + uint32_t data = 0; \ + static gen_helper_gvec_3_ptr * const fns[2] = { \ + gen_helper_##NAME##_h, \ + gen_helper_##NAME##_w, \ + }; \ + TCGLabel *over = gen_new_label(tcg_ctx); \ + gen_set_rm(s, 7); \ + tcg_gen_brcondi_tl(tcg_ctx, TCG_COND_EQ, tcg_ctx->cpu_vl, 0, over); \ + \ + FIELD_DP32(data, VDATA, MLEN, s->mlen, data); \ + FIELD_DP32(data, VDATA, VM, a->vm, data); \ + FIELD_DP32(data, VDATA, LMUL, s->lmul, data); \ + tcg_gen_gvec_3_ptr(tcg_ctx, vreg_ofs(s, a->rd), vreg_ofs(s, 0), \ + vreg_ofs(s, a->rs2), tcg_ctx->cpu_env, 0, \ + s->vlen / 8, data, fns[s->sew - 1]); \ + gen_set_label(tcg_ctx, over); \ + return true; \ + } \ + return false; \ +} + +GEN_OPFV_NARROW_TRANS(vfncvt_xu_f_v) +GEN_OPFV_NARROW_TRANS(vfncvt_x_f_v) +GEN_OPFV_NARROW_TRANS(vfncvt_f_xu_v) +GEN_OPFV_NARROW_TRANS(vfncvt_f_x_v) +GEN_OPFV_NARROW_TRANS(vfncvt_f_f_v) + +/* + *** Vector Reduction Operations + */ +/* Vector Single-Width Integer Reduction Instructions */ +static bool reduction_check(DisasContext *s, arg_rmrr *a) +{ + return 
vext_check_isa_ill(s) && vext_check_reg(s, a->rs2, false); +} + +GEN_OPIVV_TRANS(vredsum_vs, reduction_check) +GEN_OPIVV_TRANS(vredmaxu_vs, reduction_check) +GEN_OPIVV_TRANS(vredmax_vs, reduction_check) +GEN_OPIVV_TRANS(vredminu_vs, reduction_check) +GEN_OPIVV_TRANS(vredmin_vs, reduction_check) +GEN_OPIVV_TRANS(vredand_vs, reduction_check) +GEN_OPIVV_TRANS(vredor_vs, reduction_check) +GEN_OPIVV_TRANS(vredxor_vs, reduction_check) + +/* Vector Widening Integer Reduction Instructions */ +GEN_OPIVV_WIDEN_TRANS(vwredsum_vs, reduction_check) +GEN_OPIVV_WIDEN_TRANS(vwredsumu_vs, reduction_check) + +/* Vector Single-Width Floating-Point Reduction Instructions */ +GEN_OPFVV_TRANS(vfredsum_vs, reduction_check) +GEN_OPFVV_TRANS(vfredmax_vs, reduction_check) +GEN_OPFVV_TRANS(vfredmin_vs, reduction_check) + +/* Vector Widening Floating-Point Reduction Instructions */ +GEN_OPFVV_WIDEN_TRANS(vfwredsum_vs, reduction_check) + +/* + *** Vector Mask Operations + */ + +/* Vector Mask-Register Logical Instructions */ +#define GEN_MM_TRANS(NAME) \ +static bool trans_##NAME(DisasContext *s, arg_r *a) \ +{ \ + TCGContext *tcg_ctx = s->uc->tcg_ctx; \ + if (vext_check_isa_ill(s)) { \ + uint32_t data = 0; \ + gen_helper_gvec_4_ptr *fn = gen_helper_##NAME; \ + TCGLabel *over = gen_new_label(tcg_ctx); \ + tcg_gen_brcondi_tl(tcg_ctx, TCG_COND_EQ, tcg_ctx->cpu_vl, 0, over); \ + \ + FIELD_DP32(data, VDATA, MLEN, s->mlen, data); \ + FIELD_DP32(data, VDATA, LMUL, s->lmul, data); \ + tcg_gen_gvec_4_ptr(tcg_ctx, vreg_ofs(s, a->rd), vreg_ofs(s, 0), \ + vreg_ofs(s, a->rs1), \ + vreg_ofs(s, a->rs2), tcg_ctx->cpu_env, 0, \ + s->vlen / 8, data, fn); \ + gen_set_label(tcg_ctx, over); \ + return true; \ + } \ + return false; \ +} + +GEN_MM_TRANS(vmand_mm) +GEN_MM_TRANS(vmnand_mm) +GEN_MM_TRANS(vmandnot_mm) +GEN_MM_TRANS(vmxor_mm) +GEN_MM_TRANS(vmor_mm) +GEN_MM_TRANS(vmnor_mm) +GEN_MM_TRANS(vmornot_mm) +GEN_MM_TRANS(vmxnor_mm) + +/* Vector mask population count vmpopc */ +static bool trans_vmpopc_m(DisasContext *s, arg_rmr *a) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + if (vext_check_isa_ill(s)) { + TCGv_ptr src2, mask; + TCGv dst; + TCGv_i32 desc; + uint32_t data = 0; + FIELD_DP32(data, VDATA, MLEN, s->mlen, data); + FIELD_DP32(data, VDATA, VM, a->vm, data); + FIELD_DP32(data, VDATA, LMUL, s->lmul, data); + + mask = tcg_temp_new_ptr(tcg_ctx); + src2 = tcg_temp_new_ptr(tcg_ctx); + dst = tcg_temp_new(tcg_ctx); + desc = tcg_const_i32(tcg_ctx, simd_desc(0, s->vlen / 8, data)); + + tcg_gen_addi_ptr(tcg_ctx, src2, tcg_ctx->cpu_env, vreg_ofs(s, a->rs2)); + tcg_gen_addi_ptr(tcg_ctx, mask, tcg_ctx->cpu_env, vreg_ofs(s, 0)); + + gen_helper_vmpopc_m(tcg_ctx, dst, mask, src2, tcg_ctx->cpu_env, desc); + gen_set_gpr(tcg_ctx, a->rd, dst); + + tcg_temp_free_ptr(tcg_ctx, mask); + tcg_temp_free_ptr(tcg_ctx, src2); + tcg_temp_free(tcg_ctx, dst); + tcg_temp_free_i32(tcg_ctx, desc); + return true; + } + return false; +} + +/* vmfirst find-first-set mask bit */ +static bool trans_vmfirst_m(DisasContext *s, arg_rmr *a) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + if (vext_check_isa_ill(s)) { + TCGv_ptr src2, mask; + TCGv dst; + TCGv_i32 desc; + uint32_t data = 0; + FIELD_DP32(data, VDATA, MLEN, s->mlen, data); + FIELD_DP32(data, VDATA, VM, a->vm, data); + FIELD_DP32(data, VDATA, LMUL, s->lmul, data); + + mask = tcg_temp_new_ptr(tcg_ctx); + src2 = tcg_temp_new_ptr(tcg_ctx); + dst = tcg_temp_new(tcg_ctx); + desc = tcg_const_i32(tcg_ctx, simd_desc(0, s->vlen / 8, data)); + + tcg_gen_addi_ptr(tcg_ctx, src2, tcg_ctx->cpu_env, vreg_ofs(s, a->rs2)); + 
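+ /*
+ * Descriptive note on the two address computations here: both pointers are
+ * formed relative to cpu_env so the out-of-line helper can index the vector
+ * register file directly; vreg_ofs(s, 0) always refers to mask register v0.
+ */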
tcg_gen_addi_ptr(tcg_ctx, mask, tcg_ctx->cpu_env, vreg_ofs(s, 0)); + + gen_helper_vmfirst_m(tcg_ctx, dst, mask, src2, tcg_ctx->cpu_env, desc); + gen_set_gpr(tcg_ctx, a->rd, dst); + + tcg_temp_free_ptr(tcg_ctx, mask); + tcg_temp_free_ptr(tcg_ctx, src2); + tcg_temp_free(tcg_ctx, dst); + tcg_temp_free_i32(tcg_ctx, desc); + return true; + } + return false; +} + +/* vmsbf.m set-before-first mask bit */ +/* vmsif.m set-includ-first mask bit */ +/* vmsof.m set-only-first mask bit */ +#define GEN_M_TRANS(NAME) \ +static bool trans_##NAME(DisasContext *s, arg_rmr *a) \ +{ \ + TCGContext *tcg_ctx = s->uc->tcg_ctx; \ + if (vext_check_isa_ill(s)) { \ + uint32_t data = 0; \ + gen_helper_gvec_3_ptr *fn = gen_helper_##NAME; \ + TCGLabel *over = gen_new_label(tcg_ctx); \ + tcg_gen_brcondi_tl(tcg_ctx, TCG_COND_EQ, tcg_ctx->cpu_vl, 0, over); \ + \ + FIELD_DP32(data, VDATA, MLEN, s->mlen, data); \ + FIELD_DP32(data, VDATA, VM, a->vm, data); \ + FIELD_DP32(data, VDATA, LMUL, s->lmul, data); \ + tcg_gen_gvec_3_ptr(tcg_ctx, vreg_ofs(s, a->rd), \ + vreg_ofs(s, 0), vreg_ofs(s, a->rs2), \ + tcg_ctx->cpu_env, 0, s->vlen / 8, data, fn); \ + gen_set_label(tcg_ctx, over); \ + return true; \ + } \ + return false; \ +} + +GEN_M_TRANS(vmsbf_m) +GEN_M_TRANS(vmsif_m) +GEN_M_TRANS(vmsof_m) + +/* Vector Iota Instruction */ +static bool trans_viota_m(DisasContext *s, arg_viota_m *a) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + if (vext_check_isa_ill(s) && + vext_check_reg(s, a->rd, false) && + vext_check_overlap_group(a->rd, 1 << s->lmul, a->rs2, 1) && + (a->vm != 0 || a->rd != 0)) { + uint32_t data = 0; + TCGLabel *over = gen_new_label(tcg_ctx); + tcg_gen_brcondi_tl(tcg_ctx, TCG_COND_EQ, tcg_ctx->cpu_vl, 0, over); + + FIELD_DP32(data, VDATA, MLEN, s->mlen, data); + FIELD_DP32(data, VDATA, VM, a->vm, data); + FIELD_DP32(data, VDATA, LMUL, s->lmul, data); + static gen_helper_gvec_3_ptr * const fns[4] = { + gen_helper_viota_m_b, gen_helper_viota_m_h, + gen_helper_viota_m_w, gen_helper_viota_m_d, + }; + tcg_gen_gvec_3_ptr(tcg_ctx, vreg_ofs(s, a->rd), vreg_ofs(s, 0), + vreg_ofs(s, a->rs2), tcg_ctx->cpu_env, 0, + s->vlen / 8, data, fns[s->sew]); + gen_set_label(tcg_ctx, over); + return true; + } + return false; +} + +/* Vector Element Index Instruction */ +static bool trans_vid_v(DisasContext *s, arg_vid_v *a) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + if (vext_check_isa_ill(s) && + vext_check_reg(s, a->rd, false) && + vext_check_overlap_mask(s, a->rd, a->vm, false)) { + uint32_t data = 0; + TCGLabel *over = gen_new_label(tcg_ctx); + tcg_gen_brcondi_tl(tcg_ctx, TCG_COND_EQ, tcg_ctx->cpu_vl, 0, over); + + FIELD_DP32(data, VDATA, MLEN, s->mlen, data); + FIELD_DP32(data, VDATA, VM, a->vm, data); + FIELD_DP32(data, VDATA, LMUL, s->lmul, data); + static gen_helper_gvec_2_ptr * const fns[4] = { + gen_helper_vid_v_b, gen_helper_vid_v_h, + gen_helper_vid_v_w, gen_helper_vid_v_d, + }; + tcg_gen_gvec_2_ptr(tcg_ctx, vreg_ofs(s, a->rd), vreg_ofs(s, 0), + tcg_ctx->cpu_env, 0, s->vlen / 8, data, fns[s->sew]); + gen_set_label(tcg_ctx, over); + return true; + } + return false; +} + +/* + *** Vector Permutation Instructions + */ + +/* Integer Extract Instruction */ + +static void load_element(TCGContext *tcg_ctx, TCGv_i64 dest, TCGv_ptr base, + int ofs, int sew) +{ + switch (sew) { + case MO_8: + tcg_gen_ld8u_i64(tcg_ctx, dest, base, ofs); + break; + case MO_16: + tcg_gen_ld16u_i64(tcg_ctx, dest, base, ofs); + break; + case MO_32: + tcg_gen_ld32u_i64(tcg_ctx, dest, base, ofs); + break; + case MO_64: + tcg_gen_ld_i64(tcg_ctx, dest, base, ofs); + 
break; + default: + g_assert_not_reached(); + break; + } +} + +/* offset of the idx element with base regsiter r */ +static uint32_t endian_ofs(DisasContext *s, int r, int idx) +{ +#ifdef HOST_WORDS_BIGENDIAN + return vreg_ofs(s, r) + ((idx ^ (7 >> s->sew)) << s->sew); +#else + return vreg_ofs(s, r) + (idx << s->sew); +#endif +} + +/* adjust the index according to the endian */ +static void endian_adjust(TCGv_i32 ofs, int sew) +{ +#ifdef HOST_WORDS_BIGENDIAN + tcg_gen_xori_i32(ofs, ofs, 7 >> sew); +#endif +} + +/* Load idx >= VLMAX ? 0 : vreg[idx] */ +static void vec_element_loadx(DisasContext *s, TCGv_i64 dest, + int vreg, TCGv idx, int vlmax) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + TCGv_i32 ofs = tcg_temp_new_i32(tcg_ctx); + TCGv_ptr base = tcg_temp_new_ptr(tcg_ctx); + TCGv_i64 t_idx = tcg_temp_new_i64(tcg_ctx); + TCGv_i64 t_vlmax, t_zero; + + /* + * Mask the index to the length so that we do + * not produce an out-of-range load. + */ + tcg_gen_trunc_tl_i32(tcg_ctx, ofs, idx); + tcg_gen_andi_i32(tcg_ctx, ofs, ofs, vlmax - 1); + + /* Convert the index to an offset. */ + endian_adjust(ofs, s->sew); + tcg_gen_shli_i32(tcg_ctx, ofs, ofs, s->sew); + + /* Convert the index to a pointer. */ + tcg_gen_ext_i32_ptr(tcg_ctx, base, ofs); + tcg_gen_add_ptr(tcg_ctx, base, base, tcg_ctx->cpu_env); + + /* Perform the load. */ + load_element(tcg_ctx, dest, base, + vreg_ofs(s, vreg), s->sew); + tcg_temp_free_ptr(tcg_ctx, base); + tcg_temp_free_i32(tcg_ctx, ofs); + + /* Flush out-of-range indexing to zero. */ + t_vlmax = tcg_const_i64(tcg_ctx, vlmax); + t_zero = tcg_const_i64(tcg_ctx, 0); + tcg_gen_extu_tl_i64(tcg_ctx, t_idx, idx); + + tcg_gen_movcond_i64(tcg_ctx, TCG_COND_LTU, dest, t_idx, + t_vlmax, dest, t_zero); + + tcg_temp_free_i64(tcg_ctx, t_vlmax); + tcg_temp_free_i64(tcg_ctx, t_zero); + tcg_temp_free_i64(tcg_ctx, t_idx); +} + +static void vec_element_loadi(DisasContext *s, TCGv_i64 dest, + int vreg, int idx) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + load_element(tcg_ctx, dest, tcg_ctx->cpu_env, endian_ofs(s, vreg, idx), s->sew); +} + +static bool trans_vext_x_v(DisasContext *s, arg_r *a) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + TCGv_i64 tmp = tcg_temp_new_i64(tcg_ctx); + TCGv dest = tcg_temp_new(tcg_ctx); + + if (a->rs1 == 0) { + /* Special case vmv.x.s rd, vs2. */ + vec_element_loadi(s, tmp, a->rs2, 0); + } else { + /* This instruction ignores LMUL and vector register groups */ + int vlmax = s->vlen >> (3 + s->sew); + vec_element_loadx(s, tmp, a->rs2, tcg_ctx->cpu_gpr[a->rs1], vlmax); + } + tcg_gen_trunc_i64_tl(tcg_ctx, dest, tmp); + gen_set_gpr(tcg_ctx, a->rd, dest); + + tcg_temp_free(tcg_ctx, dest); + tcg_temp_free_i64(tcg_ctx, tmp); + return true; +} + +/* Integer Scalar Move Instruction */ + +static void store_element(TCGContext *tcg_ctx, TCGv_i64 val, TCGv_ptr base, + int ofs, int sew) +{ + switch (sew) { + case MO_8: + tcg_gen_st8_i64(tcg_ctx, val, base, ofs); + break; + case MO_16: + tcg_gen_st16_i64(tcg_ctx, val, base, ofs); + break; + case MO_32: + tcg_gen_st32_i64(tcg_ctx, val, base, ofs); + break; + case MO_64: + tcg_gen_st_i64(tcg_ctx, val, base, ofs); + break; + default: + g_assert_not_reached(); + break; + } +} + +/* + * Store vreg[idx] = val. + * The index must be in range of VLMAX. 
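+ * Within this file it is only called with idx 0, from vmv.s.x and vfmv.s.f below.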
+ */ +static void vec_element_storei(DisasContext *s, int vreg, + int idx, TCGv_i64 val) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + store_element(tcg_ctx, val, tcg_ctx->cpu_env, endian_ofs(s, vreg, idx), s->sew); +} + +/* vmv.s.x vd, rs1 # vd[0] = rs1 */ +static bool trans_vmv_s_x(DisasContext *s, arg_vmv_s_x *a) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + if (vext_check_isa_ill(s)) { + /* This instruction ignores LMUL and vector register groups */ + int maxsz = s->vlen >> 3; + TCGv_i64 t1; + TCGLabel *over = gen_new_label(tcg_ctx); + + tcg_gen_brcondi_tl(tcg_ctx, TCG_COND_EQ, tcg_ctx->cpu_vl, 0, over); + tcg_gen_gvec_dup_imm(tcg_ctx, SEW64, vreg_ofs(s, a->rd), maxsz, maxsz, 0); + if (a->rs1 == 0) { + goto done; + } + + t1 = tcg_temp_new_i64(tcg_ctx); + tcg_gen_extu_tl_i64(tcg_ctx, t1, tcg_ctx->cpu_gpr[a->rs1]); + vec_element_storei(s, a->rd, 0, t1); + tcg_temp_free_i64(tcg_ctx, t1); + done: + gen_set_label(tcg_ctx, over); + return true; + } + return false; +} + +/* Floating-Point Scalar Move Instructions */ +static bool trans_vfmv_f_s(DisasContext *s, arg_vfmv_f_s *a) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + if (!s->vill && has_ext(s, RVF) && + (s->mstatus_fs != 0) && (s->sew != 0)) { + unsigned int len = 8 << s->sew; + + vec_element_loadi(s, tcg_ctx->cpu_fpr[a->rd], a->rs2, 0); + if (len < 64) { + tcg_gen_ori_i64(tcg_ctx, tcg_ctx->cpu_fpr[a->rd], tcg_ctx->cpu_fpr[a->rd], + MAKE_64BIT_MASK(len, 64 - len)); + } + + mark_fs_dirty(s); + return true; + } + return false; +} + +/* vfmv.s.f vd, rs1 # vd[0] = rs1 (vs2=0) */ +static bool trans_vfmv_s_f(DisasContext *s, arg_vfmv_s_f *a) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + if (!s->vill && has_ext(s, RVF) && (s->sew != 0)) { + TCGv_i64 t1; + /* The instructions ignore LMUL and vector register group. 
*/ + uint32_t vlmax = s->vlen >> 3; + + /* if vl == 0, skip vector register write back */ + TCGLabel *over = gen_new_label(tcg_ctx); + tcg_gen_brcondi_tl(tcg_ctx, TCG_COND_EQ, tcg_ctx->cpu_vl, 0, over); + + /* zeroed all elements */ + tcg_gen_gvec_dup_imm(tcg_ctx, SEW64, vreg_ofs(s, a->rd), vlmax, vlmax, 0); + + /* NaN-box f[rs1] as necessary for SEW */ + t1 = tcg_temp_new_i64(tcg_ctx); + if (s->sew == MO_64 && !has_ext(s, RVD)) { + tcg_gen_ori_i64(tcg_ctx, t1, tcg_ctx->cpu_fpr[a->rs1], MAKE_64BIT_MASK(32, 32)); + } else { + tcg_gen_mov_i64(tcg_ctx, t1, tcg_ctx->cpu_fpr[a->rs1]); + } + vec_element_storei(s, a->rd, 0, t1); + tcg_temp_free_i64(tcg_ctx, t1); + gen_set_label(tcg_ctx, over); + return true; + } + return false; +} + +/* Vector Slide Instructions */ +static bool slideup_check(DisasContext *s, arg_rmrr *a) +{ + return (vext_check_isa_ill(s) && + vext_check_overlap_mask(s, a->rd, a->vm, true) && + vext_check_reg(s, a->rd, false) && + vext_check_reg(s, a->rs2, false) && + (a->rd != a->rs2)); +} + +GEN_OPIVX_TRANS(vslideup_vx, slideup_check) +GEN_OPIVX_TRANS(vslide1up_vx, slideup_check) +GEN_OPIVI_TRANS(vslideup_vi, 1, vslideup_vx, slideup_check) + +GEN_OPIVX_TRANS(vslidedown_vx, opivx_check) +GEN_OPIVX_TRANS(vslide1down_vx, opivx_check) +GEN_OPIVI_TRANS(vslidedown_vi, 1, vslidedown_vx, opivx_check) + +/* Vector Register Gather Instruction */ +static bool vrgather_vv_check(DisasContext *s, arg_rmrr *a) +{ + return (vext_check_isa_ill(s) && + vext_check_overlap_mask(s, a->rd, a->vm, true) && + vext_check_reg(s, a->rd, false) && + vext_check_reg(s, a->rs1, false) && + vext_check_reg(s, a->rs2, false) && + (a->rd != a->rs2) && (a->rd != a->rs1)); +} + +GEN_OPIVV_TRANS(vrgather_vv, vrgather_vv_check) + +static bool vrgather_vx_check(DisasContext *s, arg_rmrr *a) +{ + return (vext_check_isa_ill(s) && + vext_check_overlap_mask(s, a->rd, a->vm, true) && + vext_check_reg(s, a->rd, false) && + vext_check_reg(s, a->rs2, false) && + (a->rd != a->rs2)); +} + +/* vrgather.vx vd, vs2, rs1, vm # vd[i] = (x[rs1] >= VLMAX) ? 0 : vs2[rs1] */ +static bool trans_vrgather_vx(DisasContext *s, arg_rmrr *a) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + if (!vrgather_vx_check(s, a)) { + return false; + } + + if (a->vm && s->vl_eq_vlmax) { + int vlmax = s->vlen / s->mlen; + TCGv_i64 dest = tcg_temp_new_i64(tcg_ctx); + + if (a->rs1 == 0) { + vec_element_loadi(s, dest, a->rs2, 0); + } else { + vec_element_loadx(s, dest, a->rs2, tcg_ctx->cpu_gpr[a->rs1], vlmax); + } + + tcg_gen_gvec_dup_i64(tcg_ctx, s->sew, vreg_ofs(s, a->rd), + MAXSZ(s), MAXSZ(s), dest); + tcg_temp_free_i64(tcg_ctx, dest); + } else { + static gen_helper_opivx * const fns[4] = { + gen_helper_vrgather_vx_b, gen_helper_vrgather_vx_h, + gen_helper_vrgather_vx_w, gen_helper_vrgather_vx_d + }; + return opivx_trans(a->rd, a->rs1, a->rs2, a->vm, fns[s->sew], s); + } + return true; +} + +/* vrgather.vi vd, vs2, imm, vm # vd[i] = (imm >= VLMAX) ? 
0 : vs2[imm] */ +static bool trans_vrgather_vi(DisasContext *s, arg_rmrr *a) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + if (!vrgather_vx_check(s, a)) { + return false; + } + + if (a->vm && s->vl_eq_vlmax) { + if (a->rs1 >= s->vlen / s->mlen) { + tcg_gen_gvec_dup_imm(tcg_ctx, SEW64, vreg_ofs(s, a->rd), + MAXSZ(s), MAXSZ(s), 0); + } else { + tcg_gen_gvec_dup_mem(tcg_ctx, s->sew, vreg_ofs(s, a->rd), + endian_ofs(s, a->rs2, a->rs1), + MAXSZ(s), MAXSZ(s)); + } + } else { + static gen_helper_opivx * const fns[4] = { + gen_helper_vrgather_vx_b, gen_helper_vrgather_vx_h, + gen_helper_vrgather_vx_w, gen_helper_vrgather_vx_d + }; + return opivi_trans(a->rd, a->rs1, a->rs2, a->vm, fns[s->sew], s, 1); + } + return true; +} + +/* Vector Compress Instruction */ +static bool vcompress_vm_check(DisasContext *s, arg_r *a) +{ + return (vext_check_isa_ill(s) && + vext_check_reg(s, a->rd, false) && + vext_check_reg(s, a->rs2, false) && + vext_check_overlap_group(a->rd, 1 << s->lmul, a->rs1, 1) && + (a->rd != a->rs2)); +} + +static bool trans_vcompress_vm(DisasContext *s, arg_r *a) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + if (vcompress_vm_check(s, a)) { + uint32_t data = 0; + static gen_helper_gvec_4_ptr * const fns[4] = { + gen_helper_vcompress_vm_b, gen_helper_vcompress_vm_h, + gen_helper_vcompress_vm_w, gen_helper_vcompress_vm_d, + }; + TCGLabel *over = gen_new_label(tcg_ctx); + tcg_gen_brcondi_tl(tcg_ctx, TCG_COND_EQ, tcg_ctx->cpu_vl, 0, over); + + FIELD_DP32(data, VDATA, MLEN, s->mlen, data); + FIELD_DP32(data, VDATA, LMUL, s->lmul, data); + tcg_gen_gvec_4_ptr(tcg_ctx, vreg_ofs(s, a->rd), vreg_ofs(s, 0), + vreg_ofs(s, a->rs1), vreg_ofs(s, a->rs2), + tcg_ctx->cpu_env, 0, s->vlen / 8, data, fns[s->sew]); + gen_set_label(tcg_ctx, over); + return true; + } + return false; +} diff --git a/qemu/target/riscv/internals.h b/qemu/target/riscv/internals.h new file mode 100644 index 0000000000..37d33820ad --- /dev/null +++ b/qemu/target/riscv/internals.h @@ -0,0 +1,41 @@ +/* + * QEMU RISC-V CPU -- internal functions and types + * + * Copyright (c) 2020 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2 or later, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see . 
+ */ + +#ifndef RISCV_CPU_INTERNALS_H +#define RISCV_CPU_INTERNALS_H + +#include "hw/registerfields.h" + +/* share data between vector helpers and decode code */ +FIELD(VDATA, MLEN, 0, 8) +FIELD(VDATA, VM, 8, 1) +FIELD(VDATA, LMUL, 9, 2) +FIELD(VDATA, NF, 11, 4) +FIELD(VDATA, WD, 11, 1) + +/* float point classify helpers */ +target_ulong fclass_h(uint64_t frs1); +target_ulong fclass_s(uint64_t frs1); +target_ulong fclass_d(uint64_t frs1); + +#define SEW8 0 +#define SEW16 1 +#define SEW32 2 +#define SEW64 3 + +#endif diff --git a/qemu/target/riscv/op_helper.c b/qemu/target/riscv/op_helper.c index 5afb2ce881..c5de354a05 100644 --- a/qemu/target/riscv/op_helper.c +++ b/qemu/target/riscv/op_helper.c @@ -81,8 +81,7 @@ target_ulong helper_sret(CPURISCVState *env, target_ulong cpu_pc_deb) riscv_raise_exception(env, RISCV_EXCP_INST_ADDR_MIS, GETPC()); } - if (env->priv_ver >= PRIV_VERSION_1_10_0 && - get_field(env->mstatus, MSTATUS_TSR) && !(env->priv >= PRV_M)) { + if (get_field(env->mstatus, MSTATUS_TSR) && !(env->priv >= PRV_M)) { riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); } @@ -116,10 +115,8 @@ target_ulong helper_sret(CPURISCVState *env, target_ulong cpu_pc_deb) } else { prev_priv = get_field(mstatus, MSTATUS_SPP); - mstatus = set_field(mstatus, - env->priv_ver >= PRIV_VERSION_1_10_0 ? - MSTATUS_SIE : MSTATUS_UIE << prev_priv, - get_field(mstatus, MSTATUS_SPIE)); + mstatus = set_field(mstatus, MSTATUS_SIE, + get_field(mstatus, MSTATUS_SPIE)); mstatus = set_field(mstatus, MSTATUS_SPIE, 1); mstatus = set_field(mstatus, MSTATUS_SPP, PRV_U); env->mstatus = mstatus; @@ -144,10 +141,8 @@ target_ulong helper_mret(CPURISCVState *env, target_ulong cpu_pc_deb) target_ulong mstatus = env->mstatus; target_ulong prev_priv = get_field(mstatus, MSTATUS_MPP); target_ulong prev_virt = MSTATUS_MPV_ISSET(env); - mstatus = set_field(mstatus, - env->priv_ver >= PRIV_VERSION_1_10_0 ? 
- MSTATUS_MIE : MSTATUS_UIE << prev_priv, - get_field(mstatus, MSTATUS_MPIE)); + mstatus = set_field(mstatus, MSTATUS_MIE, + get_field(mstatus, MSTATUS_MPIE)); mstatus = set_field(mstatus, MSTATUS_MPIE, 1); mstatus = set_field(mstatus, MSTATUS_MPP, PRV_U); #ifdef TARGET_RISCV32 @@ -194,7 +189,6 @@ void helper_tlb_flush(CPURISCVState *env) CPUState *cs = env_cpu(env); if (!(env->priv >= PRV_S) || (env->priv == PRV_S && - env->priv_ver >= PRIV_VERSION_1_10_0 && get_field(env->mstatus, MSTATUS_TVM))) { riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); } else { @@ -202,6 +196,19 @@ void helper_tlb_flush(CPURISCVState *env) } } +void helper_hyp_tlb_flush(CPURISCVState *env) +{ + CPUState *cs = env_cpu(env); + + if (env->priv == PRV_M || + (env->priv == PRV_S && !riscv_cpu_virt_enabled(env))) { + tlb_flush(cs); + return; + } + + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); +} + void helper_uc_riscv_exit(CPURISCVState *env) { CPUState *cs = env_cpu(env); @@ -209,4 +216,4 @@ void helper_uc_riscv_exit(CPURISCVState *env) cs->exception_index = EXCP_HLT; cs->halted = 1; cpu_loop_exit(cs); -} \ No newline at end of file +} diff --git a/qemu/target/riscv/pmp.c b/qemu/target/riscv/pmp.c index 888b99c8d9..9e1e614951 100644 --- a/qemu/target/riscv/pmp.c +++ b/qemu/target/riscv/pmp.c @@ -169,7 +169,7 @@ static void pmp_update_rule(CPURISCVState *env, uint32_t pmp_index) case PMP_AMATCH_NA4: sa = this_addr << 2; /* shift up from [xx:0] to [xx+2:2] */ - ea = (this_addr + 4u) - 1u; + ea = (sa + 4u) - 1u; break; case PMP_AMATCH_NAPOT: @@ -231,16 +231,20 @@ bool pmp_hart_has_privs(CPURISCVState *env, target_ulong addr, return true; } - /* - * if size is unknown (0), assume that all bytes - * from addr to the end of the page will be accessed. - */ if (size == 0) { + if (riscv_feature(env, RISCV_FEATURE_MMU)) { + /* + * If size is unknown (0), assume that all bytes + * from addr to the end of the page will be accessed. + */ #ifdef _MSC_VER - pmp_size = 0 - (addr | TARGET_PAGE_MASK); + pmp_size = 0 - (addr | TARGET_PAGE_MASK); #else - pmp_size = -(addr | TARGET_PAGE_MASK); + pmp_size = -(addr | TARGET_PAGE_MASK); #endif + } else { + pmp_size = sizeof(target_ulong); + } } else { pmp_size = size; } diff --git a/qemu/target/riscv/riscv32/decode_insn16.inc.c b/qemu/target/riscv/riscv32/decode_insn16.inc.c index 66ebf61203..ba4cccaf76 100644 --- a/qemu/target/riscv/riscv32/decode_insn16.inc.c +++ b/qemu/target/riscv/riscv32/decode_insn16.inc.c @@ -1,11 +1,9 @@ /* This file is autogenerated by scripts/decodetree.py. */ -#ifdef CONFIG_PRAGMA_DIAGNOSTIC_AVAILABLE -# pragma GCC diagnostic push -# pragma GCC diagnostic ignored "-Wredundant-decls" -# ifdef __clang__ +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wredundant-decls" +#ifdef __clang__ # pragma GCC diagnostic ignored "-Wtypedef-redefinition" -# endif #endif typedef arg_empty arg_illegal; @@ -55,9 +53,7 @@ static bool trans_flw(DisasContext *ctx, arg_flw *a); typedef arg_s arg_fsw; static bool trans_fsw(DisasContext *ctx, arg_fsw *a); -#ifdef CONFIG_PRAGMA_DIAGNOSTIC_AVAILABLE -# pragma GCC diagnostic pop -#endif +#pragma GCC diagnostic pop static void decode_insn16_extract_c_addi16sp(DisasContext *ctx, arg_i *a, uint16_t insn) { @@ -231,55 +227,45 @@ static bool decode_insn16(DisasContext *ctx, uint16_t insn) /* 000..... 
......00 */ if ((insn & 0x00001fe0) == 0x00000000) { /* 00000000 000...00 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16.decode:87 */ decode_insn16_extract_decode_insn16_Fmt_22(ctx, &u.f_empty, insn); - ctx->invalid = true; if (trans_illegal(ctx, &u.f_empty)) return true; } - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16.decode:88 */ decode_insn16_extract_c_addi4spn(ctx, &u.f_i, insn); if (trans_addi(ctx, &u.f_i)) return true; return false; case 0x00000001: /* 000..... ......01 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16.decode:96 */ decode_insn16_extract_ci(ctx, &u.f_i, insn); if (trans_addi(ctx, &u.f_i)) return true; return false; case 0x00000002: /* 000..... ......10 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16.decode:115 */ decode_insn16_extract_c_shift2(ctx, &u.f_shift, insn); if (trans_slli(ctx, &u.f_shift)) return true; return false; case 0x00002000: /* 001..... ......00 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16.decode:90 */ decode_insn16_extract_cl_d(ctx, &u.f_i, insn); if (trans_fld(ctx, &u.f_i)) return true; return false; case 0x00002001: /* 001..... ......01 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16-32.decode:24 */ decode_insn16_extract_cj(ctx, &u.f_j, insn); u.f_j.rd = 1; if (trans_jal(ctx, &u.f_j)) return true; return false; case 0x00002002: /* 001..... ......10 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16.decode:116 */ decode_insn16_extract_c_ldsp(ctx, &u.f_i, insn); if (trans_fld(ctx, &u.f_i)) return true; return false; case 0x00004000: /* 010..... ......00 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16.decode:91 */ decode_insn16_extract_cl_w(ctx, &u.f_i, insn); if (trans_lw(ctx, &u.f_i)) return true; return false; case 0x00004001: /* 010..... ......01 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16.decode:97 */ decode_insn16_extract_c_li(ctx, &u.f_i, insn); if (trans_addi(ctx, &u.f_i)) return true; return false; @@ -287,17 +273,14 @@ static bool decode_insn16(DisasContext *ctx, uint16_t insn) /* 010..... ......10 */ if ((insn & 0x00000f80) == 0x00000000) { /* 010.0000 0.....10 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16.decode:118 */ decode_insn16_extract_decode_insn16_Fmt_22(ctx, &u.f_empty, insn); if (trans_illegal(ctx, &u.f_empty)) return true; } - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16.decode:119 */ decode_insn16_extract_c_lwsp(ctx, &u.f_i, insn); if (trans_lw(ctx, &u.f_i)) return true; return false; case 0x00006000: /* 011..... ......00 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16-32.decode:20 */ decode_insn16_extract_cl_w(ctx, &u.f_i, insn); if (trans_flw(ctx, &u.f_i)) return true; return false; @@ -305,23 +288,19 @@ static bool decode_insn16(DisasContext *ctx, uint16_t insn) /* 011..... ......01 */ if ((insn & 0x0000107c) == 0x00000000) { /* 0110.... 
.0000001 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16.decode:99 */ decode_insn16_extract_decode_insn16_Fmt_22(ctx, &u.f_empty, insn); if (trans_illegal(ctx, &u.f_empty)) return true; } if ((insn & 0x00000f80) == 0x00000100) { /* 011.0001 0.....01 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16.decode:100 */ decode_insn16_extract_c_addi16sp(ctx, &u.f_i, insn); if (trans_addi(ctx, &u.f_i)) return true; } - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16.decode:101 */ decode_insn16_extract_c_lui(ctx, &u.f_u, insn); if (trans_lui(ctx, &u.f_u)) return true; return false; case 0x00006002: /* 011..... ......10 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16-32.decode:27 */ decode_insn16_extract_c_lwsp(ctx, &u.f_i, insn); if (trans_flw(ctx, &u.f_i)) return true; return false; @@ -330,19 +309,16 @@ static bool decode_insn16(DisasContext *ctx, uint16_t insn) switch ((insn >> 10) & 0x3) { case 0x0: /* 100.00.. ......01 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16.decode:103 */ decode_insn16_extract_c_shift(ctx, &u.f_shift, insn); if (trans_srli(ctx, &u.f_shift)) return true; return false; case 0x1: /* 100.01.. ......01 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16.decode:104 */ decode_insn16_extract_c_shift(ctx, &u.f_shift, insn); if (trans_srai(ctx, &u.f_shift)) return true; return false; case 0x2: /* 100.10.. ......01 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16.decode:105 */ decode_insn16_extract_c_andi(ctx, &u.f_i, insn); if (trans_andi(ctx, &u.f_i)) return true; return false; @@ -352,22 +328,18 @@ static bool decode_insn16(DisasContext *ctx, uint16_t insn) switch (insn & 0x00001060) { case 0x00000000: /* 100011.. .00...01 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16.decode:106 */ if (trans_sub(ctx, &u.f_r)) return true; return false; case 0x00000020: /* 100011.. .01...01 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16.decode:107 */ if (trans_xor(ctx, &u.f_r)) return true; return false; case 0x00000040: /* 100011.. .10...01 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16.decode:108 */ if (trans_or(ctx, &u.f_r)) return true; return false; case 0x00000060: /* 100011.. .11...01 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16.decode:109 */ if (trans_and(ctx, &u.f_r)) return true; return false; } @@ -381,18 +353,15 @@ static bool decode_insn16(DisasContext *ctx, uint16_t insn) /* 1000.... ......10 */ if ((insn & 0x00000ffc) == 0x00000000) { /* 10000000 00000010 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16.decode:122 */ decode_insn16_extract_decode_insn16_Fmt_22(ctx, &u.f_empty, insn); if (trans_illegal(ctx, &u.f_empty)) return true; } if ((insn & 0x0000007c) == 0x00000000) { /* 1000.... .0000010 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16.decode:123 */ decode_insn16_extract_c_jalr(ctx, &u.f_i, insn); u.f_i.rd = 0; if (trans_jalr(ctx, &u.f_i)) return true; } - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16.decode:124 */ decode_insn16_extract_c_mv(ctx, &u.f_i, insn); if (trans_addi(ctx, &u.f_i)) return true; return false; @@ -400,18 +369,15 @@ static bool decode_insn16(DisasContext *ctx, uint16_t insn) /* 1001.... 
......10 */ if ((insn & 0x00000ffc) == 0x00000000) { /* 10010000 00000010 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16.decode:127 */ decode_insn16_extract_decode_insn16_Fmt_22(ctx, &u.f_empty, insn); if (trans_ebreak(ctx, &u.f_empty)) return true; } if ((insn & 0x0000007c) == 0x00000000) { /* 1001.... .0000010 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16.decode:128 */ decode_insn16_extract_c_jalr(ctx, &u.f_i, insn); u.f_i.rd = 1; if (trans_jalr(ctx, &u.f_i)) return true; } - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16.decode:129 */ decode_insn16_extract_cr(ctx, &u.f_r, insn); if (trans_add(ctx, &u.f_r)) return true; return false; @@ -419,56 +385,47 @@ static bool decode_insn16(DisasContext *ctx, uint16_t insn) return false; case 0x0000a000: /* 101..... ......00 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16.decode:92 */ decode_insn16_extract_cs_d(ctx, &u.f_s, insn); if (trans_fsd(ctx, &u.f_s)) return true; return false; case 0x0000a001: /* 101..... ......01 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16.decode:110 */ decode_insn16_extract_cj(ctx, &u.f_j, insn); u.f_j.rd = 0; if (trans_jal(ctx, &u.f_j)) return true; return false; case 0x0000a002: /* 101..... ......10 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16.decode:131 */ decode_insn16_extract_c_sdsp(ctx, &u.f_s, insn); if (trans_fsd(ctx, &u.f_s)) return true; return false; case 0x0000c000: /* 110..... ......00 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16.decode:93 */ decode_insn16_extract_cs_w(ctx, &u.f_s, insn); if (trans_sw(ctx, &u.f_s)) return true; return false; case 0x0000c001: /* 110..... ......01 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16.decode:111 */ decode_insn16_extract_cb_z(ctx, &u.f_b, insn); if (trans_beq(ctx, &u.f_b)) return true; return false; case 0x0000c002: /* 110..... ......10 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16.decode:132 */ decode_insn16_extract_c_swsp(ctx, &u.f_s, insn); if (trans_sw(ctx, &u.f_s)) return true; return false; case 0x0000e000: /* 111..... ......00 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16-32.decode:21 */ decode_insn16_extract_cs_w(ctx, &u.f_s, insn); if (trans_fsw(ctx, &u.f_s)) return true; return false; case 0x0000e001: /* 111..... ......01 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16.decode:112 */ decode_insn16_extract_cb_z(ctx, &u.f_b, insn); if (trans_bne(ctx, &u.f_b)) return true; return false; case 0x0000e002: /* 111..... 
......10 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16-32.decode:28 */ decode_insn16_extract_c_swsp(ctx, &u.f_s, insn); if (trans_fsw(ctx, &u.f_s)) return true; return false; diff --git a/qemu/target/riscv/riscv32/decode_insn32.inc.c b/qemu/target/riscv/riscv32/decode_insn32.inc.c index c4c25de13b..ce08737432 100644 --- a/qemu/target/riscv/riscv32/decode_insn32.inc.c +++ b/qemu/target/riscv/riscv32/decode_insn32.inc.c @@ -14,56 +14,70 @@ typedef struct { int rs2; } arg_b; +typedef struct { + int csr; + int rd; + int rs1; +} arg_decode_insn3214; + typedef struct { int rd; int rm; int rs1; int rs2; int rs3; -} arg_decode_insn3210; +} arg_decode_insn3215; typedef struct { int rd; int rm; int rs1; int rs2; -} arg_decode_insn3211; +} arg_decode_insn3216; typedef struct { int rd; int rm; int rs1; -} arg_decode_insn3212; +} arg_decode_insn3217; typedef struct { int rd; int rs1; -} arg_decode_insn3213; +} arg_decode_insn3218; typedef struct { - int rs1; + int rd; + int vm; +} arg_decode_insn3219; + +typedef struct { + int rd; int rs2; -} arg_decode_insn3214; +} arg_decode_insn3220; typedef struct { + int rd; int rs1; -} arg_decode_insn3215; + int zimm; +} arg_decode_insn3221; typedef struct { - int pred; - int succ; -} arg_decode_insn3216; + int rs1; + int rs2; +} arg_decode_insn3222; typedef struct { - int csr; - int rd; int rs1; -} arg_decode_insn329; +} arg_decode_insn3223; + +typedef struct { + int pred; + int succ; +} arg_decode_insn3224; typedef struct { -#ifdef _MSC_VER - int dummy; // MSVC does not allow empty struct -#endif + int : 0; } arg_empty; typedef struct { @@ -83,6 +97,42 @@ typedef struct { int rs2; } arg_r; +typedef struct { + int nf; + int rd; + int rs1; + int vm; +} arg_r2nfvm; + +typedef struct { + int rd; + int rs2; + int vm; +} arg_rmr; + +typedef struct { + int rd; + int rs1; + int rs2; + int vm; +} arg_rmrr; + +typedef struct { + int nf; + int rd; + int rs1; + int rs2; + int vm; +} arg_rnfvm; + +typedef struct { + int rd; + int rs1; + int rs2; + int vm; + int wd; +} arg_rwdvm; + typedef struct { int imm; int rs1; @@ -112,13 +162,9 @@ typedef arg_empty arg_mret; static bool trans_mret(DisasContext *ctx, arg_mret *a); typedef arg_empty arg_wfi; static bool trans_wfi(DisasContext *ctx, arg_wfi *a); -typedef arg_decode_insn3214 arg_hfence_gvma; -static bool trans_hfence_gvma(DisasContext *ctx, arg_hfence_gvma *a); -typedef arg_decode_insn3214 arg_hfence_bvma; -static bool trans_hfence_bvma(DisasContext *ctx, arg_hfence_bvma *a); -typedef arg_decode_insn3214 arg_sfence_vma; +typedef arg_decode_insn3222 arg_sfence_vma; static bool trans_sfence_vma(DisasContext *ctx, arg_sfence_vma *a); -typedef arg_decode_insn3215 arg_sfence_vm; +typedef arg_decode_insn3223 arg_sfence_vm; static bool trans_sfence_vm(DisasContext *ctx, arg_sfence_vm *a); typedef arg_u arg_lui; static bool trans_lui(DisasContext *ctx, arg_lui *a); @@ -194,21 +240,21 @@ typedef arg_r arg_or; static bool trans_or(DisasContext *ctx, arg_or *a); typedef arg_r arg_and; static bool trans_and(DisasContext *ctx, arg_and *a); -typedef arg_decode_insn3216 arg_fence; +typedef arg_decode_insn3224 arg_fence; static bool trans_fence(DisasContext *ctx, arg_fence *a); typedef arg_empty arg_fence_i; static bool trans_fence_i(DisasContext *ctx, arg_fence_i *a); -typedef arg_decode_insn329 arg_csrrw; +typedef arg_decode_insn3214 arg_csrrw; static bool trans_csrrw(DisasContext *ctx, arg_csrrw *a); -typedef arg_decode_insn329 arg_csrrs; +typedef arg_decode_insn3214 arg_csrrs; static bool 
trans_csrrs(DisasContext *ctx, arg_csrrs *a); -typedef arg_decode_insn329 arg_csrrc; +typedef arg_decode_insn3214 arg_csrrc; static bool trans_csrrc(DisasContext *ctx, arg_csrrc *a); -typedef arg_decode_insn329 arg_csrrwi; +typedef arg_decode_insn3214 arg_csrrwi; static bool trans_csrrwi(DisasContext *ctx, arg_csrrwi *a); -typedef arg_decode_insn329 arg_csrrsi; +typedef arg_decode_insn3214 arg_csrrsi; static bool trans_csrrsi(DisasContext *ctx, arg_csrrsi *a); -typedef arg_decode_insn329 arg_csrrci; +typedef arg_decode_insn3214 arg_csrrci; static bool trans_csrrci(DisasContext *ctx, arg_csrrci *a); typedef arg_r arg_mul; static bool trans_mul(DisasContext *ctx, arg_mul *a); @@ -252,23 +298,23 @@ typedef arg_i arg_flw; static bool trans_flw(DisasContext *ctx, arg_flw *a); typedef arg_s arg_fsw; static bool trans_fsw(DisasContext *ctx, arg_fsw *a); -typedef arg_decode_insn3210 arg_fmadd_s; +typedef arg_decode_insn3215 arg_fmadd_s; static bool trans_fmadd_s(DisasContext *ctx, arg_fmadd_s *a); -typedef arg_decode_insn3210 arg_fmsub_s; +typedef arg_decode_insn3215 arg_fmsub_s; static bool trans_fmsub_s(DisasContext *ctx, arg_fmsub_s *a); -typedef arg_decode_insn3210 arg_fnmsub_s; +typedef arg_decode_insn3215 arg_fnmsub_s; static bool trans_fnmsub_s(DisasContext *ctx, arg_fnmsub_s *a); -typedef arg_decode_insn3210 arg_fnmadd_s; +typedef arg_decode_insn3215 arg_fnmadd_s; static bool trans_fnmadd_s(DisasContext *ctx, arg_fnmadd_s *a); -typedef arg_decode_insn3211 arg_fadd_s; +typedef arg_decode_insn3216 arg_fadd_s; static bool trans_fadd_s(DisasContext *ctx, arg_fadd_s *a); -typedef arg_decode_insn3211 arg_fsub_s; +typedef arg_decode_insn3216 arg_fsub_s; static bool trans_fsub_s(DisasContext *ctx, arg_fsub_s *a); -typedef arg_decode_insn3211 arg_fmul_s; +typedef arg_decode_insn3216 arg_fmul_s; static bool trans_fmul_s(DisasContext *ctx, arg_fmul_s *a); -typedef arg_decode_insn3211 arg_fdiv_s; +typedef arg_decode_insn3216 arg_fdiv_s; static bool trans_fdiv_s(DisasContext *ctx, arg_fdiv_s *a); -typedef arg_decode_insn3212 arg_fsqrt_s; +typedef arg_decode_insn3217 arg_fsqrt_s; static bool trans_fsqrt_s(DisasContext *ctx, arg_fsqrt_s *a); typedef arg_r arg_fsgnj_s; static bool trans_fsgnj_s(DisasContext *ctx, arg_fsgnj_s *a); @@ -280,11 +326,11 @@ typedef arg_r arg_fmin_s; static bool trans_fmin_s(DisasContext *ctx, arg_fmin_s *a); typedef arg_r arg_fmax_s; static bool trans_fmax_s(DisasContext *ctx, arg_fmax_s *a); -typedef arg_decode_insn3212 arg_fcvt_w_s; +typedef arg_decode_insn3217 arg_fcvt_w_s; static bool trans_fcvt_w_s(DisasContext *ctx, arg_fcvt_w_s *a); -typedef arg_decode_insn3212 arg_fcvt_wu_s; +typedef arg_decode_insn3217 arg_fcvt_wu_s; static bool trans_fcvt_wu_s(DisasContext *ctx, arg_fcvt_wu_s *a); -typedef arg_decode_insn3213 arg_fmv_x_w; +typedef arg_decode_insn3218 arg_fmv_x_w; static bool trans_fmv_x_w(DisasContext *ctx, arg_fmv_x_w *a); typedef arg_r arg_feq_s; static bool trans_feq_s(DisasContext *ctx, arg_feq_s *a); @@ -292,35 +338,35 @@ typedef arg_r arg_flt_s; static bool trans_flt_s(DisasContext *ctx, arg_flt_s *a); typedef arg_r arg_fle_s; static bool trans_fle_s(DisasContext *ctx, arg_fle_s *a); -typedef arg_decode_insn3213 arg_fclass_s; +typedef arg_decode_insn3218 arg_fclass_s; static bool trans_fclass_s(DisasContext *ctx, arg_fclass_s *a); -typedef arg_decode_insn3212 arg_fcvt_s_w; +typedef arg_decode_insn3217 arg_fcvt_s_w; static bool trans_fcvt_s_w(DisasContext *ctx, arg_fcvt_s_w *a); -typedef arg_decode_insn3212 arg_fcvt_s_wu; +typedef arg_decode_insn3217 
arg_fcvt_s_wu; static bool trans_fcvt_s_wu(DisasContext *ctx, arg_fcvt_s_wu *a); -typedef arg_decode_insn3213 arg_fmv_w_x; +typedef arg_decode_insn3218 arg_fmv_w_x; static bool trans_fmv_w_x(DisasContext *ctx, arg_fmv_w_x *a); typedef arg_i arg_fld; static bool trans_fld(DisasContext *ctx, arg_fld *a); typedef arg_s arg_fsd; static bool trans_fsd(DisasContext *ctx, arg_fsd *a); -typedef arg_decode_insn3210 arg_fmadd_d; +typedef arg_decode_insn3215 arg_fmadd_d; static bool trans_fmadd_d(DisasContext *ctx, arg_fmadd_d *a); -typedef arg_decode_insn3210 arg_fmsub_d; +typedef arg_decode_insn3215 arg_fmsub_d; static bool trans_fmsub_d(DisasContext *ctx, arg_fmsub_d *a); -typedef arg_decode_insn3210 arg_fnmsub_d; +typedef arg_decode_insn3215 arg_fnmsub_d; static bool trans_fnmsub_d(DisasContext *ctx, arg_fnmsub_d *a); -typedef arg_decode_insn3210 arg_fnmadd_d; +typedef arg_decode_insn3215 arg_fnmadd_d; static bool trans_fnmadd_d(DisasContext *ctx, arg_fnmadd_d *a); -typedef arg_decode_insn3211 arg_fadd_d; +typedef arg_decode_insn3216 arg_fadd_d; static bool trans_fadd_d(DisasContext *ctx, arg_fadd_d *a); -typedef arg_decode_insn3211 arg_fsub_d; +typedef arg_decode_insn3216 arg_fsub_d; static bool trans_fsub_d(DisasContext *ctx, arg_fsub_d *a); -typedef arg_decode_insn3211 arg_fmul_d; +typedef arg_decode_insn3216 arg_fmul_d; static bool trans_fmul_d(DisasContext *ctx, arg_fmul_d *a); -typedef arg_decode_insn3211 arg_fdiv_d; +typedef arg_decode_insn3216 arg_fdiv_d; static bool trans_fdiv_d(DisasContext *ctx, arg_fdiv_d *a); -typedef arg_decode_insn3212 arg_fsqrt_d; +typedef arg_decode_insn3217 arg_fsqrt_d; static bool trans_fsqrt_d(DisasContext *ctx, arg_fsqrt_d *a); typedef arg_r arg_fsgnj_d; static bool trans_fsgnj_d(DisasContext *ctx, arg_fsgnj_d *a); @@ -332,9 +378,9 @@ typedef arg_r arg_fmin_d; static bool trans_fmin_d(DisasContext *ctx, arg_fmin_d *a); typedef arg_r arg_fmax_d; static bool trans_fmax_d(DisasContext *ctx, arg_fmax_d *a); -typedef arg_decode_insn3212 arg_fcvt_s_d; +typedef arg_decode_insn3217 arg_fcvt_s_d; static bool trans_fcvt_s_d(DisasContext *ctx, arg_fcvt_s_d *a); -typedef arg_decode_insn3212 arg_fcvt_d_s; +typedef arg_decode_insn3217 arg_fcvt_d_s; static bool trans_fcvt_d_s(DisasContext *ctx, arg_fcvt_d_s *a); typedef arg_r arg_feq_d; static bool trans_feq_d(DisasContext *ctx, arg_feq_d *a); @@ -342,16 +388,704 @@ typedef arg_r arg_flt_d; static bool trans_flt_d(DisasContext *ctx, arg_flt_d *a); typedef arg_r arg_fle_d; static bool trans_fle_d(DisasContext *ctx, arg_fle_d *a); -typedef arg_decode_insn3213 arg_fclass_d; +typedef arg_decode_insn3218 arg_fclass_d; static bool trans_fclass_d(DisasContext *ctx, arg_fclass_d *a); -typedef arg_decode_insn3212 arg_fcvt_w_d; +typedef arg_decode_insn3217 arg_fcvt_w_d; static bool trans_fcvt_w_d(DisasContext *ctx, arg_fcvt_w_d *a); -typedef arg_decode_insn3212 arg_fcvt_wu_d; +typedef arg_decode_insn3217 arg_fcvt_wu_d; static bool trans_fcvt_wu_d(DisasContext *ctx, arg_fcvt_wu_d *a); -typedef arg_decode_insn3212 arg_fcvt_d_w; +typedef arg_decode_insn3217 arg_fcvt_d_w; static bool trans_fcvt_d_w(DisasContext *ctx, arg_fcvt_d_w *a); -typedef arg_decode_insn3212 arg_fcvt_d_wu; +typedef arg_decode_insn3217 arg_fcvt_d_wu; static bool trans_fcvt_d_wu(DisasContext *ctx, arg_fcvt_d_wu *a); +typedef arg_decode_insn3222 arg_hfence_gvma; +static bool trans_hfence_gvma(DisasContext *ctx, arg_hfence_gvma *a); +typedef arg_decode_insn3222 arg_hfence_vvma; +static bool trans_hfence_vvma(DisasContext *ctx, arg_hfence_vvma *a); +typedef arg_r2nfvm 
arg_vlb_v; +static bool trans_vlb_v(DisasContext *ctx, arg_vlb_v *a); +typedef arg_r2nfvm arg_vlh_v; +static bool trans_vlh_v(DisasContext *ctx, arg_vlh_v *a); +typedef arg_r2nfvm arg_vlw_v; +static bool trans_vlw_v(DisasContext *ctx, arg_vlw_v *a); +typedef arg_r2nfvm arg_vle_v; +static bool trans_vle_v(DisasContext *ctx, arg_vle_v *a); +typedef arg_r2nfvm arg_vlbu_v; +static bool trans_vlbu_v(DisasContext *ctx, arg_vlbu_v *a); +typedef arg_r2nfvm arg_vlhu_v; +static bool trans_vlhu_v(DisasContext *ctx, arg_vlhu_v *a); +typedef arg_r2nfvm arg_vlwu_v; +static bool trans_vlwu_v(DisasContext *ctx, arg_vlwu_v *a); +typedef arg_r2nfvm arg_vlbff_v; +static bool trans_vlbff_v(DisasContext *ctx, arg_vlbff_v *a); +typedef arg_r2nfvm arg_vlhff_v; +static bool trans_vlhff_v(DisasContext *ctx, arg_vlhff_v *a); +typedef arg_r2nfvm arg_vlwff_v; +static bool trans_vlwff_v(DisasContext *ctx, arg_vlwff_v *a); +typedef arg_r2nfvm arg_vleff_v; +static bool trans_vleff_v(DisasContext *ctx, arg_vleff_v *a); +typedef arg_r2nfvm arg_vlbuff_v; +static bool trans_vlbuff_v(DisasContext *ctx, arg_vlbuff_v *a); +typedef arg_r2nfvm arg_vlhuff_v; +static bool trans_vlhuff_v(DisasContext *ctx, arg_vlhuff_v *a); +typedef arg_r2nfvm arg_vlwuff_v; +static bool trans_vlwuff_v(DisasContext *ctx, arg_vlwuff_v *a); +typedef arg_r2nfvm arg_vsb_v; +static bool trans_vsb_v(DisasContext *ctx, arg_vsb_v *a); +typedef arg_r2nfvm arg_vsh_v; +static bool trans_vsh_v(DisasContext *ctx, arg_vsh_v *a); +typedef arg_r2nfvm arg_vsw_v; +static bool trans_vsw_v(DisasContext *ctx, arg_vsw_v *a); +typedef arg_r2nfvm arg_vse_v; +static bool trans_vse_v(DisasContext *ctx, arg_vse_v *a); +typedef arg_rnfvm arg_vlsb_v; +static bool trans_vlsb_v(DisasContext *ctx, arg_vlsb_v *a); +typedef arg_rnfvm arg_vlsh_v; +static bool trans_vlsh_v(DisasContext *ctx, arg_vlsh_v *a); +typedef arg_rnfvm arg_vlsw_v; +static bool trans_vlsw_v(DisasContext *ctx, arg_vlsw_v *a); +typedef arg_rnfvm arg_vlse_v; +static bool trans_vlse_v(DisasContext *ctx, arg_vlse_v *a); +typedef arg_rnfvm arg_vlsbu_v; +static bool trans_vlsbu_v(DisasContext *ctx, arg_vlsbu_v *a); +typedef arg_rnfvm arg_vlshu_v; +static bool trans_vlshu_v(DisasContext *ctx, arg_vlshu_v *a); +typedef arg_rnfvm arg_vlswu_v; +static bool trans_vlswu_v(DisasContext *ctx, arg_vlswu_v *a); +typedef arg_rnfvm arg_vssb_v; +static bool trans_vssb_v(DisasContext *ctx, arg_vssb_v *a); +typedef arg_rnfvm arg_vssh_v; +static bool trans_vssh_v(DisasContext *ctx, arg_vssh_v *a); +typedef arg_rnfvm arg_vssw_v; +static bool trans_vssw_v(DisasContext *ctx, arg_vssw_v *a); +typedef arg_rnfvm arg_vsse_v; +static bool trans_vsse_v(DisasContext *ctx, arg_vsse_v *a); +typedef arg_rnfvm arg_vlxb_v; +static bool trans_vlxb_v(DisasContext *ctx, arg_vlxb_v *a); +typedef arg_rnfvm arg_vlxh_v; +static bool trans_vlxh_v(DisasContext *ctx, arg_vlxh_v *a); +typedef arg_rnfvm arg_vlxw_v; +static bool trans_vlxw_v(DisasContext *ctx, arg_vlxw_v *a); +typedef arg_rnfvm arg_vlxe_v; +static bool trans_vlxe_v(DisasContext *ctx, arg_vlxe_v *a); +typedef arg_rnfvm arg_vlxbu_v; +static bool trans_vlxbu_v(DisasContext *ctx, arg_vlxbu_v *a); +typedef arg_rnfvm arg_vlxhu_v; +static bool trans_vlxhu_v(DisasContext *ctx, arg_vlxhu_v *a); +typedef arg_rnfvm arg_vlxwu_v; +static bool trans_vlxwu_v(DisasContext *ctx, arg_vlxwu_v *a); +typedef arg_rnfvm arg_vsxb_v; +static bool trans_vsxb_v(DisasContext *ctx, arg_vsxb_v *a); +typedef arg_rnfvm arg_vsxh_v; +static bool trans_vsxh_v(DisasContext *ctx, arg_vsxh_v *a); +typedef arg_rnfvm arg_vsxw_v; 
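+/*
+ * These arg_* aliases map the new vector instruction formats onto the
+ * decodetree-generated argument structs declared above: r2nfvm and rnfvm
+ * carry the segment count (nf) and mask bit (vm) for the vector loads and
+ * stores, rwdvm adds the wd flag used by the vector AMO patterns, and rmrr
+ * covers the masked rd/rs1/rs2 ALU form.
+ */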
+static bool trans_vsxw_v(DisasContext *ctx, arg_vsxw_v *a); +typedef arg_rnfvm arg_vsxe_v; +static bool trans_vsxe_v(DisasContext *ctx, arg_vsxe_v *a); +typedef arg_rwdvm arg_vamoswapw_v; +static bool trans_vamoswapw_v(DisasContext *ctx, arg_vamoswapw_v *a); +typedef arg_rwdvm arg_vamoaddw_v; +static bool trans_vamoaddw_v(DisasContext *ctx, arg_vamoaddw_v *a); +typedef arg_rwdvm arg_vamoxorw_v; +static bool trans_vamoxorw_v(DisasContext *ctx, arg_vamoxorw_v *a); +typedef arg_rwdvm arg_vamoandw_v; +static bool trans_vamoandw_v(DisasContext *ctx, arg_vamoandw_v *a); +typedef arg_rwdvm arg_vamoorw_v; +static bool trans_vamoorw_v(DisasContext *ctx, arg_vamoorw_v *a); +typedef arg_rwdvm arg_vamominw_v; +static bool trans_vamominw_v(DisasContext *ctx, arg_vamominw_v *a); +typedef arg_rwdvm arg_vamomaxw_v; +static bool trans_vamomaxw_v(DisasContext *ctx, arg_vamomaxw_v *a); +typedef arg_rwdvm arg_vamominuw_v; +static bool trans_vamominuw_v(DisasContext *ctx, arg_vamominuw_v *a); +typedef arg_rwdvm arg_vamomaxuw_v; +static bool trans_vamomaxuw_v(DisasContext *ctx, arg_vamomaxuw_v *a); +typedef arg_rmrr arg_vadd_vv; +static bool trans_vadd_vv(DisasContext *ctx, arg_vadd_vv *a); +typedef arg_rmrr arg_vadd_vx; +static bool trans_vadd_vx(DisasContext *ctx, arg_vadd_vx *a); +typedef arg_rmrr arg_vadd_vi; +static bool trans_vadd_vi(DisasContext *ctx, arg_vadd_vi *a); +typedef arg_rmrr arg_vsub_vv; +static bool trans_vsub_vv(DisasContext *ctx, arg_vsub_vv *a); +typedef arg_rmrr arg_vsub_vx; +static bool trans_vsub_vx(DisasContext *ctx, arg_vsub_vx *a); +typedef arg_rmrr arg_vrsub_vx; +static bool trans_vrsub_vx(DisasContext *ctx, arg_vrsub_vx *a); +typedef arg_rmrr arg_vrsub_vi; +static bool trans_vrsub_vi(DisasContext *ctx, arg_vrsub_vi *a); +typedef arg_rmrr arg_vwaddu_vv; +static bool trans_vwaddu_vv(DisasContext *ctx, arg_vwaddu_vv *a); +typedef arg_rmrr arg_vwaddu_vx; +static bool trans_vwaddu_vx(DisasContext *ctx, arg_vwaddu_vx *a); +typedef arg_rmrr arg_vwadd_vv; +static bool trans_vwadd_vv(DisasContext *ctx, arg_vwadd_vv *a); +typedef arg_rmrr arg_vwadd_vx; +static bool trans_vwadd_vx(DisasContext *ctx, arg_vwadd_vx *a); +typedef arg_rmrr arg_vwsubu_vv; +static bool trans_vwsubu_vv(DisasContext *ctx, arg_vwsubu_vv *a); +typedef arg_rmrr arg_vwsubu_vx; +static bool trans_vwsubu_vx(DisasContext *ctx, arg_vwsubu_vx *a); +typedef arg_rmrr arg_vwsub_vv; +static bool trans_vwsub_vv(DisasContext *ctx, arg_vwsub_vv *a); +typedef arg_rmrr arg_vwsub_vx; +static bool trans_vwsub_vx(DisasContext *ctx, arg_vwsub_vx *a); +typedef arg_rmrr arg_vwaddu_wv; +static bool trans_vwaddu_wv(DisasContext *ctx, arg_vwaddu_wv *a); +typedef arg_rmrr arg_vwaddu_wx; +static bool trans_vwaddu_wx(DisasContext *ctx, arg_vwaddu_wx *a); +typedef arg_rmrr arg_vwadd_wv; +static bool trans_vwadd_wv(DisasContext *ctx, arg_vwadd_wv *a); +typedef arg_rmrr arg_vwadd_wx; +static bool trans_vwadd_wx(DisasContext *ctx, arg_vwadd_wx *a); +typedef arg_rmrr arg_vwsubu_wv; +static bool trans_vwsubu_wv(DisasContext *ctx, arg_vwsubu_wv *a); +typedef arg_rmrr arg_vwsubu_wx; +static bool trans_vwsubu_wx(DisasContext *ctx, arg_vwsubu_wx *a); +typedef arg_rmrr arg_vwsub_wv; +static bool trans_vwsub_wv(DisasContext *ctx, arg_vwsub_wv *a); +typedef arg_rmrr arg_vwsub_wx; +static bool trans_vwsub_wx(DisasContext *ctx, arg_vwsub_wx *a); +typedef arg_rmrr arg_vadc_vvm; +static bool trans_vadc_vvm(DisasContext *ctx, arg_vadc_vvm *a); +typedef arg_rmrr arg_vadc_vxm; +static bool trans_vadc_vxm(DisasContext *ctx, arg_vadc_vxm *a); +typedef arg_rmrr 
arg_vadc_vim; +static bool trans_vadc_vim(DisasContext *ctx, arg_vadc_vim *a); +typedef arg_rmrr arg_vmadc_vvm; +static bool trans_vmadc_vvm(DisasContext *ctx, arg_vmadc_vvm *a); +typedef arg_rmrr arg_vmadc_vxm; +static bool trans_vmadc_vxm(DisasContext *ctx, arg_vmadc_vxm *a); +typedef arg_rmrr arg_vmadc_vim; +static bool trans_vmadc_vim(DisasContext *ctx, arg_vmadc_vim *a); +typedef arg_rmrr arg_vsbc_vvm; +static bool trans_vsbc_vvm(DisasContext *ctx, arg_vsbc_vvm *a); +typedef arg_rmrr arg_vsbc_vxm; +static bool trans_vsbc_vxm(DisasContext *ctx, arg_vsbc_vxm *a); +typedef arg_rmrr arg_vmsbc_vvm; +static bool trans_vmsbc_vvm(DisasContext *ctx, arg_vmsbc_vvm *a); +typedef arg_rmrr arg_vmsbc_vxm; +static bool trans_vmsbc_vxm(DisasContext *ctx, arg_vmsbc_vxm *a); +typedef arg_rmrr arg_vand_vv; +static bool trans_vand_vv(DisasContext *ctx, arg_vand_vv *a); +typedef arg_rmrr arg_vand_vx; +static bool trans_vand_vx(DisasContext *ctx, arg_vand_vx *a); +typedef arg_rmrr arg_vand_vi; +static bool trans_vand_vi(DisasContext *ctx, arg_vand_vi *a); +typedef arg_rmrr arg_vor_vv; +static bool trans_vor_vv(DisasContext *ctx, arg_vor_vv *a); +typedef arg_rmrr arg_vor_vx; +static bool trans_vor_vx(DisasContext *ctx, arg_vor_vx *a); +typedef arg_rmrr arg_vor_vi; +static bool trans_vor_vi(DisasContext *ctx, arg_vor_vi *a); +typedef arg_rmrr arg_vxor_vv; +static bool trans_vxor_vv(DisasContext *ctx, arg_vxor_vv *a); +typedef arg_rmrr arg_vxor_vx; +static bool trans_vxor_vx(DisasContext *ctx, arg_vxor_vx *a); +typedef arg_rmrr arg_vxor_vi; +static bool trans_vxor_vi(DisasContext *ctx, arg_vxor_vi *a); +typedef arg_rmrr arg_vsll_vv; +static bool trans_vsll_vv(DisasContext *ctx, arg_vsll_vv *a); +typedef arg_rmrr arg_vsll_vx; +static bool trans_vsll_vx(DisasContext *ctx, arg_vsll_vx *a); +typedef arg_rmrr arg_vsll_vi; +static bool trans_vsll_vi(DisasContext *ctx, arg_vsll_vi *a); +typedef arg_rmrr arg_vsrl_vv; +static bool trans_vsrl_vv(DisasContext *ctx, arg_vsrl_vv *a); +typedef arg_rmrr arg_vsrl_vx; +static bool trans_vsrl_vx(DisasContext *ctx, arg_vsrl_vx *a); +typedef arg_rmrr arg_vsrl_vi; +static bool trans_vsrl_vi(DisasContext *ctx, arg_vsrl_vi *a); +typedef arg_rmrr arg_vsra_vv; +static bool trans_vsra_vv(DisasContext *ctx, arg_vsra_vv *a); +typedef arg_rmrr arg_vsra_vx; +static bool trans_vsra_vx(DisasContext *ctx, arg_vsra_vx *a); +typedef arg_rmrr arg_vsra_vi; +static bool trans_vsra_vi(DisasContext *ctx, arg_vsra_vi *a); +typedef arg_rmrr arg_vnsrl_vv; +static bool trans_vnsrl_vv(DisasContext *ctx, arg_vnsrl_vv *a); +typedef arg_rmrr arg_vnsrl_vx; +static bool trans_vnsrl_vx(DisasContext *ctx, arg_vnsrl_vx *a); +typedef arg_rmrr arg_vnsrl_vi; +static bool trans_vnsrl_vi(DisasContext *ctx, arg_vnsrl_vi *a); +typedef arg_rmrr arg_vnsra_vv; +static bool trans_vnsra_vv(DisasContext *ctx, arg_vnsra_vv *a); +typedef arg_rmrr arg_vnsra_vx; +static bool trans_vnsra_vx(DisasContext *ctx, arg_vnsra_vx *a); +typedef arg_rmrr arg_vnsra_vi; +static bool trans_vnsra_vi(DisasContext *ctx, arg_vnsra_vi *a); +typedef arg_rmrr arg_vmseq_vv; +static bool trans_vmseq_vv(DisasContext *ctx, arg_vmseq_vv *a); +typedef arg_rmrr arg_vmseq_vx; +static bool trans_vmseq_vx(DisasContext *ctx, arg_vmseq_vx *a); +typedef arg_rmrr arg_vmseq_vi; +static bool trans_vmseq_vi(DisasContext *ctx, arg_vmseq_vi *a); +typedef arg_rmrr arg_vmsne_vv; +static bool trans_vmsne_vv(DisasContext *ctx, arg_vmsne_vv *a); +typedef arg_rmrr arg_vmsne_vx; +static bool trans_vmsne_vx(DisasContext *ctx, arg_vmsne_vx *a); +typedef arg_rmrr arg_vmsne_vi; 
+static bool trans_vmsne_vi(DisasContext *ctx, arg_vmsne_vi *a); +typedef arg_rmrr arg_vmsltu_vv; +static bool trans_vmsltu_vv(DisasContext *ctx, arg_vmsltu_vv *a); +typedef arg_rmrr arg_vmsltu_vx; +static bool trans_vmsltu_vx(DisasContext *ctx, arg_vmsltu_vx *a); +typedef arg_rmrr arg_vmslt_vv; +static bool trans_vmslt_vv(DisasContext *ctx, arg_vmslt_vv *a); +typedef arg_rmrr arg_vmslt_vx; +static bool trans_vmslt_vx(DisasContext *ctx, arg_vmslt_vx *a); +typedef arg_rmrr arg_vmsleu_vv; +static bool trans_vmsleu_vv(DisasContext *ctx, arg_vmsleu_vv *a); +typedef arg_rmrr arg_vmsleu_vx; +static bool trans_vmsleu_vx(DisasContext *ctx, arg_vmsleu_vx *a); +typedef arg_rmrr arg_vmsleu_vi; +static bool trans_vmsleu_vi(DisasContext *ctx, arg_vmsleu_vi *a); +typedef arg_rmrr arg_vmsle_vv; +static bool trans_vmsle_vv(DisasContext *ctx, arg_vmsle_vv *a); +typedef arg_rmrr arg_vmsle_vx; +static bool trans_vmsle_vx(DisasContext *ctx, arg_vmsle_vx *a); +typedef arg_rmrr arg_vmsle_vi; +static bool trans_vmsle_vi(DisasContext *ctx, arg_vmsle_vi *a); +typedef arg_rmrr arg_vmsgtu_vx; +static bool trans_vmsgtu_vx(DisasContext *ctx, arg_vmsgtu_vx *a); +typedef arg_rmrr arg_vmsgtu_vi; +static bool trans_vmsgtu_vi(DisasContext *ctx, arg_vmsgtu_vi *a); +typedef arg_rmrr arg_vmsgt_vx; +static bool trans_vmsgt_vx(DisasContext *ctx, arg_vmsgt_vx *a); +typedef arg_rmrr arg_vmsgt_vi; +static bool trans_vmsgt_vi(DisasContext *ctx, arg_vmsgt_vi *a); +typedef arg_rmrr arg_vminu_vv; +static bool trans_vminu_vv(DisasContext *ctx, arg_vminu_vv *a); +typedef arg_rmrr arg_vminu_vx; +static bool trans_vminu_vx(DisasContext *ctx, arg_vminu_vx *a); +typedef arg_rmrr arg_vmin_vv; +static bool trans_vmin_vv(DisasContext *ctx, arg_vmin_vv *a); +typedef arg_rmrr arg_vmin_vx; +static bool trans_vmin_vx(DisasContext *ctx, arg_vmin_vx *a); +typedef arg_rmrr arg_vmaxu_vv; +static bool trans_vmaxu_vv(DisasContext *ctx, arg_vmaxu_vv *a); +typedef arg_rmrr arg_vmaxu_vx; +static bool trans_vmaxu_vx(DisasContext *ctx, arg_vmaxu_vx *a); +typedef arg_rmrr arg_vmax_vv; +static bool trans_vmax_vv(DisasContext *ctx, arg_vmax_vv *a); +typedef arg_rmrr arg_vmax_vx; +static bool trans_vmax_vx(DisasContext *ctx, arg_vmax_vx *a); +typedef arg_rmrr arg_vmul_vv; +static bool trans_vmul_vv(DisasContext *ctx, arg_vmul_vv *a); +typedef arg_rmrr arg_vmul_vx; +static bool trans_vmul_vx(DisasContext *ctx, arg_vmul_vx *a); +typedef arg_rmrr arg_vmulh_vv; +static bool trans_vmulh_vv(DisasContext *ctx, arg_vmulh_vv *a); +typedef arg_rmrr arg_vmulh_vx; +static bool trans_vmulh_vx(DisasContext *ctx, arg_vmulh_vx *a); +typedef arg_rmrr arg_vmulhu_vv; +static bool trans_vmulhu_vv(DisasContext *ctx, arg_vmulhu_vv *a); +typedef arg_rmrr arg_vmulhu_vx; +static bool trans_vmulhu_vx(DisasContext *ctx, arg_vmulhu_vx *a); +typedef arg_rmrr arg_vmulhsu_vv; +static bool trans_vmulhsu_vv(DisasContext *ctx, arg_vmulhsu_vv *a); +typedef arg_rmrr arg_vmulhsu_vx; +static bool trans_vmulhsu_vx(DisasContext *ctx, arg_vmulhsu_vx *a); +typedef arg_rmrr arg_vdivu_vv; +static bool trans_vdivu_vv(DisasContext *ctx, arg_vdivu_vv *a); +typedef arg_rmrr arg_vdivu_vx; +static bool trans_vdivu_vx(DisasContext *ctx, arg_vdivu_vx *a); +typedef arg_rmrr arg_vdiv_vv; +static bool trans_vdiv_vv(DisasContext *ctx, arg_vdiv_vv *a); +typedef arg_rmrr arg_vdiv_vx; +static bool trans_vdiv_vx(DisasContext *ctx, arg_vdiv_vx *a); +typedef arg_rmrr arg_vremu_vv; +static bool trans_vremu_vv(DisasContext *ctx, arg_vremu_vv *a); +typedef arg_rmrr arg_vremu_vx; +static bool trans_vremu_vx(DisasContext *ctx, 
arg_vremu_vx *a); +typedef arg_rmrr arg_vrem_vv; +static bool trans_vrem_vv(DisasContext *ctx, arg_vrem_vv *a); +typedef arg_rmrr arg_vrem_vx; +static bool trans_vrem_vx(DisasContext *ctx, arg_vrem_vx *a); +typedef arg_rmrr arg_vwmulu_vv; +static bool trans_vwmulu_vv(DisasContext *ctx, arg_vwmulu_vv *a); +typedef arg_rmrr arg_vwmulu_vx; +static bool trans_vwmulu_vx(DisasContext *ctx, arg_vwmulu_vx *a); +typedef arg_rmrr arg_vwmulsu_vv; +static bool trans_vwmulsu_vv(DisasContext *ctx, arg_vwmulsu_vv *a); +typedef arg_rmrr arg_vwmulsu_vx; +static bool trans_vwmulsu_vx(DisasContext *ctx, arg_vwmulsu_vx *a); +typedef arg_rmrr arg_vwmul_vv; +static bool trans_vwmul_vv(DisasContext *ctx, arg_vwmul_vv *a); +typedef arg_rmrr arg_vwmul_vx; +static bool trans_vwmul_vx(DisasContext *ctx, arg_vwmul_vx *a); +typedef arg_rmrr arg_vmacc_vv; +static bool trans_vmacc_vv(DisasContext *ctx, arg_vmacc_vv *a); +typedef arg_rmrr arg_vmacc_vx; +static bool trans_vmacc_vx(DisasContext *ctx, arg_vmacc_vx *a); +typedef arg_rmrr arg_vnmsac_vv; +static bool trans_vnmsac_vv(DisasContext *ctx, arg_vnmsac_vv *a); +typedef arg_rmrr arg_vnmsac_vx; +static bool trans_vnmsac_vx(DisasContext *ctx, arg_vnmsac_vx *a); +typedef arg_rmrr arg_vmadd_vv; +static bool trans_vmadd_vv(DisasContext *ctx, arg_vmadd_vv *a); +typedef arg_rmrr arg_vmadd_vx; +static bool trans_vmadd_vx(DisasContext *ctx, arg_vmadd_vx *a); +typedef arg_rmrr arg_vnmsub_vv; +static bool trans_vnmsub_vv(DisasContext *ctx, arg_vnmsub_vv *a); +typedef arg_rmrr arg_vnmsub_vx; +static bool trans_vnmsub_vx(DisasContext *ctx, arg_vnmsub_vx *a); +typedef arg_rmrr arg_vwmaccu_vv; +static bool trans_vwmaccu_vv(DisasContext *ctx, arg_vwmaccu_vv *a); +typedef arg_rmrr arg_vwmaccu_vx; +static bool trans_vwmaccu_vx(DisasContext *ctx, arg_vwmaccu_vx *a); +typedef arg_rmrr arg_vwmacc_vv; +static bool trans_vwmacc_vv(DisasContext *ctx, arg_vwmacc_vv *a); +typedef arg_rmrr arg_vwmacc_vx; +static bool trans_vwmacc_vx(DisasContext *ctx, arg_vwmacc_vx *a); +typedef arg_rmrr arg_vwmaccsu_vv; +static bool trans_vwmaccsu_vv(DisasContext *ctx, arg_vwmaccsu_vv *a); +typedef arg_rmrr arg_vwmaccsu_vx; +static bool trans_vwmaccsu_vx(DisasContext *ctx, arg_vwmaccsu_vx *a); +typedef arg_rmrr arg_vwmaccus_vx; +static bool trans_vwmaccus_vx(DisasContext *ctx, arg_vwmaccus_vx *a); +typedef arg_decode_insn3218 arg_vmv_v_v; +static bool trans_vmv_v_v(DisasContext *ctx, arg_vmv_v_v *a); +typedef arg_decode_insn3218 arg_vmv_v_x; +static bool trans_vmv_v_x(DisasContext *ctx, arg_vmv_v_x *a); +typedef arg_decode_insn3218 arg_vmv_v_i; +static bool trans_vmv_v_i(DisasContext *ctx, arg_vmv_v_i *a); +typedef arg_rmrr arg_vmerge_vvm; +static bool trans_vmerge_vvm(DisasContext *ctx, arg_vmerge_vvm *a); +typedef arg_rmrr arg_vmerge_vxm; +static bool trans_vmerge_vxm(DisasContext *ctx, arg_vmerge_vxm *a); +typedef arg_rmrr arg_vmerge_vim; +static bool trans_vmerge_vim(DisasContext *ctx, arg_vmerge_vim *a); +typedef arg_rmrr arg_vsaddu_vv; +static bool trans_vsaddu_vv(DisasContext *ctx, arg_vsaddu_vv *a); +typedef arg_rmrr arg_vsaddu_vx; +static bool trans_vsaddu_vx(DisasContext *ctx, arg_vsaddu_vx *a); +typedef arg_rmrr arg_vsaddu_vi; +static bool trans_vsaddu_vi(DisasContext *ctx, arg_vsaddu_vi *a); +typedef arg_rmrr arg_vsadd_vv; +static bool trans_vsadd_vv(DisasContext *ctx, arg_vsadd_vv *a); +typedef arg_rmrr arg_vsadd_vx; +static bool trans_vsadd_vx(DisasContext *ctx, arg_vsadd_vx *a); +typedef arg_rmrr arg_vsadd_vi; +static bool trans_vsadd_vi(DisasContext *ctx, arg_vsadd_vi *a); +typedef arg_rmrr 
arg_vssubu_vv; +static bool trans_vssubu_vv(DisasContext *ctx, arg_vssubu_vv *a); +typedef arg_rmrr arg_vssubu_vx; +static bool trans_vssubu_vx(DisasContext *ctx, arg_vssubu_vx *a); +typedef arg_rmrr arg_vssub_vv; +static bool trans_vssub_vv(DisasContext *ctx, arg_vssub_vv *a); +typedef arg_rmrr arg_vssub_vx; +static bool trans_vssub_vx(DisasContext *ctx, arg_vssub_vx *a); +typedef arg_rmrr arg_vaadd_vv; +static bool trans_vaadd_vv(DisasContext *ctx, arg_vaadd_vv *a); +typedef arg_rmrr arg_vaadd_vx; +static bool trans_vaadd_vx(DisasContext *ctx, arg_vaadd_vx *a); +typedef arg_rmrr arg_vaadd_vi; +static bool trans_vaadd_vi(DisasContext *ctx, arg_vaadd_vi *a); +typedef arg_rmrr arg_vasub_vv; +static bool trans_vasub_vv(DisasContext *ctx, arg_vasub_vv *a); +typedef arg_rmrr arg_vasub_vx; +static bool trans_vasub_vx(DisasContext *ctx, arg_vasub_vx *a); +typedef arg_rmrr arg_vsmul_vv; +static bool trans_vsmul_vv(DisasContext *ctx, arg_vsmul_vv *a); +typedef arg_rmrr arg_vsmul_vx; +static bool trans_vsmul_vx(DisasContext *ctx, arg_vsmul_vx *a); +typedef arg_rmrr arg_vwsmaccu_vv; +static bool trans_vwsmaccu_vv(DisasContext *ctx, arg_vwsmaccu_vv *a); +typedef arg_rmrr arg_vwsmaccu_vx; +static bool trans_vwsmaccu_vx(DisasContext *ctx, arg_vwsmaccu_vx *a); +typedef arg_rmrr arg_vwsmacc_vv; +static bool trans_vwsmacc_vv(DisasContext *ctx, arg_vwsmacc_vv *a); +typedef arg_rmrr arg_vwsmacc_vx; +static bool trans_vwsmacc_vx(DisasContext *ctx, arg_vwsmacc_vx *a); +typedef arg_rmrr arg_vwsmaccsu_vv; +static bool trans_vwsmaccsu_vv(DisasContext *ctx, arg_vwsmaccsu_vv *a); +typedef arg_rmrr arg_vwsmaccsu_vx; +static bool trans_vwsmaccsu_vx(DisasContext *ctx, arg_vwsmaccsu_vx *a); +typedef arg_rmrr arg_vwsmaccus_vx; +static bool trans_vwsmaccus_vx(DisasContext *ctx, arg_vwsmaccus_vx *a); +typedef arg_rmrr arg_vssrl_vv; +static bool trans_vssrl_vv(DisasContext *ctx, arg_vssrl_vv *a); +typedef arg_rmrr arg_vssrl_vx; +static bool trans_vssrl_vx(DisasContext *ctx, arg_vssrl_vx *a); +typedef arg_rmrr arg_vssrl_vi; +static bool trans_vssrl_vi(DisasContext *ctx, arg_vssrl_vi *a); +typedef arg_rmrr arg_vssra_vv; +static bool trans_vssra_vv(DisasContext *ctx, arg_vssra_vv *a); +typedef arg_rmrr arg_vssra_vx; +static bool trans_vssra_vx(DisasContext *ctx, arg_vssra_vx *a); +typedef arg_rmrr arg_vssra_vi; +static bool trans_vssra_vi(DisasContext *ctx, arg_vssra_vi *a); +typedef arg_rmrr arg_vnclipu_vv; +static bool trans_vnclipu_vv(DisasContext *ctx, arg_vnclipu_vv *a); +typedef arg_rmrr arg_vnclipu_vx; +static bool trans_vnclipu_vx(DisasContext *ctx, arg_vnclipu_vx *a); +typedef arg_rmrr arg_vnclipu_vi; +static bool trans_vnclipu_vi(DisasContext *ctx, arg_vnclipu_vi *a); +typedef arg_rmrr arg_vnclip_vv; +static bool trans_vnclip_vv(DisasContext *ctx, arg_vnclip_vv *a); +typedef arg_rmrr arg_vnclip_vx; +static bool trans_vnclip_vx(DisasContext *ctx, arg_vnclip_vx *a); +typedef arg_rmrr arg_vnclip_vi; +static bool trans_vnclip_vi(DisasContext *ctx, arg_vnclip_vi *a); +typedef arg_rmrr arg_vfadd_vv; +static bool trans_vfadd_vv(DisasContext *ctx, arg_vfadd_vv *a); +typedef arg_rmrr arg_vfadd_vf; +static bool trans_vfadd_vf(DisasContext *ctx, arg_vfadd_vf *a); +typedef arg_rmrr arg_vfsub_vv; +static bool trans_vfsub_vv(DisasContext *ctx, arg_vfsub_vv *a); +typedef arg_rmrr arg_vfsub_vf; +static bool trans_vfsub_vf(DisasContext *ctx, arg_vfsub_vf *a); +typedef arg_rmrr arg_vfrsub_vf; +static bool trans_vfrsub_vf(DisasContext *ctx, arg_vfrsub_vf *a); +typedef arg_rmrr arg_vfwadd_vv; +static bool trans_vfwadd_vv(DisasContext 
*ctx, arg_vfwadd_vv *a); +typedef arg_rmrr arg_vfwadd_vf; +static bool trans_vfwadd_vf(DisasContext *ctx, arg_vfwadd_vf *a); +typedef arg_rmrr arg_vfwadd_wv; +static bool trans_vfwadd_wv(DisasContext *ctx, arg_vfwadd_wv *a); +typedef arg_rmrr arg_vfwadd_wf; +static bool trans_vfwadd_wf(DisasContext *ctx, arg_vfwadd_wf *a); +typedef arg_rmrr arg_vfwsub_vv; +static bool trans_vfwsub_vv(DisasContext *ctx, arg_vfwsub_vv *a); +typedef arg_rmrr arg_vfwsub_vf; +static bool trans_vfwsub_vf(DisasContext *ctx, arg_vfwsub_vf *a); +typedef arg_rmrr arg_vfwsub_wv; +static bool trans_vfwsub_wv(DisasContext *ctx, arg_vfwsub_wv *a); +typedef arg_rmrr arg_vfwsub_wf; +static bool trans_vfwsub_wf(DisasContext *ctx, arg_vfwsub_wf *a); +typedef arg_rmrr arg_vfmul_vv; +static bool trans_vfmul_vv(DisasContext *ctx, arg_vfmul_vv *a); +typedef arg_rmrr arg_vfmul_vf; +static bool trans_vfmul_vf(DisasContext *ctx, arg_vfmul_vf *a); +typedef arg_rmrr arg_vfdiv_vv; +static bool trans_vfdiv_vv(DisasContext *ctx, arg_vfdiv_vv *a); +typedef arg_rmrr arg_vfdiv_vf; +static bool trans_vfdiv_vf(DisasContext *ctx, arg_vfdiv_vf *a); +typedef arg_rmrr arg_vfrdiv_vf; +static bool trans_vfrdiv_vf(DisasContext *ctx, arg_vfrdiv_vf *a); +typedef arg_rmrr arg_vfwmul_vv; +static bool trans_vfwmul_vv(DisasContext *ctx, arg_vfwmul_vv *a); +typedef arg_rmrr arg_vfwmul_vf; +static bool trans_vfwmul_vf(DisasContext *ctx, arg_vfwmul_vf *a); +typedef arg_rmrr arg_vfmacc_vv; +static bool trans_vfmacc_vv(DisasContext *ctx, arg_vfmacc_vv *a); +typedef arg_rmrr arg_vfnmacc_vv; +static bool trans_vfnmacc_vv(DisasContext *ctx, arg_vfnmacc_vv *a); +typedef arg_rmrr arg_vfnmacc_vf; +static bool trans_vfnmacc_vf(DisasContext *ctx, arg_vfnmacc_vf *a); +typedef arg_rmrr arg_vfmacc_vf; +static bool trans_vfmacc_vf(DisasContext *ctx, arg_vfmacc_vf *a); +typedef arg_rmrr arg_vfmsac_vv; +static bool trans_vfmsac_vv(DisasContext *ctx, arg_vfmsac_vv *a); +typedef arg_rmrr arg_vfmsac_vf; +static bool trans_vfmsac_vf(DisasContext *ctx, arg_vfmsac_vf *a); +typedef arg_rmrr arg_vfnmsac_vv; +static bool trans_vfnmsac_vv(DisasContext *ctx, arg_vfnmsac_vv *a); +typedef arg_rmrr arg_vfnmsac_vf; +static bool trans_vfnmsac_vf(DisasContext *ctx, arg_vfnmsac_vf *a); +typedef arg_rmrr arg_vfmadd_vv; +static bool trans_vfmadd_vv(DisasContext *ctx, arg_vfmadd_vv *a); +typedef arg_rmrr arg_vfmadd_vf; +static bool trans_vfmadd_vf(DisasContext *ctx, arg_vfmadd_vf *a); +typedef arg_rmrr arg_vfnmadd_vv; +static bool trans_vfnmadd_vv(DisasContext *ctx, arg_vfnmadd_vv *a); +typedef arg_rmrr arg_vfnmadd_vf; +static bool trans_vfnmadd_vf(DisasContext *ctx, arg_vfnmadd_vf *a); +typedef arg_rmrr arg_vfmsub_vv; +static bool trans_vfmsub_vv(DisasContext *ctx, arg_vfmsub_vv *a); +typedef arg_rmrr arg_vfmsub_vf; +static bool trans_vfmsub_vf(DisasContext *ctx, arg_vfmsub_vf *a); +typedef arg_rmrr arg_vfnmsub_vv; +static bool trans_vfnmsub_vv(DisasContext *ctx, arg_vfnmsub_vv *a); +typedef arg_rmrr arg_vfnmsub_vf; +static bool trans_vfnmsub_vf(DisasContext *ctx, arg_vfnmsub_vf *a); +typedef arg_rmrr arg_vfwmacc_vv; +static bool trans_vfwmacc_vv(DisasContext *ctx, arg_vfwmacc_vv *a); +typedef arg_rmrr arg_vfwmacc_vf; +static bool trans_vfwmacc_vf(DisasContext *ctx, arg_vfwmacc_vf *a); +typedef arg_rmrr arg_vfwnmacc_vv; +static bool trans_vfwnmacc_vv(DisasContext *ctx, arg_vfwnmacc_vv *a); +typedef arg_rmrr arg_vfwnmacc_vf; +static bool trans_vfwnmacc_vf(DisasContext *ctx, arg_vfwnmacc_vf *a); +typedef arg_rmrr arg_vfwmsac_vv; +static bool trans_vfwmsac_vv(DisasContext *ctx, arg_vfwmsac_vv 
*a); +typedef arg_rmrr arg_vfwmsac_vf; +static bool trans_vfwmsac_vf(DisasContext *ctx, arg_vfwmsac_vf *a); +typedef arg_rmrr arg_vfwnmsac_vv; +static bool trans_vfwnmsac_vv(DisasContext *ctx, arg_vfwnmsac_vv *a); +typedef arg_rmrr arg_vfwnmsac_vf; +static bool trans_vfwnmsac_vf(DisasContext *ctx, arg_vfwnmsac_vf *a); +typedef arg_rmr arg_vfsqrt_v; +static bool trans_vfsqrt_v(DisasContext *ctx, arg_vfsqrt_v *a); +typedef arg_rmrr arg_vfmin_vv; +static bool trans_vfmin_vv(DisasContext *ctx, arg_vfmin_vv *a); +typedef arg_rmrr arg_vfmin_vf; +static bool trans_vfmin_vf(DisasContext *ctx, arg_vfmin_vf *a); +typedef arg_rmrr arg_vfmax_vv; +static bool trans_vfmax_vv(DisasContext *ctx, arg_vfmax_vv *a); +typedef arg_rmrr arg_vfmax_vf; +static bool trans_vfmax_vf(DisasContext *ctx, arg_vfmax_vf *a); +typedef arg_rmrr arg_vfsgnj_vv; +static bool trans_vfsgnj_vv(DisasContext *ctx, arg_vfsgnj_vv *a); +typedef arg_rmrr arg_vfsgnj_vf; +static bool trans_vfsgnj_vf(DisasContext *ctx, arg_vfsgnj_vf *a); +typedef arg_rmrr arg_vfsgnjn_vv; +static bool trans_vfsgnjn_vv(DisasContext *ctx, arg_vfsgnjn_vv *a); +typedef arg_rmrr arg_vfsgnjn_vf; +static bool trans_vfsgnjn_vf(DisasContext *ctx, arg_vfsgnjn_vf *a); +typedef arg_rmrr arg_vfsgnjx_vv; +static bool trans_vfsgnjx_vv(DisasContext *ctx, arg_vfsgnjx_vv *a); +typedef arg_rmrr arg_vfsgnjx_vf; +static bool trans_vfsgnjx_vf(DisasContext *ctx, arg_vfsgnjx_vf *a); +typedef arg_rmrr arg_vmfeq_vv; +static bool trans_vmfeq_vv(DisasContext *ctx, arg_vmfeq_vv *a); +typedef arg_rmrr arg_vmfeq_vf; +static bool trans_vmfeq_vf(DisasContext *ctx, arg_vmfeq_vf *a); +typedef arg_rmrr arg_vmfne_vv; +static bool trans_vmfne_vv(DisasContext *ctx, arg_vmfne_vv *a); +typedef arg_rmrr arg_vmfne_vf; +static bool trans_vmfne_vf(DisasContext *ctx, arg_vmfne_vf *a); +typedef arg_rmrr arg_vmflt_vv; +static bool trans_vmflt_vv(DisasContext *ctx, arg_vmflt_vv *a); +typedef arg_rmrr arg_vmflt_vf; +static bool trans_vmflt_vf(DisasContext *ctx, arg_vmflt_vf *a); +typedef arg_rmrr arg_vmfle_vv; +static bool trans_vmfle_vv(DisasContext *ctx, arg_vmfle_vv *a); +typedef arg_rmrr arg_vmfle_vf; +static bool trans_vmfle_vf(DisasContext *ctx, arg_vmfle_vf *a); +typedef arg_rmrr arg_vmfgt_vf; +static bool trans_vmfgt_vf(DisasContext *ctx, arg_vmfgt_vf *a); +typedef arg_rmrr arg_vmfge_vf; +static bool trans_vmfge_vf(DisasContext *ctx, arg_vmfge_vf *a); +typedef arg_rmrr arg_vmford_vv; +static bool trans_vmford_vv(DisasContext *ctx, arg_vmford_vv *a); +typedef arg_rmrr arg_vmford_vf; +static bool trans_vmford_vf(DisasContext *ctx, arg_vmford_vf *a); +typedef arg_rmr arg_vfclass_v; +static bool trans_vfclass_v(DisasContext *ctx, arg_vfclass_v *a); +typedef arg_rmrr arg_vfmerge_vfm; +static bool trans_vfmerge_vfm(DisasContext *ctx, arg_vfmerge_vfm *a); +typedef arg_decode_insn3218 arg_vfmv_v_f; +static bool trans_vfmv_v_f(DisasContext *ctx, arg_vfmv_v_f *a); +typedef arg_rmr arg_vfcvt_xu_f_v; +static bool trans_vfcvt_xu_f_v(DisasContext *ctx, arg_vfcvt_xu_f_v *a); +typedef arg_rmr arg_vfcvt_x_f_v; +static bool trans_vfcvt_x_f_v(DisasContext *ctx, arg_vfcvt_x_f_v *a); +typedef arg_rmr arg_vfcvt_f_xu_v; +static bool trans_vfcvt_f_xu_v(DisasContext *ctx, arg_vfcvt_f_xu_v *a); +typedef arg_rmr arg_vfcvt_f_x_v; +static bool trans_vfcvt_f_x_v(DisasContext *ctx, arg_vfcvt_f_x_v *a); +typedef arg_rmr arg_vfwcvt_xu_f_v; +static bool trans_vfwcvt_xu_f_v(DisasContext *ctx, arg_vfwcvt_xu_f_v *a); +typedef arg_rmr arg_vfwcvt_x_f_v; +static bool trans_vfwcvt_x_f_v(DisasContext *ctx, arg_vfwcvt_x_f_v *a); +typedef 
arg_rmr arg_vfwcvt_f_xu_v; +static bool trans_vfwcvt_f_xu_v(DisasContext *ctx, arg_vfwcvt_f_xu_v *a); +typedef arg_rmr arg_vfwcvt_f_x_v; +static bool trans_vfwcvt_f_x_v(DisasContext *ctx, arg_vfwcvt_f_x_v *a); +typedef arg_rmr arg_vfwcvt_f_f_v; +static bool trans_vfwcvt_f_f_v(DisasContext *ctx, arg_vfwcvt_f_f_v *a); +typedef arg_rmr arg_vfncvt_xu_f_v; +static bool trans_vfncvt_xu_f_v(DisasContext *ctx, arg_vfncvt_xu_f_v *a); +typedef arg_rmr arg_vfncvt_x_f_v; +static bool trans_vfncvt_x_f_v(DisasContext *ctx, arg_vfncvt_x_f_v *a); +typedef arg_rmr arg_vfncvt_f_xu_v; +static bool trans_vfncvt_f_xu_v(DisasContext *ctx, arg_vfncvt_f_xu_v *a); +typedef arg_rmr arg_vfncvt_f_x_v; +static bool trans_vfncvt_f_x_v(DisasContext *ctx, arg_vfncvt_f_x_v *a); +typedef arg_rmr arg_vfncvt_f_f_v; +static bool trans_vfncvt_f_f_v(DisasContext *ctx, arg_vfncvt_f_f_v *a); +typedef arg_rmrr arg_vredsum_vs; +static bool trans_vredsum_vs(DisasContext *ctx, arg_vredsum_vs *a); +typedef arg_rmrr arg_vredand_vs; +static bool trans_vredand_vs(DisasContext *ctx, arg_vredand_vs *a); +typedef arg_rmrr arg_vredor_vs; +static bool trans_vredor_vs(DisasContext *ctx, arg_vredor_vs *a); +typedef arg_rmrr arg_vredxor_vs; +static bool trans_vredxor_vs(DisasContext *ctx, arg_vredxor_vs *a); +typedef arg_rmrr arg_vredminu_vs; +static bool trans_vredminu_vs(DisasContext *ctx, arg_vredminu_vs *a); +typedef arg_rmrr arg_vredmin_vs; +static bool trans_vredmin_vs(DisasContext *ctx, arg_vredmin_vs *a); +typedef arg_rmrr arg_vredmaxu_vs; +static bool trans_vredmaxu_vs(DisasContext *ctx, arg_vredmaxu_vs *a); +typedef arg_rmrr arg_vredmax_vs; +static bool trans_vredmax_vs(DisasContext *ctx, arg_vredmax_vs *a); +typedef arg_rmrr arg_vwredsumu_vs; +static bool trans_vwredsumu_vs(DisasContext *ctx, arg_vwredsumu_vs *a); +typedef arg_rmrr arg_vwredsum_vs; +static bool trans_vwredsum_vs(DisasContext *ctx, arg_vwredsum_vs *a); +typedef arg_rmrr arg_vfredsum_vs; +static bool trans_vfredsum_vs(DisasContext *ctx, arg_vfredsum_vs *a); +typedef arg_rmrr arg_vfredmin_vs; +static bool trans_vfredmin_vs(DisasContext *ctx, arg_vfredmin_vs *a); +typedef arg_rmrr arg_vfredmax_vs; +static bool trans_vfredmax_vs(DisasContext *ctx, arg_vfredmax_vs *a); +typedef arg_rmrr arg_vfwredsum_vs; +static bool trans_vfwredsum_vs(DisasContext *ctx, arg_vfwredsum_vs *a); +typedef arg_r arg_vmand_mm; +static bool trans_vmand_mm(DisasContext *ctx, arg_vmand_mm *a); +typedef arg_r arg_vmnand_mm; +static bool trans_vmnand_mm(DisasContext *ctx, arg_vmnand_mm *a); +typedef arg_r arg_vmandnot_mm; +static bool trans_vmandnot_mm(DisasContext *ctx, arg_vmandnot_mm *a); +typedef arg_r arg_vmxor_mm; +static bool trans_vmxor_mm(DisasContext *ctx, arg_vmxor_mm *a); +typedef arg_r arg_vmor_mm; +static bool trans_vmor_mm(DisasContext *ctx, arg_vmor_mm *a); +typedef arg_r arg_vmnor_mm; +static bool trans_vmnor_mm(DisasContext *ctx, arg_vmnor_mm *a); +typedef arg_r arg_vmornot_mm; +static bool trans_vmornot_mm(DisasContext *ctx, arg_vmornot_mm *a); +typedef arg_r arg_vmxnor_mm; +static bool trans_vmxnor_mm(DisasContext *ctx, arg_vmxnor_mm *a); +typedef arg_rmr arg_vmpopc_m; +static bool trans_vmpopc_m(DisasContext *ctx, arg_vmpopc_m *a); +typedef arg_rmr arg_vmfirst_m; +static bool trans_vmfirst_m(DisasContext *ctx, arg_vmfirst_m *a); +typedef arg_rmr arg_vmsbf_m; +static bool trans_vmsbf_m(DisasContext *ctx, arg_vmsbf_m *a); +typedef arg_rmr arg_vmsif_m; +static bool trans_vmsif_m(DisasContext *ctx, arg_vmsif_m *a); +typedef arg_rmr arg_vmsof_m; +static bool trans_vmsof_m(DisasContext 
*ctx, arg_vmsof_m *a); +typedef arg_rmr arg_viota_m; +static bool trans_viota_m(DisasContext *ctx, arg_viota_m *a); +typedef arg_decode_insn3219 arg_vid_v; +static bool trans_vid_v(DisasContext *ctx, arg_vid_v *a); +typedef arg_r arg_vext_x_v; +static bool trans_vext_x_v(DisasContext *ctx, arg_vext_x_v *a); +typedef arg_decode_insn3218 arg_vmv_s_x; +static bool trans_vmv_s_x(DisasContext *ctx, arg_vmv_s_x *a); +typedef arg_decode_insn3220 arg_vfmv_f_s; +static bool trans_vfmv_f_s(DisasContext *ctx, arg_vfmv_f_s *a); +typedef arg_decode_insn3218 arg_vfmv_s_f; +static bool trans_vfmv_s_f(DisasContext *ctx, arg_vfmv_s_f *a); +typedef arg_rmrr arg_vslideup_vx; +static bool trans_vslideup_vx(DisasContext *ctx, arg_vslideup_vx *a); +typedef arg_rmrr arg_vslideup_vi; +static bool trans_vslideup_vi(DisasContext *ctx, arg_vslideup_vi *a); +typedef arg_rmrr arg_vslide1up_vx; +static bool trans_vslide1up_vx(DisasContext *ctx, arg_vslide1up_vx *a); +typedef arg_rmrr arg_vslidedown_vx; +static bool trans_vslidedown_vx(DisasContext *ctx, arg_vslidedown_vx *a); +typedef arg_rmrr arg_vslidedown_vi; +static bool trans_vslidedown_vi(DisasContext *ctx, arg_vslidedown_vi *a); +typedef arg_rmrr arg_vslide1down_vx; +static bool trans_vslide1down_vx(DisasContext *ctx, arg_vslide1down_vx *a); +typedef arg_rmrr arg_vrgather_vv; +static bool trans_vrgather_vv(DisasContext *ctx, arg_vrgather_vv *a); +typedef arg_rmrr arg_vrgather_vx; +static bool trans_vrgather_vx(DisasContext *ctx, arg_vrgather_vx *a); +typedef arg_rmrr arg_vrgather_vi; +static bool trans_vrgather_vi(DisasContext *ctx, arg_vrgather_vi *a); +typedef arg_r arg_vcompress_vm; +static bool trans_vcompress_vm(DisasContext *ctx, arg_vcompress_vm *a); +typedef arg_decode_insn3221 arg_vsetvli; +static bool trans_vsetvli(DisasContext *ctx, arg_vsetvli *a); +typedef arg_r arg_vsetvl; +static bool trans_vsetvl(DisasContext *ctx, arg_vsetvl *a); static void decode_insn32_extract_atom_ld(DisasContext *ctx, arg_atomic *a, uint32_t insn) { @@ -378,30 +1112,30 @@ static void decode_insn32_extract_b(DisasContext *ctx, arg_b *a, uint32_t insn) a->rs1 = extract32(insn, 15, 5); } -static void decode_insn32_extract_csr(DisasContext *ctx, arg_decode_insn329 *a, uint32_t insn) +static void decode_insn32_extract_csr(DisasContext *ctx, arg_decode_insn3214 *a, uint32_t insn) { a->csr = extract32(insn, 20, 12); a->rs1 = extract32(insn, 15, 5); a->rd = extract32(insn, 7, 5); } -static void decode_insn32_extract_decode_insn32_Fmt_18(DisasContext *ctx, arg_empty *a, uint32_t insn) +static void decode_insn32_extract_decode_insn32_Fmt_28(DisasContext *ctx, arg_empty *a, uint32_t insn) { } -static void decode_insn32_extract_decode_insn32_Fmt_19(DisasContext *ctx, arg_decode_insn3216 *a, uint32_t insn) +static void decode_insn32_extract_decode_insn32_Fmt_29(DisasContext *ctx, arg_decode_insn3224 *a, uint32_t insn) { a->pred = extract32(insn, 24, 4); a->succ = extract32(insn, 20, 4); } -static void decode_insn32_extract_hfence_bvma(DisasContext *ctx, arg_decode_insn3214 *a, uint32_t insn) +static void decode_insn32_extract_hfence_gvma(DisasContext *ctx, arg_decode_insn3222 *a, uint32_t insn) { a->rs2 = extract32(insn, 20, 5); a->rs1 = extract32(insn, 15, 5); } -static void decode_insn32_extract_hfence_gvma(DisasContext *ctx, arg_decode_insn3214 *a, uint32_t insn) +static void decode_insn32_extract_hfence_vvma(DisasContext *ctx, arg_decode_insn3222 *a, uint32_t insn) { a->rs2 = extract32(insn, 20, 5); a->rs1 = extract32(insn, 15, 5); @@ -427,20 +1161,54 @@ static void 
decode_insn32_extract_r(DisasContext *ctx, arg_r *a, uint32_t insn) a->rd = extract32(insn, 7, 5); } -static void decode_insn32_extract_r2(DisasContext *ctx, arg_decode_insn3213 *a, uint32_t insn) +static void decode_insn32_extract_r1_vm(DisasContext *ctx, arg_decode_insn3219 *a, uint32_t insn) +{ + a->vm = extract32(insn, 25, 1); + a->rd = extract32(insn, 7, 5); +} + +static void decode_insn32_extract_r2(DisasContext *ctx, arg_decode_insn3218 *a, uint32_t insn) +{ + a->rs1 = extract32(insn, 15, 5); + a->rd = extract32(insn, 7, 5); +} + +static void decode_insn32_extract_r2_nfvm(DisasContext *ctx, arg_r2nfvm *a, uint32_t insn) { + a->vm = extract32(insn, 25, 1); + a->nf = ex_plus_1(ctx, extract32(insn, 29, 3)); a->rs1 = extract32(insn, 15, 5); a->rd = extract32(insn, 7, 5); } -static void decode_insn32_extract_r2_rm(DisasContext *ctx, arg_decode_insn3212 *a, uint32_t insn) +static void decode_insn32_extract_r2_rm(DisasContext *ctx, arg_decode_insn3217 *a, uint32_t insn) { a->rs1 = extract32(insn, 15, 5); a->rm = extract32(insn, 12, 3); a->rd = extract32(insn, 7, 5); } -static void decode_insn32_extract_r4_rm(DisasContext *ctx, arg_decode_insn3210 *a, uint32_t insn) +static void decode_insn32_extract_r2_vm(DisasContext *ctx, arg_rmr *a, uint32_t insn) +{ + a->vm = extract32(insn, 25, 1); + a->rs2 = extract32(insn, 20, 5); + a->rd = extract32(insn, 7, 5); +} + +static void decode_insn32_extract_r2_zimm(DisasContext *ctx, arg_decode_insn3221 *a, uint32_t insn) +{ + a->zimm = extract32(insn, 20, 11); + a->rs1 = extract32(insn, 15, 5); + a->rd = extract32(insn, 7, 5); +} + +static void decode_insn32_extract_r2rd(DisasContext *ctx, arg_decode_insn3220 *a, uint32_t insn) +{ + a->rs2 = extract32(insn, 20, 5); + a->rd = extract32(insn, 7, 5); +} + +static void decode_insn32_extract_r4_rm(DisasContext *ctx, arg_decode_insn3215 *a, uint32_t insn) { a->rs3 = extract32(insn, 27, 5); a->rs2 = extract32(insn, 20, 5); @@ -449,7 +1217,16 @@ static void decode_insn32_extract_r4_rm(DisasContext *ctx, arg_decode_insn3210 * a->rd = extract32(insn, 7, 5); } -static void decode_insn32_extract_r_rm(DisasContext *ctx, arg_decode_insn3211 *a, uint32_t insn) +static void decode_insn32_extract_r_nfvm(DisasContext *ctx, arg_rnfvm *a, uint32_t insn) +{ + a->vm = extract32(insn, 25, 1); + a->nf = ex_plus_1(ctx, extract32(insn, 29, 3)); + a->rs2 = extract32(insn, 20, 5); + a->rs1 = extract32(insn, 15, 5); + a->rd = extract32(insn, 7, 5); +} + +static void decode_insn32_extract_r_rm(DisasContext *ctx, arg_decode_insn3216 *a, uint32_t insn) { a->rs2 = extract32(insn, 20, 5); a->rs1 = extract32(insn, 15, 5); @@ -457,6 +1234,39 @@ static void decode_insn32_extract_r_rm(DisasContext *ctx, arg_decode_insn3211 *a a->rd = extract32(insn, 7, 5); } +static void decode_insn32_extract_r_vm(DisasContext *ctx, arg_rmrr *a, uint32_t insn) +{ + a->vm = extract32(insn, 25, 1); + a->rs2 = extract32(insn, 20, 5); + a->rs1 = extract32(insn, 15, 5); + a->rd = extract32(insn, 7, 5); +} + +static void decode_insn32_extract_r_vm_0(DisasContext *ctx, arg_rmrr *a, uint32_t insn) +{ + a->vm = 0; + a->rs2 = extract32(insn, 20, 5); + a->rs1 = extract32(insn, 15, 5); + a->rd = extract32(insn, 7, 5); +} + +static void decode_insn32_extract_r_vm_1(DisasContext *ctx, arg_rmrr *a, uint32_t insn) +{ + a->vm = 1; + a->rs2 = extract32(insn, 20, 5); + a->rs1 = extract32(insn, 15, 5); + a->rd = extract32(insn, 7, 5); +} + +static void decode_insn32_extract_r_wdvm(DisasContext *ctx, arg_rwdvm *a, uint32_t insn) +{ + a->wd = extract32(insn, 26, 1); + a->vm = 
extract32(insn, 25, 1); + a->rs2 = extract32(insn, 20, 5); + a->rs1 = extract32(insn, 15, 5); + a->rd = extract32(insn, 7, 5); +} + static void decode_insn32_extract_s(DisasContext *ctx, arg_s *a, uint32_t insn) { a->imm = deposit32(extract32(insn, 7, 5), 5, 27, sextract32(insn, 25, 7)); @@ -464,12 +1274,12 @@ static void decode_insn32_extract_s(DisasContext *ctx, arg_s *a, uint32_t insn) a->rs1 = extract32(insn, 15, 5); } -static void decode_insn32_extract_sfence_vm(DisasContext *ctx, arg_decode_insn3215 *a, uint32_t insn) +static void decode_insn32_extract_sfence_vm(DisasContext *ctx, arg_decode_insn3223 *a, uint32_t insn) { a->rs1 = extract32(insn, 15, 5); } -static void decode_insn32_extract_sfence_vma(DisasContext *ctx, arg_decode_insn3214 *a, uint32_t insn) +static void decode_insn32_extract_sfence_vma(DisasContext *ctx, arg_decode_insn3222 *a, uint32_t insn) { a->rs2 = extract32(insn, 20, 5); a->rs1 = extract32(insn, 15, 5); @@ -493,18 +1303,26 @@ static bool decode_insn32(DisasContext *ctx, uint32_t insn) union { arg_atomic f_atomic; arg_b f_b; - arg_decode_insn3210 f_decode_insn3210; - arg_decode_insn3211 f_decode_insn3211; - arg_decode_insn3212 f_decode_insn3212; - arg_decode_insn3213 f_decode_insn3213; arg_decode_insn3214 f_decode_insn3214; arg_decode_insn3215 f_decode_insn3215; arg_decode_insn3216 f_decode_insn3216; - arg_decode_insn329 f_decode_insn329; + arg_decode_insn3217 f_decode_insn3217; + arg_decode_insn3218 f_decode_insn3218; + arg_decode_insn3219 f_decode_insn3219; + arg_decode_insn3220 f_decode_insn3220; + arg_decode_insn3221 f_decode_insn3221; + arg_decode_insn3222 f_decode_insn3222; + arg_decode_insn3223 f_decode_insn3223; + arg_decode_insn3224 f_decode_insn3224; arg_empty f_empty; arg_i f_i; arg_j f_j; arg_r f_r; + arg_r2nfvm f_r2nfvm; + arg_rmr f_rmr; + arg_rmrr f_rmrr; + arg_rnfvm f_rnfvm; + arg_rwdvm f_rwdvm; arg_s f_s; arg_shift f_shift; arg_u f_u; @@ -517,45 +1335,227 @@ static bool decode_insn32(DisasContext *ctx, uint32_t insn) switch ((insn >> 12) & 0x7) { case 0x0: /* ........ ........ .000.... .0000011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:96 */ if (trans_lb(ctx, &u.f_i)) return true; return false; case 0x1: /* ........ ........ .001.... .0000011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:97 */ if (trans_lh(ctx, &u.f_i)) return true; return false; case 0x2: /* ........ ........ .010.... .0000011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:98 */ if (trans_lw(ctx, &u.f_i)) return true; return false; case 0x4: /* ........ ........ .100.... .0000011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:99 */ if (trans_lbu(ctx, &u.f_i)) return true; return false; case 0x5: /* ........ ........ .101.... .0000011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:100 */ if (trans_lhu(ctx, &u.f_i)) return true; return false; } return false; case 0x00000007: /* ........ ........ ........ .0000111 */ - decode_insn32_extract_i(ctx, &u.f_i, insn); switch ((insn >> 12) & 0x7) { + case 0x0: + /* ........ ........ .000.... .0000111 */ + switch ((insn >> 26) & 0x7) { + case 0x0: + /* ...000.. ........ .000.... .0000111 */ + decode_insn32_extract_r2_nfvm(ctx, &u.f_r2nfvm, insn); + switch ((insn >> 20) & 0x1f) { + case 0x0: + /* ...000.0 0000.... .000.... .0000111 */ + if (trans_vlbu_v(ctx, &u.f_r2nfvm)) return true; + return false; + case 0x10: + /* ...000.1 0000.... .000.... 
.0000111 */ + if (trans_vlbuff_v(ctx, &u.f_r2nfvm)) return true; + return false; + } + return false; + case 0x2: + /* ...010.. ........ .000.... .0000111 */ + decode_insn32_extract_r_nfvm(ctx, &u.f_rnfvm, insn); + if (trans_vlsbu_v(ctx, &u.f_rnfvm)) return true; + return false; + case 0x3: + /* ...011.. ........ .000.... .0000111 */ + decode_insn32_extract_r_nfvm(ctx, &u.f_rnfvm, insn); + if (trans_vlxbu_v(ctx, &u.f_rnfvm)) return true; + return false; + case 0x4: + /* ...100.. ........ .000.... .0000111 */ + decode_insn32_extract_r2_nfvm(ctx, &u.f_r2nfvm, insn); + switch ((insn >> 20) & 0x1f) { + case 0x0: + /* ...100.0 0000.... .000.... .0000111 */ + if (trans_vlb_v(ctx, &u.f_r2nfvm)) return true; + return false; + case 0x10: + /* ...100.1 0000.... .000.... .0000111 */ + if (trans_vlbff_v(ctx, &u.f_r2nfvm)) return true; + return false; + } + return false; + case 0x6: + /* ...110.. ........ .000.... .0000111 */ + decode_insn32_extract_r_nfvm(ctx, &u.f_rnfvm, insn); + if (trans_vlsb_v(ctx, &u.f_rnfvm)) return true; + return false; + case 0x7: + /* ...111.. ........ .000.... .0000111 */ + decode_insn32_extract_r_nfvm(ctx, &u.f_rnfvm, insn); + if (trans_vlxb_v(ctx, &u.f_rnfvm)) return true; + return false; + } + return false; case 0x2: /* ........ ........ .010.... .0000111 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:156 */ + decode_insn32_extract_i(ctx, &u.f_i, insn); if (trans_flw(ctx, &u.f_i)) return true; return false; case 0x3: /* ........ ........ .011.... .0000111 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:184 */ + decode_insn32_extract_i(ctx, &u.f_i, insn); if (trans_fld(ctx, &u.f_i)) return true; return false; + case 0x5: + /* ........ ........ .101.... .0000111 */ + switch ((insn >> 26) & 0x7) { + case 0x0: + /* ...000.. ........ .101.... .0000111 */ + decode_insn32_extract_r2_nfvm(ctx, &u.f_r2nfvm, insn); + switch ((insn >> 20) & 0x1f) { + case 0x0: + /* ...000.0 0000.... .101.... .0000111 */ + if (trans_vlhu_v(ctx, &u.f_r2nfvm)) return true; + return false; + case 0x10: + /* ...000.1 0000.... .101.... .0000111 */ + if (trans_vlhuff_v(ctx, &u.f_r2nfvm)) return true; + return false; + } + return false; + case 0x2: + /* ...010.. ........ .101.... .0000111 */ + decode_insn32_extract_r_nfvm(ctx, &u.f_rnfvm, insn); + if (trans_vlshu_v(ctx, &u.f_rnfvm)) return true; + return false; + case 0x3: + /* ...011.. ........ .101.... .0000111 */ + decode_insn32_extract_r_nfvm(ctx, &u.f_rnfvm, insn); + if (trans_vlxhu_v(ctx, &u.f_rnfvm)) return true; + return false; + case 0x4: + /* ...100.. ........ .101.... .0000111 */ + decode_insn32_extract_r2_nfvm(ctx, &u.f_r2nfvm, insn); + switch ((insn >> 20) & 0x1f) { + case 0x0: + /* ...100.0 0000.... .101.... .0000111 */ + if (trans_vlh_v(ctx, &u.f_r2nfvm)) return true; + return false; + case 0x10: + /* ...100.1 0000.... .101.... .0000111 */ + if (trans_vlhff_v(ctx, &u.f_r2nfvm)) return true; + return false; + } + return false; + case 0x6: + /* ...110.. ........ .101.... .0000111 */ + decode_insn32_extract_r_nfvm(ctx, &u.f_rnfvm, insn); + if (trans_vlsh_v(ctx, &u.f_rnfvm)) return true; + return false; + case 0x7: + /* ...111.. ........ .101.... .0000111 */ + decode_insn32_extract_r_nfvm(ctx, &u.f_rnfvm, insn); + if (trans_vlxh_v(ctx, &u.f_rnfvm)) return true; + return false; + } + return false; + case 0x6: + /* ........ ........ .110.... .0000111 */ + switch ((insn >> 26) & 0x7) { + case 0x0: + /* ...000.. ........ .110.... 
.0000111 */ + decode_insn32_extract_r2_nfvm(ctx, &u.f_r2nfvm, insn); + switch ((insn >> 20) & 0x1f) { + case 0x0: + /* ...000.0 0000.... .110.... .0000111 */ + if (trans_vlwu_v(ctx, &u.f_r2nfvm)) return true; + return false; + case 0x10: + /* ...000.1 0000.... .110.... .0000111 */ + if (trans_vlwuff_v(ctx, &u.f_r2nfvm)) return true; + return false; + } + return false; + case 0x2: + /* ...010.. ........ .110.... .0000111 */ + decode_insn32_extract_r_nfvm(ctx, &u.f_rnfvm, insn); + if (trans_vlswu_v(ctx, &u.f_rnfvm)) return true; + return false; + case 0x3: + /* ...011.. ........ .110.... .0000111 */ + decode_insn32_extract_r_nfvm(ctx, &u.f_rnfvm, insn); + if (trans_vlxwu_v(ctx, &u.f_rnfvm)) return true; + return false; + case 0x4: + /* ...100.. ........ .110.... .0000111 */ + decode_insn32_extract_r2_nfvm(ctx, &u.f_r2nfvm, insn); + switch ((insn >> 20) & 0x1f) { + case 0x0: + /* ...100.0 0000.... .110.... .0000111 */ + if (trans_vlw_v(ctx, &u.f_r2nfvm)) return true; + return false; + case 0x10: + /* ...100.1 0000.... .110.... .0000111 */ + if (trans_vlwff_v(ctx, &u.f_r2nfvm)) return true; + return false; + } + return false; + case 0x6: + /* ...110.. ........ .110.... .0000111 */ + decode_insn32_extract_r_nfvm(ctx, &u.f_rnfvm, insn); + if (trans_vlsw_v(ctx, &u.f_rnfvm)) return true; + return false; + case 0x7: + /* ...111.. ........ .110.... .0000111 */ + decode_insn32_extract_r_nfvm(ctx, &u.f_rnfvm, insn); + if (trans_vlxw_v(ctx, &u.f_rnfvm)) return true; + return false; + } + return false; + case 0x7: + /* ........ ........ .111.... .0000111 */ + switch ((insn >> 26) & 0x7) { + case 0x0: + /* ...000.. ........ .111.... .0000111 */ + decode_insn32_extract_r2_nfvm(ctx, &u.f_r2nfvm, insn); + switch ((insn >> 20) & 0x1f) { + case 0x0: + /* ...000.0 0000.... .111.... .0000111 */ + if (trans_vle_v(ctx, &u.f_r2nfvm)) return true; + return false; + case 0x10: + /* ...000.1 0000.... .111.... .0000111 */ + if (trans_vleff_v(ctx, &u.f_r2nfvm)) return true; + return false; + } + return false; + case 0x2: + /* ...010.. ........ .111.... .0000111 */ + decode_insn32_extract_r_nfvm(ctx, &u.f_rnfvm, insn); + if (trans_vlse_v(ctx, &u.f_rnfvm)) return true; + return false; + case 0x3: + /* ...011.. ........ .111.... .0000111 */ + decode_insn32_extract_r_nfvm(ctx, &u.f_rnfvm, insn); + if (trans_vlxe_v(ctx, &u.f_rnfvm)) return true; + return false; + } + return false; } return false; case 0x0000000f: @@ -563,14 +1563,12 @@ static bool decode_insn32(DisasContext *ctx, uint32_t insn) switch ((insn >> 12) & 0x7) { case 0x0: /* ........ ........ .000.... .0001111 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:123 */ - decode_insn32_extract_decode_insn32_Fmt_19(ctx, &u.f_decode_insn3216, insn); - if (trans_fence(ctx, &u.f_decode_insn3216)) return true; + decode_insn32_extract_decode_insn32_Fmt_29(ctx, &u.f_decode_insn3224, insn); + if (trans_fence(ctx, &u.f_decode_insn3224)) return true; return false; case 0x1: /* ........ ........ .001.... .0001111 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:124 */ - decode_insn32_extract_decode_insn32_Fmt_18(ctx, &u.f_empty, insn); + decode_insn32_extract_decode_insn32_Fmt_28(ctx, &u.f_empty, insn); if (trans_fence_i(ctx, &u.f_empty)) return true; return false; } @@ -580,7 +1578,6 @@ static bool decode_insn32(DisasContext *ctx, uint32_t insn) switch ((insn >> 12) & 0x7) { case 0x0: /* ........ ........ .000.... 
.0010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:104 */ decode_insn32_extract_i(ctx, &u.f_i, insn); if (trans_addi(ctx, &u.f_i)) return true; return false; @@ -590,26 +1587,22 @@ static bool decode_insn32(DisasContext *ctx, uint32_t insn) switch ((insn >> 30) & 0x3) { case 0x0: /* 00...... ........ .001.... .0010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:110 */ if (trans_slli(ctx, &u.f_shift)) return true; return false; } return false; case 0x2: /* ........ ........ .010.... .0010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:105 */ decode_insn32_extract_i(ctx, &u.f_i, insn); if (trans_slti(ctx, &u.f_i)) return true; return false; case 0x3: /* ........ ........ .011.... .0010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:106 */ decode_insn32_extract_i(ctx, &u.f_i, insn); if (trans_sltiu(ctx, &u.f_i)) return true; return false; case 0x4: /* ........ ........ .100.... .0010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:107 */ decode_insn32_extract_i(ctx, &u.f_i, insn); if (trans_xori(ctx, &u.f_i)) return true; return false; @@ -619,25 +1612,21 @@ static bool decode_insn32(DisasContext *ctx, uint32_t insn) switch ((insn >> 30) & 0x3) { case 0x0: /* 00...... ........ .101.... .0010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:111 */ if (trans_srli(ctx, &u.f_shift)) return true; return false; case 0x1: /* 01...... ........ .101.... .0010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:112 */ if (trans_srai(ctx, &u.f_shift)) return true; return false; } return false; case 0x6: /* ........ ........ .110.... .0010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:108 */ decode_insn32_extract_i(ctx, &u.f_i, insn); if (trans_ori(ctx, &u.f_i)) return true; return false; case 0x7: /* ........ ........ .111.... .0010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:109 */ decode_insn32_extract_i(ctx, &u.f_i, insn); if (trans_andi(ctx, &u.f_i)) return true; return false; @@ -645,7 +1634,6 @@ static bool decode_insn32(DisasContext *ctx, uint32_t insn) return false; case 0x00000017: /* ........ ........ ........ .0010111 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:87 */ decode_insn32_extract_u(ctx, &u.f_u, insn); if (trans_auipc(ctx, &u.f_u)) return true; return false; @@ -655,35 +1643,151 @@ static bool decode_insn32(DisasContext *ctx, uint32_t insn) switch ((insn >> 12) & 0x7) { case 0x0: /* ........ ........ .000.... .0100011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:101 */ if (trans_sb(ctx, &u.f_s)) return true; return false; case 0x1: /* ........ ........ .001.... .0100011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:102 */ if (trans_sh(ctx, &u.f_s)) return true; return false; case 0x2: /* ........ ........ .010.... .0100011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:103 */ if (trans_sw(ctx, &u.f_s)) return true; return false; } return false; case 0x00000027: /* ........ ........ ........ .0100111 */ - decode_insn32_extract_s(ctx, &u.f_s, insn); switch ((insn >> 12) & 0x7) { + case 0x0: + /* ........ ........ .000.... .0100111 */ + switch ((insn >> 26) & 0x3) { + case 0x0: + /* ....00.. ........ .000.... 
.0100111 */ + decode_insn32_extract_r2_nfvm(ctx, &u.f_r2nfvm, insn); + switch (insn & 0x11f00000) { + case 0x00000000: + /* ...000.0 0000.... .000.... .0100111 */ + if (trans_vsb_v(ctx, &u.f_r2nfvm)) return true; + return false; + } + return false; + case 0x2: + /* ....10.. ........ .000.... .0100111 */ + decode_insn32_extract_r_nfvm(ctx, &u.f_rnfvm, insn); + switch ((insn >> 28) & 0x1) { + case 0x0: + /* ...010.. ........ .000.... .0100111 */ + if (trans_vssb_v(ctx, &u.f_rnfvm)) return true; + return false; + } + return false; + case 0x3: + /* ....11.. ........ .000.... .0100111 */ + decode_insn32_extract_r_nfvm(ctx, &u.f_rnfvm, insn); + if (trans_vsxb_v(ctx, &u.f_rnfvm)) return true; + return false; + } + return false; case 0x2: /* ........ ........ .010.... .0100111 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:157 */ + decode_insn32_extract_s(ctx, &u.f_s, insn); if (trans_fsw(ctx, &u.f_s)) return true; return false; case 0x3: /* ........ ........ .011.... .0100111 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:185 */ + decode_insn32_extract_s(ctx, &u.f_s, insn); if (trans_fsd(ctx, &u.f_s)) return true; return false; + case 0x5: + /* ........ ........ .101.... .0100111 */ + switch ((insn >> 26) & 0x3) { + case 0x0: + /* ....00.. ........ .101.... .0100111 */ + decode_insn32_extract_r2_nfvm(ctx, &u.f_r2nfvm, insn); + switch (insn & 0x11f00000) { + case 0x00000000: + /* ...000.0 0000.... .101.... .0100111 */ + if (trans_vsh_v(ctx, &u.f_r2nfvm)) return true; + return false; + } + return false; + case 0x2: + /* ....10.. ........ .101.... .0100111 */ + decode_insn32_extract_r_nfvm(ctx, &u.f_rnfvm, insn); + switch ((insn >> 28) & 0x1) { + case 0x0: + /* ...010.. ........ .101.... .0100111 */ + if (trans_vssh_v(ctx, &u.f_rnfvm)) return true; + return false; + } + return false; + case 0x3: + /* ....11.. ........ .101.... .0100111 */ + decode_insn32_extract_r_nfvm(ctx, &u.f_rnfvm, insn); + if (trans_vsxh_v(ctx, &u.f_rnfvm)) return true; + return false; + } + return false; + case 0x6: + /* ........ ........ .110.... .0100111 */ + switch ((insn >> 26) & 0x3) { + case 0x0: + /* ....00.. ........ .110.... .0100111 */ + decode_insn32_extract_r2_nfvm(ctx, &u.f_r2nfvm, insn); + switch (insn & 0x11f00000) { + case 0x00000000: + /* ...000.0 0000.... .110.... .0100111 */ + if (trans_vsw_v(ctx, &u.f_r2nfvm)) return true; + return false; + } + return false; + case 0x2: + /* ....10.. ........ .110.... .0100111 */ + decode_insn32_extract_r_nfvm(ctx, &u.f_rnfvm, insn); + switch ((insn >> 28) & 0x1) { + case 0x0: + /* ...010.. ........ .110.... .0100111 */ + if (trans_vssw_v(ctx, &u.f_rnfvm)) return true; + return false; + } + return false; + case 0x3: + /* ....11.. ........ .110.... .0100111 */ + decode_insn32_extract_r_nfvm(ctx, &u.f_rnfvm, insn); + if (trans_vsxw_v(ctx, &u.f_rnfvm)) return true; + return false; + } + return false; + case 0x7: + /* ........ ........ .111.... .0100111 */ + switch ((insn >> 26) & 0x3) { + case 0x0: + /* ....00.. ........ .111.... .0100111 */ + decode_insn32_extract_r2_nfvm(ctx, &u.f_r2nfvm, insn); + switch (insn & 0x11f00000) { + case 0x00000000: + /* ...000.0 0000.... .111.... .0100111 */ + if (trans_vse_v(ctx, &u.f_r2nfvm)) return true; + return false; + } + return false; + case 0x2: + /* ....10.. ........ .111.... .0100111 */ + decode_insn32_extract_r_nfvm(ctx, &u.f_rnfvm, insn); + switch ((insn >> 28) & 0x1) { + case 0x0: + /* ...010.. ........ .111.... 
.0100111 */ + if (trans_vsse_v(ctx, &u.f_rnfvm)) return true; + return false; + } + return false; + case 0x3: + /* ....11.. ........ .111.... .0100111 */ + decode_insn32_extract_r_nfvm(ctx, &u.f_rnfvm, insn); + if (trans_vsxe_v(ctx, &u.f_rnfvm)) return true; + return false; + } + return false; } return false; case 0x0000002f: @@ -691,75 +1795,109 @@ static bool decode_insn32(DisasContext *ctx, uint32_t insn) switch (insn & 0xf8007000) { case 0x00002000: /* 00000... ........ .010.... .0101111 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:146 */ decode_insn32_extract_atom_st(ctx, &u.f_atomic, insn); if (trans_amoadd_w(ctx, &u.f_atomic)) return true; return false; + case 0x00006000: + /* 00000... ........ .110.... .0101111 */ + decode_insn32_extract_r_wdvm(ctx, &u.f_rwdvm, insn); + if (trans_vamoaddw_v(ctx, &u.f_rwdvm)) return true; + return false; case 0x08002000: /* 00001... ........ .010.... .0101111 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:145 */ decode_insn32_extract_atom_st(ctx, &u.f_atomic, insn); if (trans_amoswap_w(ctx, &u.f_atomic)) return true; return false; + case 0x08006000: + /* 00001... ........ .110.... .0101111 */ + decode_insn32_extract_r_wdvm(ctx, &u.f_rwdvm, insn); + if (trans_vamoswapw_v(ctx, &u.f_rwdvm)) return true; + return false; case 0x10002000: /* 00010... ........ .010.... .0101111 */ decode_insn32_extract_atom_ld(ctx, &u.f_atomic, insn); switch ((insn >> 20) & 0x1f) { case 0x0: /* 00010..0 0000.... .010.... .0101111 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:143 */ if (trans_lr_w(ctx, &u.f_atomic)) return true; return false; } return false; case 0x18002000: /* 00011... ........ .010.... .0101111 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:144 */ decode_insn32_extract_atom_st(ctx, &u.f_atomic, insn); if (trans_sc_w(ctx, &u.f_atomic)) return true; return false; case 0x20002000: /* 00100... ........ .010.... .0101111 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:147 */ decode_insn32_extract_atom_st(ctx, &u.f_atomic, insn); if (trans_amoxor_w(ctx, &u.f_atomic)) return true; return false; + case 0x20006000: + /* 00100... ........ .110.... .0101111 */ + decode_insn32_extract_r_wdvm(ctx, &u.f_rwdvm, insn); + if (trans_vamoxorw_v(ctx, &u.f_rwdvm)) return true; + return false; case 0x40002000: /* 01000... ........ .010.... .0101111 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:149 */ decode_insn32_extract_atom_st(ctx, &u.f_atomic, insn); if (trans_amoor_w(ctx, &u.f_atomic)) return true; return false; + case 0x40006000: + /* 01000... ........ .110.... .0101111 */ + decode_insn32_extract_r_wdvm(ctx, &u.f_rwdvm, insn); + if (trans_vamoorw_v(ctx, &u.f_rwdvm)) return true; + return false; case 0x60002000: /* 01100... ........ .010.... .0101111 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:148 */ decode_insn32_extract_atom_st(ctx, &u.f_atomic, insn); if (trans_amoand_w(ctx, &u.f_atomic)) return true; return false; + case 0x60006000: + /* 01100... ........ .110.... .0101111 */ + decode_insn32_extract_r_wdvm(ctx, &u.f_rwdvm, insn); + if (trans_vamoandw_v(ctx, &u.f_rwdvm)) return true; + return false; case 0x80002000: /* 10000... ........ .010.... 
.0101111 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:150 */ decode_insn32_extract_atom_st(ctx, &u.f_atomic, insn); if (trans_amomin_w(ctx, &u.f_atomic)) return true; return false; + case 0x80006000: + /* 10000... ........ .110.... .0101111 */ + decode_insn32_extract_r_wdvm(ctx, &u.f_rwdvm, insn); + if (trans_vamominw_v(ctx, &u.f_rwdvm)) return true; + return false; case 0xa0002000: /* 10100... ........ .010.... .0101111 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:151 */ decode_insn32_extract_atom_st(ctx, &u.f_atomic, insn); if (trans_amomax_w(ctx, &u.f_atomic)) return true; return false; + case 0xa0006000: + /* 10100... ........ .110.... .0101111 */ + decode_insn32_extract_r_wdvm(ctx, &u.f_rwdvm, insn); + if (trans_vamomaxw_v(ctx, &u.f_rwdvm)) return true; + return false; case 0xc0002000: /* 11000... ........ .010.... .0101111 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:152 */ decode_insn32_extract_atom_st(ctx, &u.f_atomic, insn); if (trans_amominu_w(ctx, &u.f_atomic)) return true; return false; + case 0xc0006000: + /* 11000... ........ .110.... .0101111 */ + decode_insn32_extract_r_wdvm(ctx, &u.f_rwdvm, insn); + if (trans_vamominuw_v(ctx, &u.f_rwdvm)) return true; + return false; case 0xe0002000: /* 11100... ........ .010.... .0101111 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:153 */ decode_insn32_extract_atom_st(ctx, &u.f_atomic, insn); if (trans_amomaxu_w(ctx, &u.f_atomic)) return true; return false; + case 0xe0006000: + /* 11100... ........ .110.... .0101111 */ + decode_insn32_extract_r_wdvm(ctx, &u.f_rwdvm, insn); + if (trans_vamomaxuw_v(ctx, &u.f_rwdvm)) return true; + return false; } return false; case 0x00000033: @@ -768,163 +1906,136 @@ static bool decode_insn32(DisasContext *ctx, uint32_t insn) switch (insn & 0xfe007000) { case 0x00000000: /* 0000000. ........ .000.... .0110011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:113 */ if (trans_add(ctx, &u.f_r)) return true; return false; case 0x00001000: /* 0000000. ........ .001.... .0110011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:115 */ if (trans_sll(ctx, &u.f_r)) return true; return false; case 0x00002000: /* 0000000. ........ .010.... .0110011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:116 */ if (trans_slt(ctx, &u.f_r)) return true; return false; case 0x00003000: /* 0000000. ........ .011.... .0110011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:117 */ if (trans_sltu(ctx, &u.f_r)) return true; return false; case 0x00004000: /* 0000000. ........ .100.... .0110011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:118 */ if (trans_xor(ctx, &u.f_r)) return true; return false; case 0x00005000: /* 0000000. ........ .101.... .0110011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:119 */ if (trans_srl(ctx, &u.f_r)) return true; return false; case 0x00006000: /* 0000000. ........ .110.... .0110011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:121 */ if (trans_or(ctx, &u.f_r)) return true; return false; case 0x00007000: /* 0000000. ........ .111.... .0110011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:122 */ if (trans_and(ctx, &u.f_r)) return true; return false; case 0x02000000: /* 0000001. ........ .000.... 
.0110011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:133 */ if (trans_mul(ctx, &u.f_r)) return true; return false; case 0x02001000: /* 0000001. ........ .001.... .0110011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:134 */ if (trans_mulh(ctx, &u.f_r)) return true; return false; case 0x02002000: /* 0000001. ........ .010.... .0110011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:135 */ if (trans_mulhsu(ctx, &u.f_r)) return true; return false; case 0x02003000: /* 0000001. ........ .011.... .0110011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:136 */ if (trans_mulhu(ctx, &u.f_r)) return true; return false; case 0x02004000: /* 0000001. ........ .100.... .0110011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:137 */ if (trans_div(ctx, &u.f_r)) return true; return false; case 0x02005000: /* 0000001. ........ .101.... .0110011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:138 */ if (trans_divu(ctx, &u.f_r)) return true; return false; case 0x02006000: /* 0000001. ........ .110.... .0110011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:139 */ if (trans_rem(ctx, &u.f_r)) return true; return false; case 0x02007000: /* 0000001. ........ .111.... .0110011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:140 */ if (trans_remu(ctx, &u.f_r)) return true; return false; case 0x40000000: /* 0100000. ........ .000.... .0110011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:114 */ if (trans_sub(ctx, &u.f_r)) return true; return false; case 0x40005000: /* 0100000. ........ .101.... .0110011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:120 */ if (trans_sra(ctx, &u.f_r)) return true; return false; } return false; case 0x00000037: /* ........ ........ ........ .0110111 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:86 */ decode_insn32_extract_u(ctx, &u.f_u, insn); if (trans_lui(ctx, &u.f_u)) return true; return false; case 0x00000043: /* ........ ........ ........ .1000011 */ - decode_insn32_extract_r4_rm(ctx, &u.f_decode_insn3210, insn); + decode_insn32_extract_r4_rm(ctx, &u.f_decode_insn3215, insn); switch ((insn >> 25) & 0x3) { case 0x0: /* .....00. ........ ........ .1000011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:158 */ - if (trans_fmadd_s(ctx, &u.f_decode_insn3210)) return true; + if (trans_fmadd_s(ctx, &u.f_decode_insn3215)) return true; return false; case 0x1: /* .....01. ........ ........ .1000011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:186 */ - if (trans_fmadd_d(ctx, &u.f_decode_insn3210)) return true; + if (trans_fmadd_d(ctx, &u.f_decode_insn3215)) return true; return false; } return false; case 0x00000047: /* ........ ........ ........ .1000111 */ - decode_insn32_extract_r4_rm(ctx, &u.f_decode_insn3210, insn); + decode_insn32_extract_r4_rm(ctx, &u.f_decode_insn3215, insn); switch ((insn >> 25) & 0x3) { case 0x0: /* .....00. ........ ........ .1000111 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:159 */ - if (trans_fmsub_s(ctx, &u.f_decode_insn3210)) return true; + if (trans_fmsub_s(ctx, &u.f_decode_insn3215)) return true; return false; case 0x1: /* .....01. ........ ........ 
.1000111 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:187 */ - if (trans_fmsub_d(ctx, &u.f_decode_insn3210)) return true; + if (trans_fmsub_d(ctx, &u.f_decode_insn3215)) return true; return false; } return false; case 0x0000004b: /* ........ ........ ........ .1001011 */ - decode_insn32_extract_r4_rm(ctx, &u.f_decode_insn3210, insn); + decode_insn32_extract_r4_rm(ctx, &u.f_decode_insn3215, insn); switch ((insn >> 25) & 0x3) { case 0x0: /* .....00. ........ ........ .1001011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:160 */ - if (trans_fnmsub_s(ctx, &u.f_decode_insn3210)) return true; + if (trans_fnmsub_s(ctx, &u.f_decode_insn3215)) return true; return false; case 0x1: /* .....01. ........ ........ .1001011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:188 */ - if (trans_fnmsub_d(ctx, &u.f_decode_insn3210)) return true; + if (trans_fnmsub_d(ctx, &u.f_decode_insn3215)) return true; return false; } return false; case 0x0000004f: /* ........ ........ ........ .1001111 */ - decode_insn32_extract_r4_rm(ctx, &u.f_decode_insn3210, insn); + decode_insn32_extract_r4_rm(ctx, &u.f_decode_insn3215, insn); switch ((insn >> 25) & 0x3) { case 0x0: /* .....00. ........ ........ .1001111 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:161 */ - if (trans_fnmadd_s(ctx, &u.f_decode_insn3210)) return true; + if (trans_fnmadd_s(ctx, &u.f_decode_insn3215)) return true; return false; case 0x1: /* .....01. ........ ........ .1001111 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:189 */ - if (trans_fnmadd_d(ctx, &u.f_decode_insn3210)) return true; + if (trans_fnmadd_d(ctx, &u.f_decode_insn3215)) return true; return false; } return false; @@ -933,51 +2044,43 @@ static bool decode_insn32(DisasContext *ctx, uint32_t insn) switch ((insn >> 25) & 0x7f) { case 0x0: /* 0000000. ........ ........ .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:162 */ - decode_insn32_extract_r_rm(ctx, &u.f_decode_insn3211, insn); - if (trans_fadd_s(ctx, &u.f_decode_insn3211)) return true; + decode_insn32_extract_r_rm(ctx, &u.f_decode_insn3216, insn); + if (trans_fadd_s(ctx, &u.f_decode_insn3216)) return true; return false; case 0x1: /* 0000001. ........ ........ .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:190 */ - decode_insn32_extract_r_rm(ctx, &u.f_decode_insn3211, insn); - if (trans_fadd_d(ctx, &u.f_decode_insn3211)) return true; + decode_insn32_extract_r_rm(ctx, &u.f_decode_insn3216, insn); + if (trans_fadd_d(ctx, &u.f_decode_insn3216)) return true; return false; case 0x4: /* 0000100. ........ ........ .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:163 */ - decode_insn32_extract_r_rm(ctx, &u.f_decode_insn3211, insn); - if (trans_fsub_s(ctx, &u.f_decode_insn3211)) return true; + decode_insn32_extract_r_rm(ctx, &u.f_decode_insn3216, insn); + if (trans_fsub_s(ctx, &u.f_decode_insn3216)) return true; return false; case 0x5: /* 0000101. ........ ........ .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:191 */ - decode_insn32_extract_r_rm(ctx, &u.f_decode_insn3211, insn); - if (trans_fsub_d(ctx, &u.f_decode_insn3211)) return true; + decode_insn32_extract_r_rm(ctx, &u.f_decode_insn3216, insn); + if (trans_fsub_d(ctx, &u.f_decode_insn3216)) return true; return false; case 0x8: /* 0001000. ........ ........ 
.1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:164 */ - decode_insn32_extract_r_rm(ctx, &u.f_decode_insn3211, insn); - if (trans_fmul_s(ctx, &u.f_decode_insn3211)) return true; + decode_insn32_extract_r_rm(ctx, &u.f_decode_insn3216, insn); + if (trans_fmul_s(ctx, &u.f_decode_insn3216)) return true; return false; case 0x9: /* 0001001. ........ ........ .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:192 */ - decode_insn32_extract_r_rm(ctx, &u.f_decode_insn3211, insn); - if (trans_fmul_d(ctx, &u.f_decode_insn3211)) return true; + decode_insn32_extract_r_rm(ctx, &u.f_decode_insn3216, insn); + if (trans_fmul_d(ctx, &u.f_decode_insn3216)) return true; return false; case 0xc: /* 0001100. ........ ........ .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:165 */ - decode_insn32_extract_r_rm(ctx, &u.f_decode_insn3211, insn); - if (trans_fdiv_s(ctx, &u.f_decode_insn3211)) return true; + decode_insn32_extract_r_rm(ctx, &u.f_decode_insn3216, insn); + if (trans_fdiv_s(ctx, &u.f_decode_insn3216)) return true; return false; case 0xd: /* 0001101. ........ ........ .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:193 */ - decode_insn32_extract_r_rm(ctx, &u.f_decode_insn3211, insn); - if (trans_fdiv_d(ctx, &u.f_decode_insn3211)) return true; + decode_insn32_extract_r_rm(ctx, &u.f_decode_insn3216, insn); + if (trans_fdiv_d(ctx, &u.f_decode_insn3216)) return true; return false; case 0x10: /* 0010000. ........ ........ .1010011 */ @@ -985,17 +2088,14 @@ static bool decode_insn32(DisasContext *ctx, uint32_t insn) switch ((insn >> 12) & 0x7) { case 0x0: /* 0010000. ........ .000.... .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:167 */ if (trans_fsgnj_s(ctx, &u.f_r)) return true; return false; case 0x1: /* 0010000. ........ .001.... .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:168 */ if (trans_fsgnjn_s(ctx, &u.f_r)) return true; return false; case 0x2: /* 0010000. ........ .010.... .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:169 */ if (trans_fsgnjx_s(ctx, &u.f_r)) return true; return false; } @@ -1006,17 +2106,14 @@ static bool decode_insn32(DisasContext *ctx, uint32_t insn) switch ((insn >> 12) & 0x7) { case 0x0: /* 0010001. ........ .000.... .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:195 */ if (trans_fsgnj_d(ctx, &u.f_r)) return true; return false; case 0x1: /* 0010001. ........ .001.... .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:196 */ if (trans_fsgnjn_d(ctx, &u.f_r)) return true; return false; case 0x2: /* 0010001. ........ .010.... .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:197 */ if (trans_fsgnjx_d(ctx, &u.f_r)) return true; return false; } @@ -1027,12 +2124,10 @@ static bool decode_insn32(DisasContext *ctx, uint32_t insn) switch ((insn >> 12) & 0x7) { case 0x0: /* 0010100. ........ .000.... .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:170 */ if (trans_fmin_s(ctx, &u.f_r)) return true; return false; case 0x1: /* 0010100. ........ .001.... 
.1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:171 */ if (trans_fmax_s(ctx, &u.f_r)) return true; return false; } @@ -1043,57 +2138,51 @@ static bool decode_insn32(DisasContext *ctx, uint32_t insn) switch ((insn >> 12) & 0x7) { case 0x0: /* 0010101. ........ .000.... .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:198 */ if (trans_fmin_d(ctx, &u.f_r)) return true; return false; case 0x1: /* 0010101. ........ .001.... .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:199 */ if (trans_fmax_d(ctx, &u.f_r)) return true; return false; } return false; case 0x20: /* 0100000. ........ ........ .1010011 */ - decode_insn32_extract_r2_rm(ctx, &u.f_decode_insn3212, insn); + decode_insn32_extract_r2_rm(ctx, &u.f_decode_insn3217, insn); switch ((insn >> 20) & 0x1f) { case 0x1: /* 01000000 0001.... ........ .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:200 */ - if (trans_fcvt_s_d(ctx, &u.f_decode_insn3212)) return true; + if (trans_fcvt_s_d(ctx, &u.f_decode_insn3217)) return true; return false; } return false; case 0x21: /* 0100001. ........ ........ .1010011 */ - decode_insn32_extract_r2_rm(ctx, &u.f_decode_insn3212, insn); + decode_insn32_extract_r2_rm(ctx, &u.f_decode_insn3217, insn); switch ((insn >> 20) & 0x1f) { case 0x0: /* 01000010 0000.... ........ .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:201 */ - if (trans_fcvt_d_s(ctx, &u.f_decode_insn3212)) return true; + if (trans_fcvt_d_s(ctx, &u.f_decode_insn3217)) return true; return false; } return false; case 0x2c: /* 0101100. ........ ........ .1010011 */ - decode_insn32_extract_r2_rm(ctx, &u.f_decode_insn3212, insn); + decode_insn32_extract_r2_rm(ctx, &u.f_decode_insn3217, insn); switch ((insn >> 20) & 0x1f) { case 0x0: /* 01011000 0000.... ........ .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:166 */ - if (trans_fsqrt_s(ctx, &u.f_decode_insn3212)) return true; + if (trans_fsqrt_s(ctx, &u.f_decode_insn3217)) return true; return false; } return false; case 0x2d: /* 0101101. ........ ........ .1010011 */ - decode_insn32_extract_r2_rm(ctx, &u.f_decode_insn3212, insn); + decode_insn32_extract_r2_rm(ctx, &u.f_decode_insn3217, insn); switch ((insn >> 20) & 0x1f) { case 0x0: /* 01011010 0000.... ........ .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:194 */ - if (trans_fsqrt_d(ctx, &u.f_decode_insn3212)) return true; + if (trans_fsqrt_d(ctx, &u.f_decode_insn3217)) return true; return false; } return false; @@ -1103,17 +2192,14 @@ static bool decode_insn32(DisasContext *ctx, uint32_t insn) switch ((insn >> 12) & 0x7) { case 0x0: /* 1010000. ........ .000.... .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:177 */ if (trans_fle_s(ctx, &u.f_r)) return true; return false; case 0x1: /* 1010000. ........ .001.... .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:176 */ if (trans_flt_s(ctx, &u.f_r)) return true; return false; case 0x2: /* 1010000. ........ .010.... .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:175 */ if (trans_feq_s(ctx, &u.f_r)) return true; return false; } @@ -1124,120 +2210,1726 @@ static bool decode_insn32(DisasContext *ctx, uint32_t insn) switch ((insn >> 12) & 0x7) { case 0x0: /* 1010001. ........ .000.... 
.1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:204 */ if (trans_fle_d(ctx, &u.f_r)) return true; return false; case 0x1: /* 1010001. ........ .001.... .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:203 */ if (trans_flt_d(ctx, &u.f_r)) return true; return false; case 0x2: /* 1010001. ........ .010.... .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:202 */ if (trans_feq_d(ctx, &u.f_r)) return true; return false; } return false; case 0x60: /* 1100000. ........ ........ .1010011 */ - decode_insn32_extract_r2_rm(ctx, &u.f_decode_insn3212, insn); + decode_insn32_extract_r2_rm(ctx, &u.f_decode_insn3217, insn); switch ((insn >> 20) & 0x1f) { case 0x0: /* 11000000 0000.... ........ .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:172 */ - if (trans_fcvt_w_s(ctx, &u.f_decode_insn3212)) return true; + if (trans_fcvt_w_s(ctx, &u.f_decode_insn3217)) return true; return false; case 0x1: /* 11000000 0001.... ........ .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:173 */ - if (trans_fcvt_wu_s(ctx, &u.f_decode_insn3212)) return true; + if (trans_fcvt_wu_s(ctx, &u.f_decode_insn3217)) return true; return false; } return false; case 0x61: /* 1100001. ........ ........ .1010011 */ - decode_insn32_extract_r2_rm(ctx, &u.f_decode_insn3212, insn); + decode_insn32_extract_r2_rm(ctx, &u.f_decode_insn3217, insn); switch ((insn >> 20) & 0x1f) { case 0x0: /* 11000010 0000.... ........ .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:206 */ - if (trans_fcvt_w_d(ctx, &u.f_decode_insn3212)) return true; + if (trans_fcvt_w_d(ctx, &u.f_decode_insn3217)) return true; return false; case 0x1: /* 11000010 0001.... ........ .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:207 */ - if (trans_fcvt_wu_d(ctx, &u.f_decode_insn3212)) return true; + if (trans_fcvt_wu_d(ctx, &u.f_decode_insn3217)) return true; return false; } return false; case 0x68: /* 1101000. ........ ........ .1010011 */ - decode_insn32_extract_r2_rm(ctx, &u.f_decode_insn3212, insn); + decode_insn32_extract_r2_rm(ctx, &u.f_decode_insn3217, insn); switch ((insn >> 20) & 0x1f) { case 0x0: /* 11010000 0000.... ........ .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:179 */ - if (trans_fcvt_s_w(ctx, &u.f_decode_insn3212)) return true; + if (trans_fcvt_s_w(ctx, &u.f_decode_insn3217)) return true; return false; case 0x1: /* 11010000 0001.... ........ .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:180 */ - if (trans_fcvt_s_wu(ctx, &u.f_decode_insn3212)) return true; + if (trans_fcvt_s_wu(ctx, &u.f_decode_insn3217)) return true; return false; } return false; case 0x69: /* 1101001. ........ ........ .1010011 */ - decode_insn32_extract_r2_rm(ctx, &u.f_decode_insn3212, insn); + decode_insn32_extract_r2_rm(ctx, &u.f_decode_insn3217, insn); switch ((insn >> 20) & 0x1f) { case 0x0: /* 11010010 0000.... ........ .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:208 */ - if (trans_fcvt_d_w(ctx, &u.f_decode_insn3212)) return true; + if (trans_fcvt_d_w(ctx, &u.f_decode_insn3217)) return true; return false; case 0x1: /* 11010010 0001.... ........ 
.1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:209 */ - if (trans_fcvt_d_wu(ctx, &u.f_decode_insn3212)) return true; + if (trans_fcvt_d_wu(ctx, &u.f_decode_insn3217)) return true; return false; } return false; case 0x70: /* 1110000. ........ ........ .1010011 */ - decode_insn32_extract_r2(ctx, &u.f_decode_insn3213, insn); + decode_insn32_extract_r2(ctx, &u.f_decode_insn3218, insn); switch (insn & 0x01f07000) { case 0x00000000: /* 11100000 0000.... .000.... .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:174 */ - if (trans_fmv_x_w(ctx, &u.f_decode_insn3213)) return true; + if (trans_fmv_x_w(ctx, &u.f_decode_insn3218)) return true; return false; case 0x00001000: /* 11100000 0000.... .001.... .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:178 */ - if (trans_fclass_s(ctx, &u.f_decode_insn3213)) return true; + if (trans_fclass_s(ctx, &u.f_decode_insn3218)) return true; return false; } return false; case 0x71: /* 1110001. ........ ........ .1010011 */ - decode_insn32_extract_r2(ctx, &u.f_decode_insn3213, insn); + decode_insn32_extract_r2(ctx, &u.f_decode_insn3218, insn); switch (insn & 0x01f07000) { case 0x00001000: /* 11100010 0000.... .001.... .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:205 */ - if (trans_fclass_d(ctx, &u.f_decode_insn3213)) return true; + if (trans_fclass_d(ctx, &u.f_decode_insn3218)) return true; return false; } return false; case 0x78: /* 1111000. ........ ........ .1010011 */ - decode_insn32_extract_r2(ctx, &u.f_decode_insn3213, insn); + decode_insn32_extract_r2(ctx, &u.f_decode_insn3218, insn); switch (insn & 0x01f07000) { case 0x00000000: /* 11110000 0000.... .000.... .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:181 */ - if (trans_fmv_w_x(ctx, &u.f_decode_insn3213)) return true; + if (trans_fmv_w_x(ctx, &u.f_decode_insn3218)) return true; + return false; + } + return false; + } + return false; + case 0x00000057: + /* ........ ........ ........ .1010111 */ + switch (insn & 0x80007000) { + case 0x00000000: + /* 0....... ........ .000.... .1010111 */ + switch ((insn >> 26) & 0x1f) { + case 0x0: + /* 000000.. ........ .000.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vadd_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x2: + /* 000010.. ........ .000.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vsub_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x4: + /* 000100.. ........ .000.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vminu_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x5: + /* 000101.. ........ .000.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vmin_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x6: + /* 000110.. ........ .000.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vmaxu_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x7: + /* 000111.. ........ .000.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vmax_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x9: + /* 001001.. ........ .000.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vand_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0xa: + /* 001010.. ........ .000.... 
.1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vor_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0xb: + /* 001011.. ........ .000.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vxor_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0xc: + /* 001100.. ........ .000.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vrgather_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x10: + /* 010000.. ........ .000.... .1010111 */ + decode_insn32_extract_r_vm_1(ctx, &u.f_rmrr, insn); + switch ((insn >> 25) & 0x1) { + case 0x1: + /* 0100001. ........ .000.... .1010111 */ + if (trans_vadc_vvm(ctx, &u.f_rmrr)) return true; + return false; + } + return false; + case 0x11: + /* 010001.. ........ .000.... .1010111 */ + decode_insn32_extract_r_vm_1(ctx, &u.f_rmrr, insn); + switch ((insn >> 25) & 0x1) { + case 0x1: + /* 0100011. ........ .000.... .1010111 */ + if (trans_vmadc_vvm(ctx, &u.f_rmrr)) return true; + return false; + } + return false; + case 0x12: + /* 010010.. ........ .000.... .1010111 */ + decode_insn32_extract_r_vm_1(ctx, &u.f_rmrr, insn); + switch ((insn >> 25) & 0x1) { + case 0x1: + /* 0100101. ........ .000.... .1010111 */ + if (trans_vsbc_vvm(ctx, &u.f_rmrr)) return true; + return false; + } + return false; + case 0x13: + /* 010011.. ........ .000.... .1010111 */ + decode_insn32_extract_r_vm_1(ctx, &u.f_rmrr, insn); + switch ((insn >> 25) & 0x1) { + case 0x1: + /* 0100111. ........ .000.... .1010111 */ + if (trans_vmsbc_vvm(ctx, &u.f_rmrr)) return true; + return false; + } + return false; + case 0x17: + /* 010111.. ........ .000.... .1010111 */ + switch ((insn >> 25) & 0x1) { + case 0x0: + /* 0101110. ........ .000.... .1010111 */ + decode_insn32_extract_r_vm_0(ctx, &u.f_rmrr, insn); + if (trans_vmerge_vvm(ctx, &u.f_rmrr)) return true; + return false; + case 0x1: + /* 0101111. ........ .000.... .1010111 */ + decode_insn32_extract_r2(ctx, &u.f_decode_insn3218, insn); + switch ((insn >> 20) & 0x1f) { + case 0x0: + /* 01011110 0000.... .000.... .1010111 */ + if (trans_vmv_v_v(ctx, &u.f_decode_insn3218)) return true; + return false; + } + return false; + } + return false; + case 0x18: + /* 011000.. ........ .000.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vmseq_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x19: + /* 011001.. ........ .000.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vmsne_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x1a: + /* 011010.. ........ .000.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vmsltu_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x1b: + /* 011011.. ........ .000.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vmslt_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x1c: + /* 011100.. ........ .000.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vmsleu_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x1d: + /* 011101.. ........ .000.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vmsle_vv(ctx, &u.f_rmrr)) return true; + return false; + } + return false; + case 0x00001000: + /* 0....... ........ .001.... .1010111 */ + switch (insn & 0x74000000) { + case 0x00000000: + /* 0000.0.. ........ .001.... 
.1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + switch ((insn >> 27) & 0x1) { + case 0x0: + /* 000000.. ........ .001.... .1010111 */ + if (trans_vfadd_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x1: + /* 000010.. ........ .001.... .1010111 */ + if (trans_vfsub_vv(ctx, &u.f_rmrr)) return true; + return false; + } + return false; + case 0x04000000: + /* 0000.1.. ........ .001.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vfredsum_vs(ctx, &u.f_rmrr)) return true; + return false; + case 0x10000000: + /* 0001.0.. ........ .001.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + switch ((insn >> 27) & 0x1) { + case 0x0: + /* 000100.. ........ .001.... .1010111 */ + if (trans_vfmin_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x1: + /* 000110.. ........ .001.... .1010111 */ + if (trans_vfmax_vv(ctx, &u.f_rmrr)) return true; + return false; + } + return false; + case 0x14000000: + /* 0001.1.. ........ .001.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + switch ((insn >> 27) & 0x1) { + case 0x0: + /* 000101.. ........ .001.... .1010111 */ + if (trans_vfredmin_vs(ctx, &u.f_rmrr)) return true; + return false; + case 0x1: + /* 000111.. ........ .001.... .1010111 */ + if (trans_vfredmax_vs(ctx, &u.f_rmrr)) return true; + return false; + } + return false; + case 0x20000000: + /* 0010.0.. ........ .001.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + switch ((insn >> 27) & 0x1) { + case 0x0: + /* 001000.. ........ .001.... .1010111 */ + if (trans_vfsgnj_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x1: + /* 001010.. ........ .001.... .1010111 */ + if (trans_vfsgnjx_vv(ctx, &u.f_rmrr)) return true; + return false; + } + return false; + case 0x24000000: + /* 0010.1.. ........ .001.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + switch ((insn >> 27) & 0x1) { + case 0x0: + /* 001001.. ........ .001.... .1010111 */ + if (trans_vfsgnjn_vv(ctx, &u.f_rmrr)) return true; + return false; + } + return false; + case 0x30000000: + /* 0011.0.. ........ .001.... .1010111 */ + decode_insn32_extract_r2rd(ctx, &u.f_decode_insn3220, insn); + switch (insn & 0x0a0f8000) { + case 0x02000000: + /* 0011001. ....0000 0001.... .1010111 */ + if (trans_vfmv_f_s(ctx, &u.f_decode_insn3220)) return true; + return false; + } + return false; + case 0x60000000: + /* 0110.0.. ........ .001.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + switch ((insn >> 27) & 0x1) { + case 0x0: + /* 011000.. ........ .001.... .1010111 */ + if (trans_vmfeq_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x1: + /* 011010.. ........ .001.... .1010111 */ + if (trans_vmford_vv(ctx, &u.f_rmrr)) return true; + return false; + } + return false; + case 0x64000000: + /* 0110.1.. ........ .001.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + switch ((insn >> 27) & 0x1) { + case 0x0: + /* 011001.. ........ .001.... .1010111 */ + if (trans_vmfle_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x1: + /* 011011.. ........ .001.... .1010111 */ + if (trans_vmflt_vv(ctx, &u.f_rmrr)) return true; + return false; + } + return false; + case 0x70000000: + /* 0111.0.. ........ .001.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + switch ((insn >> 27) & 0x1) { + case 0x0: + /* 011100.. ........ .001.... 
.1010111 */ + if (trans_vmfne_vv(ctx, &u.f_rmrr)) return true; + return false; + } + return false; + } + return false; + case 0x00002000: + /* 0....... ........ .010.... .1010111 */ + switch ((insn >> 26) & 0x1f) { + case 0x0: + /* 000000.. ........ .010.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vredsum_vs(ctx, &u.f_rmrr)) return true; + return false; + case 0x1: + /* 000001.. ........ .010.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vredand_vs(ctx, &u.f_rmrr)) return true; + return false; + case 0x2: + /* 000010.. ........ .010.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vredor_vs(ctx, &u.f_rmrr)) return true; + return false; + case 0x3: + /* 000011.. ........ .010.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vredxor_vs(ctx, &u.f_rmrr)) return true; + return false; + case 0x4: + /* 000100.. ........ .010.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vredminu_vs(ctx, &u.f_rmrr)) return true; + return false; + case 0x5: + /* 000101.. ........ .010.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vredmin_vs(ctx, &u.f_rmrr)) return true; + return false; + case 0x6: + /* 000110.. ........ .010.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vredmaxu_vs(ctx, &u.f_rmrr)) return true; + return false; + case 0x7: + /* 000111.. ........ .010.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vredmax_vs(ctx, &u.f_rmrr)) return true; + return false; + case 0xc: + /* 001100.. ........ .010.... .1010111 */ + decode_insn32_extract_r(ctx, &u.f_r, insn); + switch ((insn >> 25) & 0x1) { + case 0x1: + /* 0011001. ........ .010.... .1010111 */ + if (trans_vext_x_v(ctx, &u.f_r)) return true; + return false; + } + return false; + case 0x14: + /* 010100.. ........ .010.... .1010111 */ + decode_insn32_extract_r2_vm(ctx, &u.f_rmr, insn); + if (trans_vmpopc_m(ctx, &u.f_rmr)) return true; + return false; + case 0x15: + /* 010101.. ........ .010.... .1010111 */ + decode_insn32_extract_r2_vm(ctx, &u.f_rmr, insn); + if (trans_vmfirst_m(ctx, &u.f_rmr)) return true; + return false; + case 0x16: + /* 010110.. ........ .010.... .1010111 */ + switch ((insn >> 15) & 0x1f) { + case 0x1: + /* 010110.. ....0000 1010.... .1010111 */ + decode_insn32_extract_r2_vm(ctx, &u.f_rmr, insn); + if (trans_vmsbf_m(ctx, &u.f_rmr)) return true; + return false; + case 0x2: + /* 010110.. ....0001 0010.... .1010111 */ + decode_insn32_extract_r2_vm(ctx, &u.f_rmr, insn); + if (trans_vmsof_m(ctx, &u.f_rmr)) return true; + return false; + case 0x3: + /* 010110.. ....0001 1010.... .1010111 */ + decode_insn32_extract_r2_vm(ctx, &u.f_rmr, insn); + if (trans_vmsif_m(ctx, &u.f_rmr)) return true; + return false; + case 0x10: + /* 010110.. ....1000 0010.... .1010111 */ + decode_insn32_extract_r2_vm(ctx, &u.f_rmr, insn); + if (trans_viota_m(ctx, &u.f_rmr)) return true; + return false; + case 0x11: + /* 010110.. ....1000 1010.... .1010111 */ + decode_insn32_extract_r1_vm(ctx, &u.f_decode_insn3219, insn); + switch ((insn >> 20) & 0x1f) { + case 0x0: + /* 010110.0 00001000 1010.... .1010111 */ + if (trans_vid_v(ctx, &u.f_decode_insn3219)) return true; + return false; + } + return false; + } + return false; + case 0x17: + /* 010111.. ........ .010.... 
.1010111 */ + decode_insn32_extract_r(ctx, &u.f_r, insn); + if (trans_vcompress_vm(ctx, &u.f_r)) return true; + return false; + case 0x18: + /* 011000.. ........ .010.... .1010111 */ + decode_insn32_extract_r(ctx, &u.f_r, insn); + if (trans_vmandnot_mm(ctx, &u.f_r)) return true; + return false; + case 0x19: + /* 011001.. ........ .010.... .1010111 */ + decode_insn32_extract_r(ctx, &u.f_r, insn); + if (trans_vmand_mm(ctx, &u.f_r)) return true; + return false; + case 0x1a: + /* 011010.. ........ .010.... .1010111 */ + decode_insn32_extract_r(ctx, &u.f_r, insn); + if (trans_vmor_mm(ctx, &u.f_r)) return true; + return false; + case 0x1b: + /* 011011.. ........ .010.... .1010111 */ + decode_insn32_extract_r(ctx, &u.f_r, insn); + if (trans_vmxor_mm(ctx, &u.f_r)) return true; + return false; + case 0x1c: + /* 011100.. ........ .010.... .1010111 */ + decode_insn32_extract_r(ctx, &u.f_r, insn); + if (trans_vmornot_mm(ctx, &u.f_r)) return true; + return false; + case 0x1d: + /* 011101.. ........ .010.... .1010111 */ + decode_insn32_extract_r(ctx, &u.f_r, insn); + if (trans_vmnand_mm(ctx, &u.f_r)) return true; + return false; + case 0x1e: + /* 011110.. ........ .010.... .1010111 */ + decode_insn32_extract_r(ctx, &u.f_r, insn); + if (trans_vmnor_mm(ctx, &u.f_r)) return true; + return false; + case 0x1f: + /* 011111.. ........ .010.... .1010111 */ + decode_insn32_extract_r(ctx, &u.f_r, insn); + if (trans_vmxnor_mm(ctx, &u.f_r)) return true; + return false; + } + return false; + case 0x00003000: + /* 0....... ........ .011.... .1010111 */ + switch ((insn >> 26) & 0x1f) { + case 0x0: + /* 000000.. ........ .011.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vadd_vi(ctx, &u.f_rmrr)) return true; + return false; + case 0x3: + /* 000011.. ........ .011.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vrsub_vi(ctx, &u.f_rmrr)) return true; + return false; + case 0x9: + /* 001001.. ........ .011.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vand_vi(ctx, &u.f_rmrr)) return true; + return false; + case 0xa: + /* 001010.. ........ .011.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vor_vi(ctx, &u.f_rmrr)) return true; + return false; + case 0xb: + /* 001011.. ........ .011.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vxor_vi(ctx, &u.f_rmrr)) return true; + return false; + case 0xc: + /* 001100.. ........ .011.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vrgather_vi(ctx, &u.f_rmrr)) return true; + return false; + case 0xe: + /* 001110.. ........ .011.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vslideup_vi(ctx, &u.f_rmrr)) return true; + return false; + case 0xf: + /* 001111.. ........ .011.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vslidedown_vi(ctx, &u.f_rmrr)) return true; + return false; + case 0x10: + /* 010000.. ........ .011.... .1010111 */ + decode_insn32_extract_r_vm_1(ctx, &u.f_rmrr, insn); + switch ((insn >> 25) & 0x1) { + case 0x1: + /* 0100001. ........ .011.... .1010111 */ + if (trans_vadc_vim(ctx, &u.f_rmrr)) return true; + return false; + } + return false; + case 0x11: + /* 010001.. ........ .011.... .1010111 */ + decode_insn32_extract_r_vm_1(ctx, &u.f_rmrr, insn); + switch ((insn >> 25) & 0x1) { + case 0x1: + /* 0100011. ........ .011.... 
.1010111 */ + if (trans_vmadc_vim(ctx, &u.f_rmrr)) return true; + return false; + } + return false; + case 0x17: + /* 010111.. ........ .011.... .1010111 */ + switch ((insn >> 25) & 0x1) { + case 0x0: + /* 0101110. ........ .011.... .1010111 */ + decode_insn32_extract_r_vm_0(ctx, &u.f_rmrr, insn); + if (trans_vmerge_vim(ctx, &u.f_rmrr)) return true; + return false; + case 0x1: + /* 0101111. ........ .011.... .1010111 */ + decode_insn32_extract_r2(ctx, &u.f_decode_insn3218, insn); + switch ((insn >> 20) & 0x1f) { + case 0x0: + /* 01011110 0000.... .011.... .1010111 */ + if (trans_vmv_v_i(ctx, &u.f_decode_insn3218)) return true; + return false; + } + return false; + } + return false; + case 0x18: + /* 011000.. ........ .011.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vmseq_vi(ctx, &u.f_rmrr)) return true; + return false; + case 0x19: + /* 011001.. ........ .011.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vmsne_vi(ctx, &u.f_rmrr)) return true; + return false; + case 0x1c: + /* 011100.. ........ .011.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vmsleu_vi(ctx, &u.f_rmrr)) return true; + return false; + case 0x1d: + /* 011101.. ........ .011.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vmsle_vi(ctx, &u.f_rmrr)) return true; + return false; + case 0x1e: + /* 011110.. ........ .011.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vmsgtu_vi(ctx, &u.f_rmrr)) return true; + return false; + case 0x1f: + /* 011111.. ........ .011.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vmsgt_vi(ctx, &u.f_rmrr)) return true; + return false; + } + return false; + case 0x00004000: + /* 0....... ........ .100.... .1010111 */ + switch ((insn >> 26) & 0x1f) { + case 0x0: + /* 000000.. ........ .100.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vadd_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x2: + /* 000010.. ........ .100.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vsub_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x3: + /* 000011.. ........ .100.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vrsub_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x4: + /* 000100.. ........ .100.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vminu_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x5: + /* 000101.. ........ .100.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vmin_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x6: + /* 000110.. ........ .100.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vmaxu_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x7: + /* 000111.. ........ .100.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vmax_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x9: + /* 001001.. ........ .100.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vand_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0xa: + /* 001010.. ........ .100.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vor_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0xb: + /* 001011.. ........ .100.... 
.1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vxor_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0xc: + /* 001100.. ........ .100.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vrgather_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0xe: + /* 001110.. ........ .100.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vslideup_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0xf: + /* 001111.. ........ .100.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vslidedown_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x10: + /* 010000.. ........ .100.... .1010111 */ + decode_insn32_extract_r_vm_1(ctx, &u.f_rmrr, insn); + switch ((insn >> 25) & 0x1) { + case 0x1: + /* 0100001. ........ .100.... .1010111 */ + if (trans_vadc_vxm(ctx, &u.f_rmrr)) return true; + return false; + } + return false; + case 0x11: + /* 010001.. ........ .100.... .1010111 */ + decode_insn32_extract_r_vm_1(ctx, &u.f_rmrr, insn); + switch ((insn >> 25) & 0x1) { + case 0x1: + /* 0100011. ........ .100.... .1010111 */ + if (trans_vmadc_vxm(ctx, &u.f_rmrr)) return true; + return false; + } + return false; + case 0x12: + /* 010010.. ........ .100.... .1010111 */ + decode_insn32_extract_r_vm_1(ctx, &u.f_rmrr, insn); + switch ((insn >> 25) & 0x1) { + case 0x1: + /* 0100101. ........ .100.... .1010111 */ + if (trans_vsbc_vxm(ctx, &u.f_rmrr)) return true; + return false; + } + return false; + case 0x13: + /* 010011.. ........ .100.... .1010111 */ + decode_insn32_extract_r_vm_1(ctx, &u.f_rmrr, insn); + switch ((insn >> 25) & 0x1) { + case 0x1: + /* 0100111. ........ .100.... .1010111 */ + if (trans_vmsbc_vxm(ctx, &u.f_rmrr)) return true; + return false; + } + return false; + case 0x17: + /* 010111.. ........ .100.... .1010111 */ + switch ((insn >> 25) & 0x1) { + case 0x0: + /* 0101110. ........ .100.... .1010111 */ + decode_insn32_extract_r_vm_0(ctx, &u.f_rmrr, insn); + if (trans_vmerge_vxm(ctx, &u.f_rmrr)) return true; + return false; + case 0x1: + /* 0101111. ........ .100.... .1010111 */ + decode_insn32_extract_r2(ctx, &u.f_decode_insn3218, insn); + switch ((insn >> 20) & 0x1f) { + case 0x0: + /* 01011110 0000.... .100.... .1010111 */ + if (trans_vmv_v_x(ctx, &u.f_decode_insn3218)) return true; + return false; + } + return false; + } + return false; + case 0x18: + /* 011000.. ........ .100.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vmseq_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x19: + /* 011001.. ........ .100.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vmsne_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x1a: + /* 011010.. ........ .100.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vmsltu_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x1b: + /* 011011.. ........ .100.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vmslt_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x1c: + /* 011100.. ........ .100.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vmsleu_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x1d: + /* 011101.. ........ .100.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vmsle_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x1e: + /* 011110.. ........ .100.... 
.1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vmsgtu_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x1f: + /* 011111.. ........ .100.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vmsgt_vx(ctx, &u.f_rmrr)) return true; + return false; + } + return false; + case 0x00005000: + /* 0....... ........ .101.... .1010111 */ + switch ((insn >> 26) & 0x1f) { + case 0x0: + /* 000000.. ........ .101.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vfadd_vf(ctx, &u.f_rmrr)) return true; + return false; + case 0x2: + /* 000010.. ........ .101.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vfsub_vf(ctx, &u.f_rmrr)) return true; + return false; + case 0x4: + /* 000100.. ........ .101.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vfmin_vf(ctx, &u.f_rmrr)) return true; + return false; + case 0x6: + /* 000110.. ........ .101.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vfmax_vf(ctx, &u.f_rmrr)) return true; + return false; + case 0x8: + /* 001000.. ........ .101.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vfsgnj_vf(ctx, &u.f_rmrr)) return true; + return false; + case 0x9: + /* 001001.. ........ .101.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vfsgnjn_vf(ctx, &u.f_rmrr)) return true; + return false; + case 0xa: + /* 001010.. ........ .101.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vfsgnjx_vf(ctx, &u.f_rmrr)) return true; + return false; + case 0xd: + /* 001101.. ........ .101.... .1010111 */ + decode_insn32_extract_r2(ctx, &u.f_decode_insn3218, insn); + switch ((insn >> 20) & 0x3f) { + case 0x20: + /* 00110110 0000.... .101.... .1010111 */ + if (trans_vfmv_s_f(ctx, &u.f_decode_insn3218)) return true; + return false; + } + return false; + case 0x17: + /* 010111.. ........ .101.... .1010111 */ + switch ((insn >> 25) & 0x1) { + case 0x0: + /* 0101110. ........ .101.... .1010111 */ + decode_insn32_extract_r_vm_0(ctx, &u.f_rmrr, insn); + if (trans_vfmerge_vfm(ctx, &u.f_rmrr)) return true; + return false; + case 0x1: + /* 0101111. ........ .101.... .1010111 */ + decode_insn32_extract_r2(ctx, &u.f_decode_insn3218, insn); + switch ((insn >> 20) & 0x1f) { + case 0x0: + /* 01011110 0000.... .101.... .1010111 */ + if (trans_vfmv_v_f(ctx, &u.f_decode_insn3218)) return true; + return false; + } + return false; + } + return false; + case 0x18: + /* 011000.. ........ .101.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vmfeq_vf(ctx, &u.f_rmrr)) return true; + return false; + case 0x19: + /* 011001.. ........ .101.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vmfle_vf(ctx, &u.f_rmrr)) return true; + return false; + case 0x1a: + /* 011010.. ........ .101.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vmford_vf(ctx, &u.f_rmrr)) return true; + return false; + case 0x1b: + /* 011011.. ........ .101.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vmflt_vf(ctx, &u.f_rmrr)) return true; + return false; + case 0x1c: + /* 011100.. ........ .101.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vmfne_vf(ctx, &u.f_rmrr)) return true; + return false; + case 0x1d: + /* 011101.. ........ .101.... 
.1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vmfgt_vf(ctx, &u.f_rmrr)) return true; + return false; + case 0x1f: + /* 011111.. ........ .101.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vmfge_vf(ctx, &u.f_rmrr)) return true; + return false; + } + return false; + case 0x00006000: + /* 0....... ........ .110.... .1010111 */ + switch ((insn >> 26) & 0x1f) { + case 0xd: + /* 001101.. ........ .110.... .1010111 */ + decode_insn32_extract_r2(ctx, &u.f_decode_insn3218, insn); + switch ((insn >> 20) & 0x3f) { + case 0x20: + /* 00110110 0000.... .110.... .1010111 */ + if (trans_vmv_s_x(ctx, &u.f_decode_insn3218)) return true; + return false; + } + return false; + case 0xe: + /* 001110.. ........ .110.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vslide1up_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0xf: + /* 001111.. ........ .110.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vslide1down_vx(ctx, &u.f_rmrr)) return true; + return false; + } + return false; + case 0x00007000: + /* 0....... ........ .111.... .1010111 */ + decode_insn32_extract_r2_zimm(ctx, &u.f_decode_insn3221, insn); + if (trans_vsetvli(ctx, &u.f_decode_insn3221)) return true; + return false; + case 0x80000000: + /* 1....... ........ .000.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + switch ((insn >> 26) & 0x1f) { + case 0x0: + /* 100000.. ........ .000.... .1010111 */ + if (trans_vsaddu_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x1: + /* 100001.. ........ .000.... .1010111 */ + if (trans_vsadd_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x2: + /* 100010.. ........ .000.... .1010111 */ + if (trans_vssubu_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x3: + /* 100011.. ........ .000.... .1010111 */ + if (trans_vssub_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x4: + /* 100100.. ........ .000.... .1010111 */ + if (trans_vaadd_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x5: + /* 100101.. ........ .000.... .1010111 */ + if (trans_vsll_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x6: + /* 100110.. ........ .000.... .1010111 */ + if (trans_vasub_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x7: + /* 100111.. ........ .000.... .1010111 */ + if (trans_vsmul_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x8: + /* 101000.. ........ .000.... .1010111 */ + if (trans_vsrl_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x9: + /* 101001.. ........ .000.... .1010111 */ + if (trans_vsra_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0xa: + /* 101010.. ........ .000.... .1010111 */ + if (trans_vssrl_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0xb: + /* 101011.. ........ .000.... .1010111 */ + if (trans_vssra_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0xc: + /* 101100.. ........ .000.... .1010111 */ + if (trans_vnsrl_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0xd: + /* 101101.. ........ .000.... .1010111 */ + if (trans_vnsra_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0xe: + /* 101110.. ........ .000.... .1010111 */ + if (trans_vnclipu_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0xf: + /* 101111.. ........ .000.... .1010111 */ + if (trans_vnclip_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x10: + /* 110000.. ........ .000.... 
.1010111 */ + if (trans_vwredsumu_vs(ctx, &u.f_rmrr)) return true; + return false; + case 0x11: + /* 110001.. ........ .000.... .1010111 */ + if (trans_vwredsum_vs(ctx, &u.f_rmrr)) return true; + return false; + case 0x1c: + /* 111100.. ........ .000.... .1010111 */ + if (trans_vwsmaccu_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x1d: + /* 111101.. ........ .000.... .1010111 */ + if (trans_vwsmacc_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x1e: + /* 111110.. ........ .000.... .1010111 */ + if (trans_vwsmaccsu_vv(ctx, &u.f_rmrr)) return true; + return false; + } + return false; + case 0x80001000: + /* 1....... ........ .001.... .1010111 */ + switch (insn & 0x74000000) { + case 0x00000000: + /* 1000.0.. ........ .001.... .1010111 */ + switch ((insn >> 27) & 0x1) { + case 0x0: + /* 100000.. ........ .001.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vfdiv_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x1: + /* 100010.. ........ .001.... .1010111 */ + decode_insn32_extract_r2_vm(ctx, &u.f_rmr, insn); + switch ((insn >> 15) & 0x1f) { + case 0x0: + /* 100010.. ....0000 0001.... .1010111 */ + if (trans_vfcvt_xu_f_v(ctx, &u.f_rmr)) return true; + return false; + case 0x1: + /* 100010.. ....0000 1001.... .1010111 */ + if (trans_vfcvt_x_f_v(ctx, &u.f_rmr)) return true; + return false; + case 0x2: + /* 100010.. ....0001 0001.... .1010111 */ + if (trans_vfcvt_f_xu_v(ctx, &u.f_rmr)) return true; + return false; + case 0x3: + /* 100010.. ....0001 1001.... .1010111 */ + if (trans_vfcvt_f_x_v(ctx, &u.f_rmr)) return true; + return false; + case 0x8: + /* 100010.. ....0100 0001.... .1010111 */ + if (trans_vfwcvt_xu_f_v(ctx, &u.f_rmr)) return true; + return false; + case 0x9: + /* 100010.. ....0100 1001.... .1010111 */ + if (trans_vfwcvt_x_f_v(ctx, &u.f_rmr)) return true; + return false; + case 0xa: + /* 100010.. ....0101 0001.... .1010111 */ + if (trans_vfwcvt_f_xu_v(ctx, &u.f_rmr)) return true; + return false; + case 0xb: + /* 100010.. ....0101 1001.... .1010111 */ + if (trans_vfwcvt_f_x_v(ctx, &u.f_rmr)) return true; + return false; + case 0xc: + /* 100010.. ....0110 0001.... .1010111 */ + if (trans_vfwcvt_f_f_v(ctx, &u.f_rmr)) return true; + return false; + case 0x10: + /* 100010.. ....1000 0001.... .1010111 */ + if (trans_vfncvt_xu_f_v(ctx, &u.f_rmr)) return true; + return false; + case 0x11: + /* 100010.. ....1000 1001.... .1010111 */ + if (trans_vfncvt_x_f_v(ctx, &u.f_rmr)) return true; + return false; + case 0x12: + /* 100010.. ....1001 0001.... .1010111 */ + if (trans_vfncvt_f_xu_v(ctx, &u.f_rmr)) return true; + return false; + case 0x13: + /* 100010.. ....1001 1001.... .1010111 */ + if (trans_vfncvt_f_x_v(ctx, &u.f_rmr)) return true; + return false; + case 0x14: + /* 100010.. ....1010 0001.... .1010111 */ + if (trans_vfncvt_f_f_v(ctx, &u.f_rmr)) return true; + return false; + } + return false; + } + return false; + case 0x04000000: + /* 1000.1.. ........ .001.... .1010111 */ + decode_insn32_extract_r2_vm(ctx, &u.f_rmr, insn); + switch (insn & 0x080f8000) { + case 0x08000000: + /* 100011.. ....0000 0001.... .1010111 */ + if (trans_vfsqrt_v(ctx, &u.f_rmr)) return true; + return false; + case 0x08080000: + /* 100011.. ....1000 0001.... .1010111 */ + if (trans_vfclass_v(ctx, &u.f_rmr)) return true; + return false; + } + return false; + case 0x10000000: + /* 1001.0.. ........ .001.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + switch ((insn >> 27) & 0x1) { + case 0x0: + /* 100100.. ........ .001.... 
.1010111 */ + if (trans_vfmul_vv(ctx, &u.f_rmrr)) return true; + return false; + } + return false; + case 0x20000000: + /* 1010.0.. ........ .001.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + switch ((insn >> 27) & 0x1) { + case 0x0: + /* 101000.. ........ .001.... .1010111 */ + if (trans_vfmadd_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x1: + /* 101010.. ........ .001.... .1010111 */ + if (trans_vfmsub_vv(ctx, &u.f_rmrr)) return true; + return false; + } + return false; + case 0x24000000: + /* 1010.1.. ........ .001.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + switch ((insn >> 27) & 0x1) { + case 0x0: + /* 101001.. ........ .001.... .1010111 */ + if (trans_vfnmadd_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x1: + /* 101011.. ........ .001.... .1010111 */ + if (trans_vfnmsub_vv(ctx, &u.f_rmrr)) return true; + return false; + } + return false; + case 0x30000000: + /* 1011.0.. ........ .001.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + switch ((insn >> 27) & 0x1) { + case 0x0: + /* 101100.. ........ .001.... .1010111 */ + if (trans_vfmacc_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x1: + /* 101110.. ........ .001.... .1010111 */ + if (trans_vfmsac_vv(ctx, &u.f_rmrr)) return true; + return false; + } + return false; + case 0x34000000: + /* 1011.1.. ........ .001.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + switch ((insn >> 27) & 0x1) { + case 0x0: + /* 101101.. ........ .001.... .1010111 */ + if (trans_vfnmacc_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x1: + /* 101111.. ........ .001.... .1010111 */ + if (trans_vfnmsac_vv(ctx, &u.f_rmrr)) return true; + return false; + } + return false; + case 0x40000000: + /* 1100.0.. ........ .001.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + switch ((insn >> 27) & 0x1) { + case 0x0: + /* 110000.. ........ .001.... .1010111 */ + if (trans_vfwadd_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x1: + /* 110010.. ........ .001.... .1010111 */ + if (trans_vfwsub_vv(ctx, &u.f_rmrr)) return true; + return false; + } + return false; + case 0x44000000: + /* 1100.1.. ........ .001.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vfwredsum_vs(ctx, &u.f_rmrr)) return true; + return false; + case 0x50000000: + /* 1101.0.. ........ .001.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + switch ((insn >> 27) & 0x1) { + case 0x0: + /* 110100.. ........ .001.... .1010111 */ + if (trans_vfwadd_wv(ctx, &u.f_rmrr)) return true; + return false; + case 0x1: + /* 110110.. ........ .001.... .1010111 */ + if (trans_vfwsub_wv(ctx, &u.f_rmrr)) return true; + return false; + } + return false; + case 0x60000000: + /* 1110.0.. ........ .001.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + switch ((insn >> 27) & 0x1) { + case 0x0: + /* 111000.. ........ .001.... .1010111 */ + if (trans_vfwmul_vv(ctx, &u.f_rmrr)) return true; + return false; + } + return false; + case 0x70000000: + /* 1111.0.. ........ .001.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + switch ((insn >> 27) & 0x1) { + case 0x0: + /* 111100.. ........ .001.... .1010111 */ + if (trans_vfwmacc_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x1: + /* 111110.. ........ .001.... .1010111 */ + if (trans_vfwmsac_vv(ctx, &u.f_rmrr)) return true; + return false; + } + return false; + case 0x74000000: + /* 1111.1.. ........ .001.... 
.1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + switch ((insn >> 27) & 0x1) { + case 0x0: + /* 111101.. ........ .001.... .1010111 */ + if (trans_vfwnmacc_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x1: + /* 111111.. ........ .001.... .1010111 */ + if (trans_vfwnmsac_vv(ctx, &u.f_rmrr)) return true; + return false; + } + return false; + } + return false; + case 0x80002000: + /* 1....... ........ .010.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + switch ((insn >> 26) & 0x1f) { + case 0x0: + /* 100000.. ........ .010.... .1010111 */ + if (trans_vdivu_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x1: + /* 100001.. ........ .010.... .1010111 */ + if (trans_vdiv_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x2: + /* 100010.. ........ .010.... .1010111 */ + if (trans_vremu_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x3: + /* 100011.. ........ .010.... .1010111 */ + if (trans_vrem_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x4: + /* 100100.. ........ .010.... .1010111 */ + if (trans_vmulhu_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x5: + /* 100101.. ........ .010.... .1010111 */ + if (trans_vmul_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x6: + /* 100110.. ........ .010.... .1010111 */ + if (trans_vmulhsu_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x7: + /* 100111.. ........ .010.... .1010111 */ + if (trans_vmulh_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x9: + /* 101001.. ........ .010.... .1010111 */ + if (trans_vmadd_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0xb: + /* 101011.. ........ .010.... .1010111 */ + if (trans_vnmsub_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0xd: + /* 101101.. ........ .010.... .1010111 */ + if (trans_vmacc_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0xf: + /* 101111.. ........ .010.... .1010111 */ + if (trans_vnmsac_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x10: + /* 110000.. ........ .010.... .1010111 */ + if (trans_vwaddu_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x11: + /* 110001.. ........ .010.... .1010111 */ + if (trans_vwadd_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x12: + /* 110010.. ........ .010.... .1010111 */ + if (trans_vwsubu_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x13: + /* 110011.. ........ .010.... .1010111 */ + if (trans_vwsub_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x14: + /* 110100.. ........ .010.... .1010111 */ + if (trans_vwaddu_wv(ctx, &u.f_rmrr)) return true; + return false; + case 0x15: + /* 110101.. ........ .010.... .1010111 */ + if (trans_vwadd_wv(ctx, &u.f_rmrr)) return true; + return false; + case 0x16: + /* 110110.. ........ .010.... .1010111 */ + if (trans_vwsubu_wv(ctx, &u.f_rmrr)) return true; + return false; + case 0x17: + /* 110111.. ........ .010.... .1010111 */ + if (trans_vwsub_wv(ctx, &u.f_rmrr)) return true; + return false; + case 0x18: + /* 111000.. ........ .010.... .1010111 */ + if (trans_vwmulu_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x1a: + /* 111010.. ........ .010.... .1010111 */ + if (trans_vwmulsu_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x1b: + /* 111011.. ........ .010.... .1010111 */ + if (trans_vwmul_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x1c: + /* 111100.. ........ .010.... .1010111 */ + if (trans_vwmaccu_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x1d: + /* 111101.. ........ 
.010.... .1010111 */ + if (trans_vwmacc_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x1e: + /* 111110.. ........ .010.... .1010111 */ + if (trans_vwmaccsu_vv(ctx, &u.f_rmrr)) return true; + return false; + } + return false; + case 0x80003000: + /* 1....... ........ .011.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + switch ((insn >> 26) & 0x1f) { + case 0x0: + /* 100000.. ........ .011.... .1010111 */ + if (trans_vsaddu_vi(ctx, &u.f_rmrr)) return true; + return false; + case 0x1: + /* 100001.. ........ .011.... .1010111 */ + if (trans_vsadd_vi(ctx, &u.f_rmrr)) return true; + return false; + case 0x4: + /* 100100.. ........ .011.... .1010111 */ + if (trans_vaadd_vi(ctx, &u.f_rmrr)) return true; + return false; + case 0x5: + /* 100101.. ........ .011.... .1010111 */ + if (trans_vsll_vi(ctx, &u.f_rmrr)) return true; + return false; + case 0x8: + /* 101000.. ........ .011.... .1010111 */ + if (trans_vsrl_vi(ctx, &u.f_rmrr)) return true; + return false; + case 0x9: + /* 101001.. ........ .011.... .1010111 */ + if (trans_vsra_vi(ctx, &u.f_rmrr)) return true; + return false; + case 0xa: + /* 101010.. ........ .011.... .1010111 */ + if (trans_vssrl_vi(ctx, &u.f_rmrr)) return true; + return false; + case 0xb: + /* 101011.. ........ .011.... .1010111 */ + if (trans_vssra_vi(ctx, &u.f_rmrr)) return true; + return false; + case 0xc: + /* 101100.. ........ .011.... .1010111 */ + if (trans_vnsrl_vi(ctx, &u.f_rmrr)) return true; + return false; + case 0xd: + /* 101101.. ........ .011.... .1010111 */ + if (trans_vnsra_vi(ctx, &u.f_rmrr)) return true; + return false; + case 0xe: + /* 101110.. ........ .011.... .1010111 */ + if (trans_vnclipu_vi(ctx, &u.f_rmrr)) return true; + return false; + case 0xf: + /* 101111.. ........ .011.... .1010111 */ + if (trans_vnclip_vi(ctx, &u.f_rmrr)) return true; + return false; + } + return false; + case 0x80004000: + /* 1....... ........ .100.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + switch ((insn >> 26) & 0x1f) { + case 0x0: + /* 100000.. ........ .100.... .1010111 */ + if (trans_vsaddu_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x1: + /* 100001.. ........ .100.... .1010111 */ + if (trans_vsadd_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x2: + /* 100010.. ........ .100.... .1010111 */ + if (trans_vssubu_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x3: + /* 100011.. ........ .100.... .1010111 */ + if (trans_vssub_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x4: + /* 100100.. ........ .100.... .1010111 */ + if (trans_vaadd_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x5: + /* 100101.. ........ .100.... .1010111 */ + if (trans_vsll_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x6: + /* 100110.. ........ .100.... .1010111 */ + if (trans_vasub_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x7: + /* 100111.. ........ .100.... .1010111 */ + if (trans_vsmul_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x8: + /* 101000.. ........ .100.... .1010111 */ + if (trans_vsrl_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x9: + /* 101001.. ........ .100.... .1010111 */ + if (trans_vsra_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0xa: + /* 101010.. ........ .100.... .1010111 */ + if (trans_vssrl_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0xb: + /* 101011.. ........ .100.... .1010111 */ + if (trans_vssra_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0xc: + /* 101100.. ........ .100.... 
.1010111 */ + if (trans_vnsrl_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0xd: + /* 101101.. ........ .100.... .1010111 */ + if (trans_vnsra_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0xe: + /* 101110.. ........ .100.... .1010111 */ + if (trans_vnclipu_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0xf: + /* 101111.. ........ .100.... .1010111 */ + if (trans_vnclip_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x1c: + /* 111100.. ........ .100.... .1010111 */ + if (trans_vwsmaccu_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x1d: + /* 111101.. ........ .100.... .1010111 */ + if (trans_vwsmacc_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x1e: + /* 111110.. ........ .100.... .1010111 */ + if (trans_vwsmaccsu_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x1f: + /* 111111.. ........ .100.... .1010111 */ + if (trans_vwsmaccus_vx(ctx, &u.f_rmrr)) return true; + return false; + } + return false; + case 0x80005000: + /* 1....... ........ .101.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + switch ((insn >> 26) & 0x1f) { + case 0x0: + /* 100000.. ........ .101.... .1010111 */ + if (trans_vfdiv_vf(ctx, &u.f_rmrr)) return true; + return false; + case 0x1: + /* 100001.. ........ .101.... .1010111 */ + if (trans_vfrdiv_vf(ctx, &u.f_rmrr)) return true; + return false; + case 0x4: + /* 100100.. ........ .101.... .1010111 */ + if (trans_vfmul_vf(ctx, &u.f_rmrr)) return true; + return false; + case 0x7: + /* 100111.. ........ .101.... .1010111 */ + if (trans_vfrsub_vf(ctx, &u.f_rmrr)) return true; + return false; + case 0x8: + /* 101000.. ........ .101.... .1010111 */ + if (trans_vfmadd_vf(ctx, &u.f_rmrr)) return true; + return false; + case 0x9: + /* 101001.. ........ .101.... .1010111 */ + if (trans_vfnmadd_vf(ctx, &u.f_rmrr)) return true; + return false; + case 0xa: + /* 101010.. ........ .101.... .1010111 */ + if (trans_vfmsub_vf(ctx, &u.f_rmrr)) return true; + return false; + case 0xb: + /* 101011.. ........ .101.... .1010111 */ + if (trans_vfnmsub_vf(ctx, &u.f_rmrr)) return true; + return false; + case 0xc: + /* 101100.. ........ .101.... .1010111 */ + if (trans_vfmacc_vf(ctx, &u.f_rmrr)) return true; + return false; + case 0xd: + /* 101101.. ........ .101.... .1010111 */ + if (trans_vfnmacc_vf(ctx, &u.f_rmrr)) return true; + return false; + case 0xe: + /* 101110.. ........ .101.... .1010111 */ + if (trans_vfmsac_vf(ctx, &u.f_rmrr)) return true; + return false; + case 0xf: + /* 101111.. ........ .101.... .1010111 */ + if (trans_vfnmsac_vf(ctx, &u.f_rmrr)) return true; + return false; + case 0x10: + /* 110000.. ........ .101.... .1010111 */ + if (trans_vfwadd_vf(ctx, &u.f_rmrr)) return true; + return false; + case 0x12: + /* 110010.. ........ .101.... .1010111 */ + if (trans_vfwsub_vf(ctx, &u.f_rmrr)) return true; + return false; + case 0x14: + /* 110100.. ........ .101.... .1010111 */ + if (trans_vfwadd_wf(ctx, &u.f_rmrr)) return true; + return false; + case 0x16: + /* 110110.. ........ .101.... .1010111 */ + if (trans_vfwsub_wf(ctx, &u.f_rmrr)) return true; + return false; + case 0x18: + /* 111000.. ........ .101.... .1010111 */ + if (trans_vfwmul_vf(ctx, &u.f_rmrr)) return true; + return false; + case 0x1c: + /* 111100.. ........ .101.... .1010111 */ + if (trans_vfwmacc_vf(ctx, &u.f_rmrr)) return true; + return false; + case 0x1d: + /* 111101.. ........ .101.... .1010111 */ + if (trans_vfwnmacc_vf(ctx, &u.f_rmrr)) return true; + return false; + case 0x1e: + /* 111110.. ........ .101.... 
.1010111 */ + if (trans_vfwmsac_vf(ctx, &u.f_rmrr)) return true; + return false; + case 0x1f: + /* 111111.. ........ .101.... .1010111 */ + if (trans_vfwnmsac_vf(ctx, &u.f_rmrr)) return true; + return false; + } + return false; + case 0x80006000: + /* 1....... ........ .110.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + switch ((insn >> 26) & 0x1f) { + case 0x0: + /* 100000.. ........ .110.... .1010111 */ + if (trans_vdivu_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x1: + /* 100001.. ........ .110.... .1010111 */ + if (trans_vdiv_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x2: + /* 100010.. ........ .110.... .1010111 */ + if (trans_vremu_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x3: + /* 100011.. ........ .110.... .1010111 */ + if (trans_vrem_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x4: + /* 100100.. ........ .110.... .1010111 */ + if (trans_vmulhu_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x5: + /* 100101.. ........ .110.... .1010111 */ + if (trans_vmul_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x6: + /* 100110.. ........ .110.... .1010111 */ + if (trans_vmulhsu_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x7: + /* 100111.. ........ .110.... .1010111 */ + if (trans_vmulh_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x9: + /* 101001.. ........ .110.... .1010111 */ + if (trans_vmadd_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0xb: + /* 101011.. ........ .110.... .1010111 */ + if (trans_vnmsub_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0xd: + /* 101101.. ........ .110.... .1010111 */ + if (trans_vmacc_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0xf: + /* 101111.. ........ .110.... .1010111 */ + if (trans_vnmsac_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x10: + /* 110000.. ........ .110.... .1010111 */ + if (trans_vwaddu_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x11: + /* 110001.. ........ .110.... .1010111 */ + if (trans_vwadd_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x12: + /* 110010.. ........ .110.... .1010111 */ + if (trans_vwsubu_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x13: + /* 110011.. ........ .110.... .1010111 */ + if (trans_vwsub_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x14: + /* 110100.. ........ .110.... .1010111 */ + if (trans_vwaddu_wx(ctx, &u.f_rmrr)) return true; + return false; + case 0x15: + /* 110101.. ........ .110.... .1010111 */ + if (trans_vwadd_wx(ctx, &u.f_rmrr)) return true; + return false; + case 0x16: + /* 110110.. ........ .110.... .1010111 */ + if (trans_vwsubu_wx(ctx, &u.f_rmrr)) return true; + return false; + case 0x17: + /* 110111.. ........ .110.... .1010111 */ + if (trans_vwsub_wx(ctx, &u.f_rmrr)) return true; + return false; + case 0x18: + /* 111000.. ........ .110.... .1010111 */ + if (trans_vwmulu_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x1a: + /* 111010.. ........ .110.... .1010111 */ + if (trans_vwmulsu_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x1b: + /* 111011.. ........ .110.... .1010111 */ + if (trans_vwmul_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x1c: + /* 111100.. ........ .110.... .1010111 */ + if (trans_vwmaccu_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x1d: + /* 111101.. ........ .110.... .1010111 */ + if (trans_vwmacc_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x1e: + /* 111110.. ........ .110.... 
.1010111 */ + if (trans_vwmaccsu_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x1f: + /* 111111.. ........ .110.... .1010111 */ + if (trans_vwmaccus_vx(ctx, &u.f_rmrr)) return true; + return false; + } + return false; + case 0x80007000: + /* 1....... ........ .111.... .1010111 */ + decode_insn32_extract_r(ctx, &u.f_r, insn); + switch ((insn >> 25) & 0x3f) { + case 0x0: + /* 1000000. ........ .111.... .1010111 */ + if (trans_vsetvl(ctx, &u.f_r)) return true; return false; } return false; @@ -1249,32 +3941,26 @@ static bool decode_insn32(DisasContext *ctx, uint32_t insn) switch ((insn >> 12) & 0x7) { case 0x0: /* ........ ........ .000.... .1100011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:90 */ if (trans_beq(ctx, &u.f_b)) return true; return false; case 0x1: /* ........ ........ .001.... .1100011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:91 */ if (trans_bne(ctx, &u.f_b)) return true; return false; case 0x4: /* ........ ........ .100.... .1100011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:92 */ if (trans_blt(ctx, &u.f_b)) return true; return false; case 0x5: /* ........ ........ .101.... .1100011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:93 */ if (trans_bge(ctx, &u.f_b)) return true; return false; case 0x6: /* ........ ........ .110.... .1100011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:94 */ if (trans_bltu(ctx, &u.f_b)) return true; return false; case 0x7: /* ........ ........ .111.... .1100011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:95 */ if (trans_bgeu(ctx, &u.f_b)) return true; return false; } @@ -1285,14 +3971,12 @@ static bool decode_insn32(DisasContext *ctx, uint32_t insn) switch ((insn >> 12) & 0x7) { case 0x0: /* ........ ........ .000.... .1100111 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:89 */ if (trans_jalr(ctx, &u.f_i)) return true; return false; } return false; case 0x0000006f: /* ........ ........ ........ .1101111 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:88 */ decode_insn32_extract_j(ctx, &u.f_j, insn); if (trans_jal(ctx, &u.f_j)) return true; return false; @@ -1304,21 +3988,18 @@ static bool decode_insn32(DisasContext *ctx, uint32_t insn) switch (insn & 0xfe000f80) { case 0x00000000: /* 0000000. ........ .0000000 01110011 */ - decode_insn32_extract_decode_insn32_Fmt_18(ctx, &u.f_empty, insn); + decode_insn32_extract_decode_insn32_Fmt_28(ctx, &u.f_empty, insn); switch ((insn >> 15) & 0x3ff) { case 0x0: /* 00000000 00000000 00000000 01110011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:74 */ if (trans_ecall(ctx, &u.f_empty)) return true; return false; case 0x20: /* 00000000 00010000 00000000 01110011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:75 */ if (trans_ebreak(ctx, &u.f_empty)) return true; return false; case 0x40: /* 00000000 00100000 00000000 01110011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:76 */ if (trans_uret(ctx, &u.f_empty)) return true; return false; } @@ -1328,28 +4009,25 @@ static bool decode_insn32(DisasContext *ctx, uint32_t insn) switch ((insn >> 20) & 0x1f) { case 0x2: /* 00010000 0010.... 
.0000000 01110011 */ - decode_insn32_extract_decode_insn32_Fmt_18(ctx, &u.f_empty, insn); + decode_insn32_extract_decode_insn32_Fmt_28(ctx, &u.f_empty, insn); switch ((insn >> 15) & 0x1f) { case 0x0: /* 00010000 00100000 00000000 01110011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:77 */ if (trans_sret(ctx, &u.f_empty)) return true; return false; } return false; case 0x4: /* 00010000 0100.... .0000000 01110011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:83 */ - decode_insn32_extract_sfence_vm(ctx, &u.f_decode_insn3215, insn); - if (trans_sfence_vm(ctx, &u.f_decode_insn3215)) return true; + decode_insn32_extract_sfence_vm(ctx, &u.f_decode_insn3223, insn); + if (trans_sfence_vm(ctx, &u.f_decode_insn3223)) return true; return false; case 0x5: /* 00010000 0101.... .0000000 01110011 */ - decode_insn32_extract_decode_insn32_Fmt_18(ctx, &u.f_empty, insn); + decode_insn32_extract_decode_insn32_Fmt_28(ctx, &u.f_empty, insn); switch ((insn >> 15) & 0x1f) { case 0x0: /* 00010000 01010000 00000000 01110011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:79 */ if (trans_wfi(ctx, &u.f_empty)) return true; return false; } @@ -1358,70 +4036,60 @@ static bool decode_insn32(DisasContext *ctx, uint32_t insn) return false; case 0x12000000: /* 0001001. ........ .0000000 01110011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:82 */ - decode_insn32_extract_sfence_vma(ctx, &u.f_decode_insn3214, insn); - if (trans_sfence_vma(ctx, &u.f_decode_insn3214)) return true; + decode_insn32_extract_sfence_vma(ctx, &u.f_decode_insn3222, insn); + if (trans_sfence_vma(ctx, &u.f_decode_insn3222)) return true; return false; case 0x22000000: /* 0010001. ........ .0000000 01110011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:81 */ - decode_insn32_extract_hfence_bvma(ctx, &u.f_decode_insn3214, insn); - if (trans_hfence_bvma(ctx, &u.f_decode_insn3214)) return true; + decode_insn32_extract_hfence_vvma(ctx, &u.f_decode_insn3222, insn); + if (trans_hfence_vvma(ctx, &u.f_decode_insn3222)) return true; return false; case 0x30000000: /* 0011000. ........ .0000000 01110011 */ - decode_insn32_extract_decode_insn32_Fmt_18(ctx, &u.f_empty, insn); + decode_insn32_extract_decode_insn32_Fmt_28(ctx, &u.f_empty, insn); switch ((insn >> 15) & 0x3ff) { case 0x40: /* 00110000 00100000 00000000 01110011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:78 */ if (trans_mret(ctx, &u.f_empty)) return true; return false; } return false; case 0x62000000: /* 0110001. ........ .0000000 01110011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:80 */ - decode_insn32_extract_hfence_gvma(ctx, &u.f_decode_insn3214, insn); - if (trans_hfence_gvma(ctx, &u.f_decode_insn3214)) return true; + decode_insn32_extract_hfence_gvma(ctx, &u.f_decode_insn3222, insn); + if (trans_hfence_gvma(ctx, &u.f_decode_insn3222)) return true; return false; } return false; case 0x1: /* ........ ........ .001.... .1110011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:125 */ - decode_insn32_extract_csr(ctx, &u.f_decode_insn329, insn); - if (trans_csrrw(ctx, &u.f_decode_insn329)) return true; + decode_insn32_extract_csr(ctx, &u.f_decode_insn3214, insn); + if (trans_csrrw(ctx, &u.f_decode_insn3214)) return true; return false; case 0x2: /* ........ ........ .010.... 
.1110011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:126 */ - decode_insn32_extract_csr(ctx, &u.f_decode_insn329, insn); - if (trans_csrrs(ctx, &u.f_decode_insn329)) return true; + decode_insn32_extract_csr(ctx, &u.f_decode_insn3214, insn); + if (trans_csrrs(ctx, &u.f_decode_insn3214)) return true; return false; case 0x3: /* ........ ........ .011.... .1110011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:127 */ - decode_insn32_extract_csr(ctx, &u.f_decode_insn329, insn); - if (trans_csrrc(ctx, &u.f_decode_insn329)) return true; + decode_insn32_extract_csr(ctx, &u.f_decode_insn3214, insn); + if (trans_csrrc(ctx, &u.f_decode_insn3214)) return true; return false; case 0x5: /* ........ ........ .101.... .1110011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:128 */ - decode_insn32_extract_csr(ctx, &u.f_decode_insn329, insn); - if (trans_csrrwi(ctx, &u.f_decode_insn329)) return true; + decode_insn32_extract_csr(ctx, &u.f_decode_insn3214, insn); + if (trans_csrrwi(ctx, &u.f_decode_insn3214)) return true; return false; case 0x6: /* ........ ........ .110.... .1110011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:129 */ - decode_insn32_extract_csr(ctx, &u.f_decode_insn329, insn); - if (trans_csrrsi(ctx, &u.f_decode_insn329)) return true; + decode_insn32_extract_csr(ctx, &u.f_decode_insn3214, insn); + if (trans_csrrsi(ctx, &u.f_decode_insn3214)) return true; return false; case 0x7: /* ........ ........ .111.... .1110011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:130 */ - decode_insn32_extract_csr(ctx, &u.f_decode_insn329, insn); - if (trans_csrrci(ctx, &u.f_decode_insn329)) return true; + decode_insn32_extract_csr(ctx, &u.f_decode_insn3214, insn); + if (trans_csrrci(ctx, &u.f_decode_insn3214)) return true; return false; } return false; diff --git a/qemu/target/riscv/riscv64/decode_insn16.inc.c b/qemu/target/riscv/riscv64/decode_insn16.inc.c index 719388566f..a3bfbd0d3f 100644 --- a/qemu/target/riscv/riscv64/decode_insn16.inc.c +++ b/qemu/target/riscv/riscv64/decode_insn16.inc.c @@ -1,11 +1,9 @@ /* This file is autogenerated by scripts/decodetree.py. */ -#ifdef CONFIG_PRAGMA_DIAGNOSTIC_AVAILABLE -# pragma GCC diagnostic push -# pragma GCC diagnostic ignored "-Wredundant-decls" -# ifdef __clang__ +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wredundant-decls" +#ifdef __clang__ # pragma GCC diagnostic ignored "-Wtypedef-redefinition" -# endif #endif typedef arg_empty arg_illegal; @@ -61,9 +59,7 @@ static bool trans_subw(DisasContext *ctx, arg_subw *a); typedef arg_r arg_addw; static bool trans_addw(DisasContext *ctx, arg_addw *a); -#ifdef CONFIG_PRAGMA_DIAGNOSTIC_AVAILABLE -# pragma GCC diagnostic pop -#endif +#pragma GCC diagnostic pop static void decode_insn16_extract_c_addi16sp(DisasContext *ctx, arg_i *a, uint16_t insn) { @@ -237,30 +233,24 @@ static bool decode_insn16(DisasContext *ctx, uint16_t insn) /* 000..... 
......00 */ if ((insn & 0x00001fe0) == 0x00000000) { /* 00000000 000...00 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16.decode:87 */ decode_insn16_extract_decode_insn16_Fmt_22(ctx, &u.f_empty, insn); - ctx->invalid = true; if (trans_illegal(ctx, &u.f_empty)) return true; } - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16.decode:88 */ decode_insn16_extract_c_addi4spn(ctx, &u.f_i, insn); if (trans_addi(ctx, &u.f_i)) return true; return false; case 0x00000001: /* 000..... ......01 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16.decode:96 */ decode_insn16_extract_ci(ctx, &u.f_i, insn); if (trans_addi(ctx, &u.f_i)) return true; return false; case 0x00000002: /* 000..... ......10 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16.decode:115 */ decode_insn16_extract_c_shift2(ctx, &u.f_shift, insn); if (trans_slli(ctx, &u.f_shift)) return true; return false; case 0x00002000: /* 001..... ......00 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16.decode:90 */ decode_insn16_extract_cl_d(ctx, &u.f_i, insn); if (trans_fld(ctx, &u.f_i)) return true; return false; @@ -268,29 +258,24 @@ static bool decode_insn16(DisasContext *ctx, uint16_t insn) /* 001..... ......01 */ if ((insn & 0x00000f80) == 0x00000000) { /* 001.0000 0.....01 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16-64.decode:25 */ decode_insn16_extract_decode_insn16_Fmt_22(ctx, &u.f_empty, insn); if (trans_illegal(ctx, &u.f_empty)) return true; } - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16-64.decode:26 */ decode_insn16_extract_ci(ctx, &u.f_i, insn); if (trans_addiw(ctx, &u.f_i)) return true; return false; case 0x00002002: /* 001..... ......10 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16.decode:116 */ decode_insn16_extract_c_ldsp(ctx, &u.f_i, insn); if (trans_fld(ctx, &u.f_i)) return true; return false; case 0x00004000: /* 010..... ......00 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16.decode:91 */ decode_insn16_extract_cl_w(ctx, &u.f_i, insn); if (trans_lw(ctx, &u.f_i)) return true; return false; case 0x00004001: /* 010..... ......01 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16.decode:97 */ decode_insn16_extract_c_li(ctx, &u.f_i, insn); if (trans_addi(ctx, &u.f_i)) return true; return false; @@ -298,17 +283,14 @@ static bool decode_insn16(DisasContext *ctx, uint16_t insn) /* 010..... ......10 */ if ((insn & 0x00000f80) == 0x00000000) { /* 010.0000 0.....10 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16.decode:118 */ decode_insn16_extract_decode_insn16_Fmt_22(ctx, &u.f_empty, insn); if (trans_illegal(ctx, &u.f_empty)) return true; } - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16.decode:119 */ decode_insn16_extract_c_lwsp(ctx, &u.f_i, insn); if (trans_lw(ctx, &u.f_i)) return true; return false; case 0x00006000: /* 011..... ......00 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16-64.decode:20 */ decode_insn16_extract_cl_d(ctx, &u.f_i, insn); if (trans_ld(ctx, &u.f_i)) return true; return false; @@ -316,17 +298,14 @@ static bool decode_insn16(DisasContext *ctx, uint16_t insn) /* 011..... ......01 */ if ((insn & 0x0000107c) == 0x00000000) { /* 0110.... 
.0000001 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16.decode:99 */ decode_insn16_extract_decode_insn16_Fmt_22(ctx, &u.f_empty, insn); if (trans_illegal(ctx, &u.f_empty)) return true; } if ((insn & 0x00000f80) == 0x00000100) { /* 011.0001 0.....01 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16.decode:100 */ decode_insn16_extract_c_addi16sp(ctx, &u.f_i, insn); if (trans_addi(ctx, &u.f_i)) return true; } - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16.decode:101 */ decode_insn16_extract_c_lui(ctx, &u.f_u, insn); if (trans_lui(ctx, &u.f_u)) return true; return false; @@ -334,11 +313,9 @@ static bool decode_insn16(DisasContext *ctx, uint16_t insn) /* 011..... ......10 */ if ((insn & 0x00000f80) == 0x00000000) { /* 011.0000 0.....10 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16-64.decode:33 */ decode_insn16_extract_decode_insn16_Fmt_22(ctx, &u.f_empty, insn); if (trans_illegal(ctx, &u.f_empty)) return true; } - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16-64.decode:34 */ decode_insn16_extract_c_ldsp(ctx, &u.f_i, insn); if (trans_ld(ctx, &u.f_i)) return true; return false; @@ -347,19 +324,16 @@ static bool decode_insn16(DisasContext *ctx, uint16_t insn) switch ((insn >> 10) & 0x3) { case 0x0: /* 100.00.. ......01 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16.decode:103 */ decode_insn16_extract_c_shift(ctx, &u.f_shift, insn); if (trans_srli(ctx, &u.f_shift)) return true; return false; case 0x1: /* 100.01.. ......01 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16.decode:104 */ decode_insn16_extract_c_shift(ctx, &u.f_shift, insn); if (trans_srai(ctx, &u.f_shift)) return true; return false; case 0x2: /* 100.10.. ......01 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16.decode:105 */ decode_insn16_extract_c_andi(ctx, &u.f_i, insn); if (trans_andi(ctx, &u.f_i)) return true; return false; @@ -369,32 +343,26 @@ static bool decode_insn16(DisasContext *ctx, uint16_t insn) switch (insn & 0x00001060) { case 0x00000000: /* 100011.. .00...01 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16.decode:106 */ if (trans_sub(ctx, &u.f_r)) return true; return false; case 0x00000020: /* 100011.. .01...01 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16.decode:107 */ if (trans_xor(ctx, &u.f_r)) return true; return false; case 0x00000040: /* 100011.. .10...01 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16.decode:108 */ if (trans_or(ctx, &u.f_r)) return true; return false; case 0x00000060: /* 100011.. .11...01 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16.decode:109 */ if (trans_and(ctx, &u.f_r)) return true; return false; case 0x00001000: /* 100111.. .00...01 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16-64.decode:28 */ if (trans_subw(ctx, &u.f_r)) return true; return false; case 0x00001020: /* 100111.. .01...01 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16-64.decode:29 */ if (trans_addw(ctx, &u.f_r)) return true; return false; } @@ -408,18 +376,15 @@ static bool decode_insn16(DisasContext *ctx, uint16_t insn) /* 1000.... 
......10 */ if ((insn & 0x00000ffc) == 0x00000000) { /* 10000000 00000010 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16.decode:122 */ decode_insn16_extract_decode_insn16_Fmt_22(ctx, &u.f_empty, insn); if (trans_illegal(ctx, &u.f_empty)) return true; } if ((insn & 0x0000007c) == 0x00000000) { /* 1000.... .0000010 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16.decode:123 */ decode_insn16_extract_c_jalr(ctx, &u.f_i, insn); u.f_i.rd = 0; if (trans_jalr(ctx, &u.f_i)) return true; } - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16.decode:124 */ decode_insn16_extract_c_mv(ctx, &u.f_i, insn); if (trans_addi(ctx, &u.f_i)) return true; return false; @@ -427,18 +392,15 @@ static bool decode_insn16(DisasContext *ctx, uint16_t insn) /* 1001.... ......10 */ if ((insn & 0x00000ffc) == 0x00000000) { /* 10010000 00000010 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16.decode:127 */ decode_insn16_extract_decode_insn16_Fmt_22(ctx, &u.f_empty, insn); if (trans_ebreak(ctx, &u.f_empty)) return true; } if ((insn & 0x0000007c) == 0x00000000) { /* 1001.... .0000010 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16.decode:128 */ decode_insn16_extract_c_jalr(ctx, &u.f_i, insn); u.f_i.rd = 1; if (trans_jalr(ctx, &u.f_i)) return true; } - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16.decode:129 */ decode_insn16_extract_cr(ctx, &u.f_r, insn); if (trans_add(ctx, &u.f_r)) return true; return false; @@ -446,56 +408,47 @@ static bool decode_insn16(DisasContext *ctx, uint16_t insn) return false; case 0x0000a000: /* 101..... ......00 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16.decode:92 */ decode_insn16_extract_cs_d(ctx, &u.f_s, insn); if (trans_fsd(ctx, &u.f_s)) return true; return false; case 0x0000a001: /* 101..... ......01 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16.decode:110 */ decode_insn16_extract_cj(ctx, &u.f_j, insn); u.f_j.rd = 0; if (trans_jal(ctx, &u.f_j)) return true; return false; case 0x0000a002: /* 101..... ......10 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16.decode:131 */ decode_insn16_extract_c_sdsp(ctx, &u.f_s, insn); if (trans_fsd(ctx, &u.f_s)) return true; return false; case 0x0000c000: /* 110..... ......00 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16.decode:93 */ decode_insn16_extract_cs_w(ctx, &u.f_s, insn); if (trans_sw(ctx, &u.f_s)) return true; return false; case 0x0000c001: /* 110..... ......01 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16.decode:111 */ decode_insn16_extract_cb_z(ctx, &u.f_b, insn); if (trans_beq(ctx, &u.f_b)) return true; return false; case 0x0000c002: /* 110..... ......10 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16.decode:132 */ decode_insn16_extract_c_swsp(ctx, &u.f_s, insn); if (trans_sw(ctx, &u.f_s)) return true; return false; case 0x0000e000: /* 111..... ......00 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16-64.decode:21 */ decode_insn16_extract_cs_d(ctx, &u.f_s, insn); if (trans_sd(ctx, &u.f_s)) return true; return false; case 0x0000e001: /* 111..... ......01 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16.decode:112 */ decode_insn16_extract_cb_z(ctx, &u.f_b, insn); if (trans_bne(ctx, &u.f_b)) return true; return false; case 0x0000e002: /* 111..... 
......10 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16-64.decode:36 */ decode_insn16_extract_c_sdsp(ctx, &u.f_s, insn); if (trans_sd(ctx, &u.f_s)) return true; return false; diff --git a/qemu/target/riscv/riscv64/decode_insn32.inc.c b/qemu/target/riscv/riscv64/decode_insn32.inc.c index b5d7896091..7ea843c575 100644 --- a/qemu/target/riscv/riscv64/decode_insn32.inc.c +++ b/qemu/target/riscv/riscv64/decode_insn32.inc.c @@ -14,56 +14,70 @@ typedef struct { int rs2; } arg_b; +typedef struct { + int csr; + int rd; + int rs1; +} arg_decode_insn3214; + typedef struct { int rd; int rm; int rs1; int rs2; int rs3; -} arg_decode_insn3210; +} arg_decode_insn3215; typedef struct { int rd; int rm; int rs1; int rs2; -} arg_decode_insn3211; +} arg_decode_insn3216; typedef struct { int rd; int rm; int rs1; -} arg_decode_insn3212; +} arg_decode_insn3217; typedef struct { int rd; int rs1; -} arg_decode_insn3213; +} arg_decode_insn3218; typedef struct { - int rs1; + int rd; + int vm; +} arg_decode_insn3219; + +typedef struct { + int rd; int rs2; -} arg_decode_insn3214; +} arg_decode_insn3220; typedef struct { + int rd; int rs1; -} arg_decode_insn3215; + int zimm; +} arg_decode_insn3221; typedef struct { - int pred; - int succ; -} arg_decode_insn3216; + int rs1; + int rs2; +} arg_decode_insn3222; typedef struct { - int csr; - int rd; int rs1; -} arg_decode_insn329; +} arg_decode_insn3223; + +typedef struct { + int pred; + int succ; +} arg_decode_insn3224; typedef struct { -#ifdef _MSC_VER - int dummy; // MSVC does not allow empty struct -#endif + int : 0; } arg_empty; typedef struct { @@ -83,6 +97,42 @@ typedef struct { int rs2; } arg_r; +typedef struct { + int nf; + int rd; + int rs1; + int vm; +} arg_r2nfvm; + +typedef struct { + int rd; + int rs2; + int vm; +} arg_rmr; + +typedef struct { + int rd; + int rs1; + int rs2; + int vm; +} arg_rmrr; + +typedef struct { + int nf; + int rd; + int rs1; + int rs2; + int vm; +} arg_rnfvm; + +typedef struct { + int rd; + int rs1; + int rs2; + int vm; + int wd; +} arg_rwdvm; + typedef struct { int imm; int rs1; @@ -112,13 +162,9 @@ typedef arg_empty arg_mret; static bool trans_mret(DisasContext *ctx, arg_mret *a); typedef arg_empty arg_wfi; static bool trans_wfi(DisasContext *ctx, arg_wfi *a); -typedef arg_decode_insn3214 arg_hfence_gvma; -static bool trans_hfence_gvma(DisasContext *ctx, arg_hfence_gvma *a); -typedef arg_decode_insn3214 arg_hfence_bvma; -static bool trans_hfence_bvma(DisasContext *ctx, arg_hfence_bvma *a); -typedef arg_decode_insn3214 arg_sfence_vma; +typedef arg_decode_insn3222 arg_sfence_vma; static bool trans_sfence_vma(DisasContext *ctx, arg_sfence_vma *a); -typedef arg_decode_insn3215 arg_sfence_vm; +typedef arg_decode_insn3223 arg_sfence_vm; static bool trans_sfence_vm(DisasContext *ctx, arg_sfence_vm *a); typedef arg_u arg_lui; static bool trans_lui(DisasContext *ctx, arg_lui *a); @@ -194,21 +240,21 @@ typedef arg_r arg_or; static bool trans_or(DisasContext *ctx, arg_or *a); typedef arg_r arg_and; static bool trans_and(DisasContext *ctx, arg_and *a); -typedef arg_decode_insn3216 arg_fence; +typedef arg_decode_insn3224 arg_fence; static bool trans_fence(DisasContext *ctx, arg_fence *a); typedef arg_empty arg_fence_i; static bool trans_fence_i(DisasContext *ctx, arg_fence_i *a); -typedef arg_decode_insn329 arg_csrrw; +typedef arg_decode_insn3214 arg_csrrw; static bool trans_csrrw(DisasContext *ctx, arg_csrrw *a); -typedef arg_decode_insn329 arg_csrrs; +typedef arg_decode_insn3214 arg_csrrs; static bool 
trans_csrrs(DisasContext *ctx, arg_csrrs *a); -typedef arg_decode_insn329 arg_csrrc; +typedef arg_decode_insn3214 arg_csrrc; static bool trans_csrrc(DisasContext *ctx, arg_csrrc *a); -typedef arg_decode_insn329 arg_csrrwi; +typedef arg_decode_insn3214 arg_csrrwi; static bool trans_csrrwi(DisasContext *ctx, arg_csrrwi *a); -typedef arg_decode_insn329 arg_csrrsi; +typedef arg_decode_insn3214 arg_csrrsi; static bool trans_csrrsi(DisasContext *ctx, arg_csrrsi *a); -typedef arg_decode_insn329 arg_csrrci; +typedef arg_decode_insn3214 arg_csrrci; static bool trans_csrrci(DisasContext *ctx, arg_csrrci *a); typedef arg_r arg_mul; static bool trans_mul(DisasContext *ctx, arg_mul *a); @@ -252,23 +298,23 @@ typedef arg_i arg_flw; static bool trans_flw(DisasContext *ctx, arg_flw *a); typedef arg_s arg_fsw; static bool trans_fsw(DisasContext *ctx, arg_fsw *a); -typedef arg_decode_insn3210 arg_fmadd_s; +typedef arg_decode_insn3215 arg_fmadd_s; static bool trans_fmadd_s(DisasContext *ctx, arg_fmadd_s *a); -typedef arg_decode_insn3210 arg_fmsub_s; +typedef arg_decode_insn3215 arg_fmsub_s; static bool trans_fmsub_s(DisasContext *ctx, arg_fmsub_s *a); -typedef arg_decode_insn3210 arg_fnmsub_s; +typedef arg_decode_insn3215 arg_fnmsub_s; static bool trans_fnmsub_s(DisasContext *ctx, arg_fnmsub_s *a); -typedef arg_decode_insn3210 arg_fnmadd_s; +typedef arg_decode_insn3215 arg_fnmadd_s; static bool trans_fnmadd_s(DisasContext *ctx, arg_fnmadd_s *a); -typedef arg_decode_insn3211 arg_fadd_s; +typedef arg_decode_insn3216 arg_fadd_s; static bool trans_fadd_s(DisasContext *ctx, arg_fadd_s *a); -typedef arg_decode_insn3211 arg_fsub_s; +typedef arg_decode_insn3216 arg_fsub_s; static bool trans_fsub_s(DisasContext *ctx, arg_fsub_s *a); -typedef arg_decode_insn3211 arg_fmul_s; +typedef arg_decode_insn3216 arg_fmul_s; static bool trans_fmul_s(DisasContext *ctx, arg_fmul_s *a); -typedef arg_decode_insn3211 arg_fdiv_s; +typedef arg_decode_insn3216 arg_fdiv_s; static bool trans_fdiv_s(DisasContext *ctx, arg_fdiv_s *a); -typedef arg_decode_insn3212 arg_fsqrt_s; +typedef arg_decode_insn3217 arg_fsqrt_s; static bool trans_fsqrt_s(DisasContext *ctx, arg_fsqrt_s *a); typedef arg_r arg_fsgnj_s; static bool trans_fsgnj_s(DisasContext *ctx, arg_fsgnj_s *a); @@ -280,11 +326,11 @@ typedef arg_r arg_fmin_s; static bool trans_fmin_s(DisasContext *ctx, arg_fmin_s *a); typedef arg_r arg_fmax_s; static bool trans_fmax_s(DisasContext *ctx, arg_fmax_s *a); -typedef arg_decode_insn3212 arg_fcvt_w_s; +typedef arg_decode_insn3217 arg_fcvt_w_s; static bool trans_fcvt_w_s(DisasContext *ctx, arg_fcvt_w_s *a); -typedef arg_decode_insn3212 arg_fcvt_wu_s; +typedef arg_decode_insn3217 arg_fcvt_wu_s; static bool trans_fcvt_wu_s(DisasContext *ctx, arg_fcvt_wu_s *a); -typedef arg_decode_insn3213 arg_fmv_x_w; +typedef arg_decode_insn3218 arg_fmv_x_w; static bool trans_fmv_x_w(DisasContext *ctx, arg_fmv_x_w *a); typedef arg_r arg_feq_s; static bool trans_feq_s(DisasContext *ctx, arg_feq_s *a); @@ -292,35 +338,35 @@ typedef arg_r arg_flt_s; static bool trans_flt_s(DisasContext *ctx, arg_flt_s *a); typedef arg_r arg_fle_s; static bool trans_fle_s(DisasContext *ctx, arg_fle_s *a); -typedef arg_decode_insn3213 arg_fclass_s; +typedef arg_decode_insn3218 arg_fclass_s; static bool trans_fclass_s(DisasContext *ctx, arg_fclass_s *a); -typedef arg_decode_insn3212 arg_fcvt_s_w; +typedef arg_decode_insn3217 arg_fcvt_s_w; static bool trans_fcvt_s_w(DisasContext *ctx, arg_fcvt_s_w *a); -typedef arg_decode_insn3212 arg_fcvt_s_wu; +typedef arg_decode_insn3217 
arg_fcvt_s_wu; static bool trans_fcvt_s_wu(DisasContext *ctx, arg_fcvt_s_wu *a); -typedef arg_decode_insn3213 arg_fmv_w_x; +typedef arg_decode_insn3218 arg_fmv_w_x; static bool trans_fmv_w_x(DisasContext *ctx, arg_fmv_w_x *a); typedef arg_i arg_fld; static bool trans_fld(DisasContext *ctx, arg_fld *a); typedef arg_s arg_fsd; static bool trans_fsd(DisasContext *ctx, arg_fsd *a); -typedef arg_decode_insn3210 arg_fmadd_d; +typedef arg_decode_insn3215 arg_fmadd_d; static bool trans_fmadd_d(DisasContext *ctx, arg_fmadd_d *a); -typedef arg_decode_insn3210 arg_fmsub_d; +typedef arg_decode_insn3215 arg_fmsub_d; static bool trans_fmsub_d(DisasContext *ctx, arg_fmsub_d *a); -typedef arg_decode_insn3210 arg_fnmsub_d; +typedef arg_decode_insn3215 arg_fnmsub_d; static bool trans_fnmsub_d(DisasContext *ctx, arg_fnmsub_d *a); -typedef arg_decode_insn3210 arg_fnmadd_d; +typedef arg_decode_insn3215 arg_fnmadd_d; static bool trans_fnmadd_d(DisasContext *ctx, arg_fnmadd_d *a); -typedef arg_decode_insn3211 arg_fadd_d; +typedef arg_decode_insn3216 arg_fadd_d; static bool trans_fadd_d(DisasContext *ctx, arg_fadd_d *a); -typedef arg_decode_insn3211 arg_fsub_d; +typedef arg_decode_insn3216 arg_fsub_d; static bool trans_fsub_d(DisasContext *ctx, arg_fsub_d *a); -typedef arg_decode_insn3211 arg_fmul_d; +typedef arg_decode_insn3216 arg_fmul_d; static bool trans_fmul_d(DisasContext *ctx, arg_fmul_d *a); -typedef arg_decode_insn3211 arg_fdiv_d; +typedef arg_decode_insn3216 arg_fdiv_d; static bool trans_fdiv_d(DisasContext *ctx, arg_fdiv_d *a); -typedef arg_decode_insn3212 arg_fsqrt_d; +typedef arg_decode_insn3217 arg_fsqrt_d; static bool trans_fsqrt_d(DisasContext *ctx, arg_fsqrt_d *a); typedef arg_r arg_fsgnj_d; static bool trans_fsgnj_d(DisasContext *ctx, arg_fsgnj_d *a); @@ -332,9 +378,9 @@ typedef arg_r arg_fmin_d; static bool trans_fmin_d(DisasContext *ctx, arg_fmin_d *a); typedef arg_r arg_fmax_d; static bool trans_fmax_d(DisasContext *ctx, arg_fmax_d *a); -typedef arg_decode_insn3212 arg_fcvt_s_d; +typedef arg_decode_insn3217 arg_fcvt_s_d; static bool trans_fcvt_s_d(DisasContext *ctx, arg_fcvt_s_d *a); -typedef arg_decode_insn3212 arg_fcvt_d_s; +typedef arg_decode_insn3217 arg_fcvt_d_s; static bool trans_fcvt_d_s(DisasContext *ctx, arg_fcvt_d_s *a); typedef arg_r arg_feq_d; static bool trans_feq_d(DisasContext *ctx, arg_feq_d *a); @@ -342,16 +388,704 @@ typedef arg_r arg_flt_d; static bool trans_flt_d(DisasContext *ctx, arg_flt_d *a); typedef arg_r arg_fle_d; static bool trans_fle_d(DisasContext *ctx, arg_fle_d *a); -typedef arg_decode_insn3213 arg_fclass_d; +typedef arg_decode_insn3218 arg_fclass_d; static bool trans_fclass_d(DisasContext *ctx, arg_fclass_d *a); -typedef arg_decode_insn3212 arg_fcvt_w_d; +typedef arg_decode_insn3217 arg_fcvt_w_d; static bool trans_fcvt_w_d(DisasContext *ctx, arg_fcvt_w_d *a); -typedef arg_decode_insn3212 arg_fcvt_wu_d; +typedef arg_decode_insn3217 arg_fcvt_wu_d; static bool trans_fcvt_wu_d(DisasContext *ctx, arg_fcvt_wu_d *a); -typedef arg_decode_insn3212 arg_fcvt_d_w; +typedef arg_decode_insn3217 arg_fcvt_d_w; static bool trans_fcvt_d_w(DisasContext *ctx, arg_fcvt_d_w *a); -typedef arg_decode_insn3212 arg_fcvt_d_wu; +typedef arg_decode_insn3217 arg_fcvt_d_wu; static bool trans_fcvt_d_wu(DisasContext *ctx, arg_fcvt_d_wu *a); +typedef arg_decode_insn3222 arg_hfence_gvma; +static bool trans_hfence_gvma(DisasContext *ctx, arg_hfence_gvma *a); +typedef arg_decode_insn3222 arg_hfence_vvma; +static bool trans_hfence_vvma(DisasContext *ctx, arg_hfence_vvma *a); +typedef arg_r2nfvm 
arg_vlb_v; +static bool trans_vlb_v(DisasContext *ctx, arg_vlb_v *a); +typedef arg_r2nfvm arg_vlh_v; +static bool trans_vlh_v(DisasContext *ctx, arg_vlh_v *a); +typedef arg_r2nfvm arg_vlw_v; +static bool trans_vlw_v(DisasContext *ctx, arg_vlw_v *a); +typedef arg_r2nfvm arg_vle_v; +static bool trans_vle_v(DisasContext *ctx, arg_vle_v *a); +typedef arg_r2nfvm arg_vlbu_v; +static bool trans_vlbu_v(DisasContext *ctx, arg_vlbu_v *a); +typedef arg_r2nfvm arg_vlhu_v; +static bool trans_vlhu_v(DisasContext *ctx, arg_vlhu_v *a); +typedef arg_r2nfvm arg_vlwu_v; +static bool trans_vlwu_v(DisasContext *ctx, arg_vlwu_v *a); +typedef arg_r2nfvm arg_vlbff_v; +static bool trans_vlbff_v(DisasContext *ctx, arg_vlbff_v *a); +typedef arg_r2nfvm arg_vlhff_v; +static bool trans_vlhff_v(DisasContext *ctx, arg_vlhff_v *a); +typedef arg_r2nfvm arg_vlwff_v; +static bool trans_vlwff_v(DisasContext *ctx, arg_vlwff_v *a); +typedef arg_r2nfvm arg_vleff_v; +static bool trans_vleff_v(DisasContext *ctx, arg_vleff_v *a); +typedef arg_r2nfvm arg_vlbuff_v; +static bool trans_vlbuff_v(DisasContext *ctx, arg_vlbuff_v *a); +typedef arg_r2nfvm arg_vlhuff_v; +static bool trans_vlhuff_v(DisasContext *ctx, arg_vlhuff_v *a); +typedef arg_r2nfvm arg_vlwuff_v; +static bool trans_vlwuff_v(DisasContext *ctx, arg_vlwuff_v *a); +typedef arg_r2nfvm arg_vsb_v; +static bool trans_vsb_v(DisasContext *ctx, arg_vsb_v *a); +typedef arg_r2nfvm arg_vsh_v; +static bool trans_vsh_v(DisasContext *ctx, arg_vsh_v *a); +typedef arg_r2nfvm arg_vsw_v; +static bool trans_vsw_v(DisasContext *ctx, arg_vsw_v *a); +typedef arg_r2nfvm arg_vse_v; +static bool trans_vse_v(DisasContext *ctx, arg_vse_v *a); +typedef arg_rnfvm arg_vlsb_v; +static bool trans_vlsb_v(DisasContext *ctx, arg_vlsb_v *a); +typedef arg_rnfvm arg_vlsh_v; +static bool trans_vlsh_v(DisasContext *ctx, arg_vlsh_v *a); +typedef arg_rnfvm arg_vlsw_v; +static bool trans_vlsw_v(DisasContext *ctx, arg_vlsw_v *a); +typedef arg_rnfvm arg_vlse_v; +static bool trans_vlse_v(DisasContext *ctx, arg_vlse_v *a); +typedef arg_rnfvm arg_vlsbu_v; +static bool trans_vlsbu_v(DisasContext *ctx, arg_vlsbu_v *a); +typedef arg_rnfvm arg_vlshu_v; +static bool trans_vlshu_v(DisasContext *ctx, arg_vlshu_v *a); +typedef arg_rnfvm arg_vlswu_v; +static bool trans_vlswu_v(DisasContext *ctx, arg_vlswu_v *a); +typedef arg_rnfvm arg_vssb_v; +static bool trans_vssb_v(DisasContext *ctx, arg_vssb_v *a); +typedef arg_rnfvm arg_vssh_v; +static bool trans_vssh_v(DisasContext *ctx, arg_vssh_v *a); +typedef arg_rnfvm arg_vssw_v; +static bool trans_vssw_v(DisasContext *ctx, arg_vssw_v *a); +typedef arg_rnfvm arg_vsse_v; +static bool trans_vsse_v(DisasContext *ctx, arg_vsse_v *a); +typedef arg_rnfvm arg_vlxb_v; +static bool trans_vlxb_v(DisasContext *ctx, arg_vlxb_v *a); +typedef arg_rnfvm arg_vlxh_v; +static bool trans_vlxh_v(DisasContext *ctx, arg_vlxh_v *a); +typedef arg_rnfvm arg_vlxw_v; +static bool trans_vlxw_v(DisasContext *ctx, arg_vlxw_v *a); +typedef arg_rnfvm arg_vlxe_v; +static bool trans_vlxe_v(DisasContext *ctx, arg_vlxe_v *a); +typedef arg_rnfvm arg_vlxbu_v; +static bool trans_vlxbu_v(DisasContext *ctx, arg_vlxbu_v *a); +typedef arg_rnfvm arg_vlxhu_v; +static bool trans_vlxhu_v(DisasContext *ctx, arg_vlxhu_v *a); +typedef arg_rnfvm arg_vlxwu_v; +static bool trans_vlxwu_v(DisasContext *ctx, arg_vlxwu_v *a); +typedef arg_rnfvm arg_vsxb_v; +static bool trans_vsxb_v(DisasContext *ctx, arg_vsxb_v *a); +typedef arg_rnfvm arg_vsxh_v; +static bool trans_vsxh_v(DisasContext *ctx, arg_vsxh_v *a); +typedef arg_rnfvm arg_vsxw_v; 
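The block of declarations above is the prototype table that scripts/decodetree.py emits for the vector extension: each instruction gets an argument-struct alias (arg_r2nfvm for the unit-stride loads and stores, arg_rnfvm for the strided and indexed forms) plus a trans_* prototype that insn_trans/trans_rvv.inc.c is expected to implement. A minimal sketch of what such a hook looks like follows; the body is illustrative only, not the code added by this patch, and the has_ext()/RVV availability check is an assumption about the surrounding translator.

    /* Sketch, not the actual implementation: the generated decoder fills an
     * arg_r2nfvm (nf, rd, rs1, vm) and calls the hook; returning true means
     * the instruction was translated, returning false lets the decoder fall
     * through to its illegal-instruction handling. */
    static bool trans_vlb_v(DisasContext *ctx, arg_vlb_v *a)
    {
        if (!has_ext(ctx, RVV)) {      /* assumed helper/flag, see note above */
            return false;
        }
        /* a->rd: destination register group, a->rs1: base address register,
         * a->nf: segment field count, a->vm: 0 means masked by v0 */
        /* ... emit TCG ops for the unit-stride byte load ... */
        return true;
    }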
+static bool trans_vsxw_v(DisasContext *ctx, arg_vsxw_v *a); +typedef arg_rnfvm arg_vsxe_v; +static bool trans_vsxe_v(DisasContext *ctx, arg_vsxe_v *a); +typedef arg_rwdvm arg_vamoswapw_v; +static bool trans_vamoswapw_v(DisasContext *ctx, arg_vamoswapw_v *a); +typedef arg_rwdvm arg_vamoaddw_v; +static bool trans_vamoaddw_v(DisasContext *ctx, arg_vamoaddw_v *a); +typedef arg_rwdvm arg_vamoxorw_v; +static bool trans_vamoxorw_v(DisasContext *ctx, arg_vamoxorw_v *a); +typedef arg_rwdvm arg_vamoandw_v; +static bool trans_vamoandw_v(DisasContext *ctx, arg_vamoandw_v *a); +typedef arg_rwdvm arg_vamoorw_v; +static bool trans_vamoorw_v(DisasContext *ctx, arg_vamoorw_v *a); +typedef arg_rwdvm arg_vamominw_v; +static bool trans_vamominw_v(DisasContext *ctx, arg_vamominw_v *a); +typedef arg_rwdvm arg_vamomaxw_v; +static bool trans_vamomaxw_v(DisasContext *ctx, arg_vamomaxw_v *a); +typedef arg_rwdvm arg_vamominuw_v; +static bool trans_vamominuw_v(DisasContext *ctx, arg_vamominuw_v *a); +typedef arg_rwdvm arg_vamomaxuw_v; +static bool trans_vamomaxuw_v(DisasContext *ctx, arg_vamomaxuw_v *a); +typedef arg_rmrr arg_vadd_vv; +static bool trans_vadd_vv(DisasContext *ctx, arg_vadd_vv *a); +typedef arg_rmrr arg_vadd_vx; +static bool trans_vadd_vx(DisasContext *ctx, arg_vadd_vx *a); +typedef arg_rmrr arg_vadd_vi; +static bool trans_vadd_vi(DisasContext *ctx, arg_vadd_vi *a); +typedef arg_rmrr arg_vsub_vv; +static bool trans_vsub_vv(DisasContext *ctx, arg_vsub_vv *a); +typedef arg_rmrr arg_vsub_vx; +static bool trans_vsub_vx(DisasContext *ctx, arg_vsub_vx *a); +typedef arg_rmrr arg_vrsub_vx; +static bool trans_vrsub_vx(DisasContext *ctx, arg_vrsub_vx *a); +typedef arg_rmrr arg_vrsub_vi; +static bool trans_vrsub_vi(DisasContext *ctx, arg_vrsub_vi *a); +typedef arg_rmrr arg_vwaddu_vv; +static bool trans_vwaddu_vv(DisasContext *ctx, arg_vwaddu_vv *a); +typedef arg_rmrr arg_vwaddu_vx; +static bool trans_vwaddu_vx(DisasContext *ctx, arg_vwaddu_vx *a); +typedef arg_rmrr arg_vwadd_vv; +static bool trans_vwadd_vv(DisasContext *ctx, arg_vwadd_vv *a); +typedef arg_rmrr arg_vwadd_vx; +static bool trans_vwadd_vx(DisasContext *ctx, arg_vwadd_vx *a); +typedef arg_rmrr arg_vwsubu_vv; +static bool trans_vwsubu_vv(DisasContext *ctx, arg_vwsubu_vv *a); +typedef arg_rmrr arg_vwsubu_vx; +static bool trans_vwsubu_vx(DisasContext *ctx, arg_vwsubu_vx *a); +typedef arg_rmrr arg_vwsub_vv; +static bool trans_vwsub_vv(DisasContext *ctx, arg_vwsub_vv *a); +typedef arg_rmrr arg_vwsub_vx; +static bool trans_vwsub_vx(DisasContext *ctx, arg_vwsub_vx *a); +typedef arg_rmrr arg_vwaddu_wv; +static bool trans_vwaddu_wv(DisasContext *ctx, arg_vwaddu_wv *a); +typedef arg_rmrr arg_vwaddu_wx; +static bool trans_vwaddu_wx(DisasContext *ctx, arg_vwaddu_wx *a); +typedef arg_rmrr arg_vwadd_wv; +static bool trans_vwadd_wv(DisasContext *ctx, arg_vwadd_wv *a); +typedef arg_rmrr arg_vwadd_wx; +static bool trans_vwadd_wx(DisasContext *ctx, arg_vwadd_wx *a); +typedef arg_rmrr arg_vwsubu_wv; +static bool trans_vwsubu_wv(DisasContext *ctx, arg_vwsubu_wv *a); +typedef arg_rmrr arg_vwsubu_wx; +static bool trans_vwsubu_wx(DisasContext *ctx, arg_vwsubu_wx *a); +typedef arg_rmrr arg_vwsub_wv; +static bool trans_vwsub_wv(DisasContext *ctx, arg_vwsub_wv *a); +typedef arg_rmrr arg_vwsub_wx; +static bool trans_vwsub_wx(DisasContext *ctx, arg_vwsub_wx *a); +typedef arg_rmrr arg_vadc_vvm; +static bool trans_vadc_vvm(DisasContext *ctx, arg_vadc_vvm *a); +typedef arg_rmrr arg_vadc_vxm; +static bool trans_vadc_vxm(DisasContext *ctx, arg_vadc_vxm *a); +typedef arg_rmrr 
arg_vadc_vim; +static bool trans_vadc_vim(DisasContext *ctx, arg_vadc_vim *a); +typedef arg_rmrr arg_vmadc_vvm; +static bool trans_vmadc_vvm(DisasContext *ctx, arg_vmadc_vvm *a); +typedef arg_rmrr arg_vmadc_vxm; +static bool trans_vmadc_vxm(DisasContext *ctx, arg_vmadc_vxm *a); +typedef arg_rmrr arg_vmadc_vim; +static bool trans_vmadc_vim(DisasContext *ctx, arg_vmadc_vim *a); +typedef arg_rmrr arg_vsbc_vvm; +static bool trans_vsbc_vvm(DisasContext *ctx, arg_vsbc_vvm *a); +typedef arg_rmrr arg_vsbc_vxm; +static bool trans_vsbc_vxm(DisasContext *ctx, arg_vsbc_vxm *a); +typedef arg_rmrr arg_vmsbc_vvm; +static bool trans_vmsbc_vvm(DisasContext *ctx, arg_vmsbc_vvm *a); +typedef arg_rmrr arg_vmsbc_vxm; +static bool trans_vmsbc_vxm(DisasContext *ctx, arg_vmsbc_vxm *a); +typedef arg_rmrr arg_vand_vv; +static bool trans_vand_vv(DisasContext *ctx, arg_vand_vv *a); +typedef arg_rmrr arg_vand_vx; +static bool trans_vand_vx(DisasContext *ctx, arg_vand_vx *a); +typedef arg_rmrr arg_vand_vi; +static bool trans_vand_vi(DisasContext *ctx, arg_vand_vi *a); +typedef arg_rmrr arg_vor_vv; +static bool trans_vor_vv(DisasContext *ctx, arg_vor_vv *a); +typedef arg_rmrr arg_vor_vx; +static bool trans_vor_vx(DisasContext *ctx, arg_vor_vx *a); +typedef arg_rmrr arg_vor_vi; +static bool trans_vor_vi(DisasContext *ctx, arg_vor_vi *a); +typedef arg_rmrr arg_vxor_vv; +static bool trans_vxor_vv(DisasContext *ctx, arg_vxor_vv *a); +typedef arg_rmrr arg_vxor_vx; +static bool trans_vxor_vx(DisasContext *ctx, arg_vxor_vx *a); +typedef arg_rmrr arg_vxor_vi; +static bool trans_vxor_vi(DisasContext *ctx, arg_vxor_vi *a); +typedef arg_rmrr arg_vsll_vv; +static bool trans_vsll_vv(DisasContext *ctx, arg_vsll_vv *a); +typedef arg_rmrr arg_vsll_vx; +static bool trans_vsll_vx(DisasContext *ctx, arg_vsll_vx *a); +typedef arg_rmrr arg_vsll_vi; +static bool trans_vsll_vi(DisasContext *ctx, arg_vsll_vi *a); +typedef arg_rmrr arg_vsrl_vv; +static bool trans_vsrl_vv(DisasContext *ctx, arg_vsrl_vv *a); +typedef arg_rmrr arg_vsrl_vx; +static bool trans_vsrl_vx(DisasContext *ctx, arg_vsrl_vx *a); +typedef arg_rmrr arg_vsrl_vi; +static bool trans_vsrl_vi(DisasContext *ctx, arg_vsrl_vi *a); +typedef arg_rmrr arg_vsra_vv; +static bool trans_vsra_vv(DisasContext *ctx, arg_vsra_vv *a); +typedef arg_rmrr arg_vsra_vx; +static bool trans_vsra_vx(DisasContext *ctx, arg_vsra_vx *a); +typedef arg_rmrr arg_vsra_vi; +static bool trans_vsra_vi(DisasContext *ctx, arg_vsra_vi *a); +typedef arg_rmrr arg_vnsrl_vv; +static bool trans_vnsrl_vv(DisasContext *ctx, arg_vnsrl_vv *a); +typedef arg_rmrr arg_vnsrl_vx; +static bool trans_vnsrl_vx(DisasContext *ctx, arg_vnsrl_vx *a); +typedef arg_rmrr arg_vnsrl_vi; +static bool trans_vnsrl_vi(DisasContext *ctx, arg_vnsrl_vi *a); +typedef arg_rmrr arg_vnsra_vv; +static bool trans_vnsra_vv(DisasContext *ctx, arg_vnsra_vv *a); +typedef arg_rmrr arg_vnsra_vx; +static bool trans_vnsra_vx(DisasContext *ctx, arg_vnsra_vx *a); +typedef arg_rmrr arg_vnsra_vi; +static bool trans_vnsra_vi(DisasContext *ctx, arg_vnsra_vi *a); +typedef arg_rmrr arg_vmseq_vv; +static bool trans_vmseq_vv(DisasContext *ctx, arg_vmseq_vv *a); +typedef arg_rmrr arg_vmseq_vx; +static bool trans_vmseq_vx(DisasContext *ctx, arg_vmseq_vx *a); +typedef arg_rmrr arg_vmseq_vi; +static bool trans_vmseq_vi(DisasContext *ctx, arg_vmseq_vi *a); +typedef arg_rmrr arg_vmsne_vv; +static bool trans_vmsne_vv(DisasContext *ctx, arg_vmsne_vv *a); +typedef arg_rmrr arg_vmsne_vx; +static bool trans_vmsne_vx(DisasContext *ctx, arg_vmsne_vx *a); +typedef arg_rmrr arg_vmsne_vi; 
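The arithmetic and compare instructions declared below all share a single argument struct, arg_rmrr (rd, rs1, rs2, vm), which the decoder fills through decode_insn32_extract_r_vm() before dispatching on funct6, as in the switch cases earlier in this file. A hedged sketch of that extraction is shown here using the standard vector encoding field positions; the function name is invented for illustration, and the generated decode_insn32_extract_r_vm() in this file remains the authoritative version.

    /* Field layout assumed from the RISC-V vector encoding:
     * rd = insn[11:7], rs1 = insn[19:15], rs2 = insn[24:20], vm = insn[25]. */
    static void example_extract_r_vm(arg_rmrr *a, uint32_t insn)
    {
        a->rd  = extract32(insn, 7, 5);
        a->rs1 = extract32(insn, 15, 5);
        a->rs2 = extract32(insn, 20, 5);
        a->vm  = extract32(insn, 25, 1);   /* 0: operation masked by v0 */
    }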
+static bool trans_vmsne_vi(DisasContext *ctx, arg_vmsne_vi *a); +typedef arg_rmrr arg_vmsltu_vv; +static bool trans_vmsltu_vv(DisasContext *ctx, arg_vmsltu_vv *a); +typedef arg_rmrr arg_vmsltu_vx; +static bool trans_vmsltu_vx(DisasContext *ctx, arg_vmsltu_vx *a); +typedef arg_rmrr arg_vmslt_vv; +static bool trans_vmslt_vv(DisasContext *ctx, arg_vmslt_vv *a); +typedef arg_rmrr arg_vmslt_vx; +static bool trans_vmslt_vx(DisasContext *ctx, arg_vmslt_vx *a); +typedef arg_rmrr arg_vmsleu_vv; +static bool trans_vmsleu_vv(DisasContext *ctx, arg_vmsleu_vv *a); +typedef arg_rmrr arg_vmsleu_vx; +static bool trans_vmsleu_vx(DisasContext *ctx, arg_vmsleu_vx *a); +typedef arg_rmrr arg_vmsleu_vi; +static bool trans_vmsleu_vi(DisasContext *ctx, arg_vmsleu_vi *a); +typedef arg_rmrr arg_vmsle_vv; +static bool trans_vmsle_vv(DisasContext *ctx, arg_vmsle_vv *a); +typedef arg_rmrr arg_vmsle_vx; +static bool trans_vmsle_vx(DisasContext *ctx, arg_vmsle_vx *a); +typedef arg_rmrr arg_vmsle_vi; +static bool trans_vmsle_vi(DisasContext *ctx, arg_vmsle_vi *a); +typedef arg_rmrr arg_vmsgtu_vx; +static bool trans_vmsgtu_vx(DisasContext *ctx, arg_vmsgtu_vx *a); +typedef arg_rmrr arg_vmsgtu_vi; +static bool trans_vmsgtu_vi(DisasContext *ctx, arg_vmsgtu_vi *a); +typedef arg_rmrr arg_vmsgt_vx; +static bool trans_vmsgt_vx(DisasContext *ctx, arg_vmsgt_vx *a); +typedef arg_rmrr arg_vmsgt_vi; +static bool trans_vmsgt_vi(DisasContext *ctx, arg_vmsgt_vi *a); +typedef arg_rmrr arg_vminu_vv; +static bool trans_vminu_vv(DisasContext *ctx, arg_vminu_vv *a); +typedef arg_rmrr arg_vminu_vx; +static bool trans_vminu_vx(DisasContext *ctx, arg_vminu_vx *a); +typedef arg_rmrr arg_vmin_vv; +static bool trans_vmin_vv(DisasContext *ctx, arg_vmin_vv *a); +typedef arg_rmrr arg_vmin_vx; +static bool trans_vmin_vx(DisasContext *ctx, arg_vmin_vx *a); +typedef arg_rmrr arg_vmaxu_vv; +static bool trans_vmaxu_vv(DisasContext *ctx, arg_vmaxu_vv *a); +typedef arg_rmrr arg_vmaxu_vx; +static bool trans_vmaxu_vx(DisasContext *ctx, arg_vmaxu_vx *a); +typedef arg_rmrr arg_vmax_vv; +static bool trans_vmax_vv(DisasContext *ctx, arg_vmax_vv *a); +typedef arg_rmrr arg_vmax_vx; +static bool trans_vmax_vx(DisasContext *ctx, arg_vmax_vx *a); +typedef arg_rmrr arg_vmul_vv; +static bool trans_vmul_vv(DisasContext *ctx, arg_vmul_vv *a); +typedef arg_rmrr arg_vmul_vx; +static bool trans_vmul_vx(DisasContext *ctx, arg_vmul_vx *a); +typedef arg_rmrr arg_vmulh_vv; +static bool trans_vmulh_vv(DisasContext *ctx, arg_vmulh_vv *a); +typedef arg_rmrr arg_vmulh_vx; +static bool trans_vmulh_vx(DisasContext *ctx, arg_vmulh_vx *a); +typedef arg_rmrr arg_vmulhu_vv; +static bool trans_vmulhu_vv(DisasContext *ctx, arg_vmulhu_vv *a); +typedef arg_rmrr arg_vmulhu_vx; +static bool trans_vmulhu_vx(DisasContext *ctx, arg_vmulhu_vx *a); +typedef arg_rmrr arg_vmulhsu_vv; +static bool trans_vmulhsu_vv(DisasContext *ctx, arg_vmulhsu_vv *a); +typedef arg_rmrr arg_vmulhsu_vx; +static bool trans_vmulhsu_vx(DisasContext *ctx, arg_vmulhsu_vx *a); +typedef arg_rmrr arg_vdivu_vv; +static bool trans_vdivu_vv(DisasContext *ctx, arg_vdivu_vv *a); +typedef arg_rmrr arg_vdivu_vx; +static bool trans_vdivu_vx(DisasContext *ctx, arg_vdivu_vx *a); +typedef arg_rmrr arg_vdiv_vv; +static bool trans_vdiv_vv(DisasContext *ctx, arg_vdiv_vv *a); +typedef arg_rmrr arg_vdiv_vx; +static bool trans_vdiv_vx(DisasContext *ctx, arg_vdiv_vx *a); +typedef arg_rmrr arg_vremu_vv; +static bool trans_vremu_vv(DisasContext *ctx, arg_vremu_vv *a); +typedef arg_rmrr arg_vremu_vx; +static bool trans_vremu_vx(DisasContext *ctx, 
arg_vremu_vx *a); +typedef arg_rmrr arg_vrem_vv; +static bool trans_vrem_vv(DisasContext *ctx, arg_vrem_vv *a); +typedef arg_rmrr arg_vrem_vx; +static bool trans_vrem_vx(DisasContext *ctx, arg_vrem_vx *a); +typedef arg_rmrr arg_vwmulu_vv; +static bool trans_vwmulu_vv(DisasContext *ctx, arg_vwmulu_vv *a); +typedef arg_rmrr arg_vwmulu_vx; +static bool trans_vwmulu_vx(DisasContext *ctx, arg_vwmulu_vx *a); +typedef arg_rmrr arg_vwmulsu_vv; +static bool trans_vwmulsu_vv(DisasContext *ctx, arg_vwmulsu_vv *a); +typedef arg_rmrr arg_vwmulsu_vx; +static bool trans_vwmulsu_vx(DisasContext *ctx, arg_vwmulsu_vx *a); +typedef arg_rmrr arg_vwmul_vv; +static bool trans_vwmul_vv(DisasContext *ctx, arg_vwmul_vv *a); +typedef arg_rmrr arg_vwmul_vx; +static bool trans_vwmul_vx(DisasContext *ctx, arg_vwmul_vx *a); +typedef arg_rmrr arg_vmacc_vv; +static bool trans_vmacc_vv(DisasContext *ctx, arg_vmacc_vv *a); +typedef arg_rmrr arg_vmacc_vx; +static bool trans_vmacc_vx(DisasContext *ctx, arg_vmacc_vx *a); +typedef arg_rmrr arg_vnmsac_vv; +static bool trans_vnmsac_vv(DisasContext *ctx, arg_vnmsac_vv *a); +typedef arg_rmrr arg_vnmsac_vx; +static bool trans_vnmsac_vx(DisasContext *ctx, arg_vnmsac_vx *a); +typedef arg_rmrr arg_vmadd_vv; +static bool trans_vmadd_vv(DisasContext *ctx, arg_vmadd_vv *a); +typedef arg_rmrr arg_vmadd_vx; +static bool trans_vmadd_vx(DisasContext *ctx, arg_vmadd_vx *a); +typedef arg_rmrr arg_vnmsub_vv; +static bool trans_vnmsub_vv(DisasContext *ctx, arg_vnmsub_vv *a); +typedef arg_rmrr arg_vnmsub_vx; +static bool trans_vnmsub_vx(DisasContext *ctx, arg_vnmsub_vx *a); +typedef arg_rmrr arg_vwmaccu_vv; +static bool trans_vwmaccu_vv(DisasContext *ctx, arg_vwmaccu_vv *a); +typedef arg_rmrr arg_vwmaccu_vx; +static bool trans_vwmaccu_vx(DisasContext *ctx, arg_vwmaccu_vx *a); +typedef arg_rmrr arg_vwmacc_vv; +static bool trans_vwmacc_vv(DisasContext *ctx, arg_vwmacc_vv *a); +typedef arg_rmrr arg_vwmacc_vx; +static bool trans_vwmacc_vx(DisasContext *ctx, arg_vwmacc_vx *a); +typedef arg_rmrr arg_vwmaccsu_vv; +static bool trans_vwmaccsu_vv(DisasContext *ctx, arg_vwmaccsu_vv *a); +typedef arg_rmrr arg_vwmaccsu_vx; +static bool trans_vwmaccsu_vx(DisasContext *ctx, arg_vwmaccsu_vx *a); +typedef arg_rmrr arg_vwmaccus_vx; +static bool trans_vwmaccus_vx(DisasContext *ctx, arg_vwmaccus_vx *a); +typedef arg_decode_insn3218 arg_vmv_v_v; +static bool trans_vmv_v_v(DisasContext *ctx, arg_vmv_v_v *a); +typedef arg_decode_insn3218 arg_vmv_v_x; +static bool trans_vmv_v_x(DisasContext *ctx, arg_vmv_v_x *a); +typedef arg_decode_insn3218 arg_vmv_v_i; +static bool trans_vmv_v_i(DisasContext *ctx, arg_vmv_v_i *a); +typedef arg_rmrr arg_vmerge_vvm; +static bool trans_vmerge_vvm(DisasContext *ctx, arg_vmerge_vvm *a); +typedef arg_rmrr arg_vmerge_vxm; +static bool trans_vmerge_vxm(DisasContext *ctx, arg_vmerge_vxm *a); +typedef arg_rmrr arg_vmerge_vim; +static bool trans_vmerge_vim(DisasContext *ctx, arg_vmerge_vim *a); +typedef arg_rmrr arg_vsaddu_vv; +static bool trans_vsaddu_vv(DisasContext *ctx, arg_vsaddu_vv *a); +typedef arg_rmrr arg_vsaddu_vx; +static bool trans_vsaddu_vx(DisasContext *ctx, arg_vsaddu_vx *a); +typedef arg_rmrr arg_vsaddu_vi; +static bool trans_vsaddu_vi(DisasContext *ctx, arg_vsaddu_vi *a); +typedef arg_rmrr arg_vsadd_vv; +static bool trans_vsadd_vv(DisasContext *ctx, arg_vsadd_vv *a); +typedef arg_rmrr arg_vsadd_vx; +static bool trans_vsadd_vx(DisasContext *ctx, arg_vsadd_vx *a); +typedef arg_rmrr arg_vsadd_vi; +static bool trans_vsadd_vi(DisasContext *ctx, arg_vsadd_vi *a); +typedef arg_rmrr 
arg_vssubu_vv; +static bool trans_vssubu_vv(DisasContext *ctx, arg_vssubu_vv *a); +typedef arg_rmrr arg_vssubu_vx; +static bool trans_vssubu_vx(DisasContext *ctx, arg_vssubu_vx *a); +typedef arg_rmrr arg_vssub_vv; +static bool trans_vssub_vv(DisasContext *ctx, arg_vssub_vv *a); +typedef arg_rmrr arg_vssub_vx; +static bool trans_vssub_vx(DisasContext *ctx, arg_vssub_vx *a); +typedef arg_rmrr arg_vaadd_vv; +static bool trans_vaadd_vv(DisasContext *ctx, arg_vaadd_vv *a); +typedef arg_rmrr arg_vaadd_vx; +static bool trans_vaadd_vx(DisasContext *ctx, arg_vaadd_vx *a); +typedef arg_rmrr arg_vaadd_vi; +static bool trans_vaadd_vi(DisasContext *ctx, arg_vaadd_vi *a); +typedef arg_rmrr arg_vasub_vv; +static bool trans_vasub_vv(DisasContext *ctx, arg_vasub_vv *a); +typedef arg_rmrr arg_vasub_vx; +static bool trans_vasub_vx(DisasContext *ctx, arg_vasub_vx *a); +typedef arg_rmrr arg_vsmul_vv; +static bool trans_vsmul_vv(DisasContext *ctx, arg_vsmul_vv *a); +typedef arg_rmrr arg_vsmul_vx; +static bool trans_vsmul_vx(DisasContext *ctx, arg_vsmul_vx *a); +typedef arg_rmrr arg_vwsmaccu_vv; +static bool trans_vwsmaccu_vv(DisasContext *ctx, arg_vwsmaccu_vv *a); +typedef arg_rmrr arg_vwsmaccu_vx; +static bool trans_vwsmaccu_vx(DisasContext *ctx, arg_vwsmaccu_vx *a); +typedef arg_rmrr arg_vwsmacc_vv; +static bool trans_vwsmacc_vv(DisasContext *ctx, arg_vwsmacc_vv *a); +typedef arg_rmrr arg_vwsmacc_vx; +static bool trans_vwsmacc_vx(DisasContext *ctx, arg_vwsmacc_vx *a); +typedef arg_rmrr arg_vwsmaccsu_vv; +static bool trans_vwsmaccsu_vv(DisasContext *ctx, arg_vwsmaccsu_vv *a); +typedef arg_rmrr arg_vwsmaccsu_vx; +static bool trans_vwsmaccsu_vx(DisasContext *ctx, arg_vwsmaccsu_vx *a); +typedef arg_rmrr arg_vwsmaccus_vx; +static bool trans_vwsmaccus_vx(DisasContext *ctx, arg_vwsmaccus_vx *a); +typedef arg_rmrr arg_vssrl_vv; +static bool trans_vssrl_vv(DisasContext *ctx, arg_vssrl_vv *a); +typedef arg_rmrr arg_vssrl_vx; +static bool trans_vssrl_vx(DisasContext *ctx, arg_vssrl_vx *a); +typedef arg_rmrr arg_vssrl_vi; +static bool trans_vssrl_vi(DisasContext *ctx, arg_vssrl_vi *a); +typedef arg_rmrr arg_vssra_vv; +static bool trans_vssra_vv(DisasContext *ctx, arg_vssra_vv *a); +typedef arg_rmrr arg_vssra_vx; +static bool trans_vssra_vx(DisasContext *ctx, arg_vssra_vx *a); +typedef arg_rmrr arg_vssra_vi; +static bool trans_vssra_vi(DisasContext *ctx, arg_vssra_vi *a); +typedef arg_rmrr arg_vnclipu_vv; +static bool trans_vnclipu_vv(DisasContext *ctx, arg_vnclipu_vv *a); +typedef arg_rmrr arg_vnclipu_vx; +static bool trans_vnclipu_vx(DisasContext *ctx, arg_vnclipu_vx *a); +typedef arg_rmrr arg_vnclipu_vi; +static bool trans_vnclipu_vi(DisasContext *ctx, arg_vnclipu_vi *a); +typedef arg_rmrr arg_vnclip_vv; +static bool trans_vnclip_vv(DisasContext *ctx, arg_vnclip_vv *a); +typedef arg_rmrr arg_vnclip_vx; +static bool trans_vnclip_vx(DisasContext *ctx, arg_vnclip_vx *a); +typedef arg_rmrr arg_vnclip_vi; +static bool trans_vnclip_vi(DisasContext *ctx, arg_vnclip_vi *a); +typedef arg_rmrr arg_vfadd_vv; +static bool trans_vfadd_vv(DisasContext *ctx, arg_vfadd_vv *a); +typedef arg_rmrr arg_vfadd_vf; +static bool trans_vfadd_vf(DisasContext *ctx, arg_vfadd_vf *a); +typedef arg_rmrr arg_vfsub_vv; +static bool trans_vfsub_vv(DisasContext *ctx, arg_vfsub_vv *a); +typedef arg_rmrr arg_vfsub_vf; +static bool trans_vfsub_vf(DisasContext *ctx, arg_vfsub_vf *a); +typedef arg_rmrr arg_vfrsub_vf; +static bool trans_vfrsub_vf(DisasContext *ctx, arg_vfrsub_vf *a); +typedef arg_rmrr arg_vfwadd_vv; +static bool trans_vfwadd_vv(DisasContext 
*ctx, arg_vfwadd_vv *a); +typedef arg_rmrr arg_vfwadd_vf; +static bool trans_vfwadd_vf(DisasContext *ctx, arg_vfwadd_vf *a); +typedef arg_rmrr arg_vfwadd_wv; +static bool trans_vfwadd_wv(DisasContext *ctx, arg_vfwadd_wv *a); +typedef arg_rmrr arg_vfwadd_wf; +static bool trans_vfwadd_wf(DisasContext *ctx, arg_vfwadd_wf *a); +typedef arg_rmrr arg_vfwsub_vv; +static bool trans_vfwsub_vv(DisasContext *ctx, arg_vfwsub_vv *a); +typedef arg_rmrr arg_vfwsub_vf; +static bool trans_vfwsub_vf(DisasContext *ctx, arg_vfwsub_vf *a); +typedef arg_rmrr arg_vfwsub_wv; +static bool trans_vfwsub_wv(DisasContext *ctx, arg_vfwsub_wv *a); +typedef arg_rmrr arg_vfwsub_wf; +static bool trans_vfwsub_wf(DisasContext *ctx, arg_vfwsub_wf *a); +typedef arg_rmrr arg_vfmul_vv; +static bool trans_vfmul_vv(DisasContext *ctx, arg_vfmul_vv *a); +typedef arg_rmrr arg_vfmul_vf; +static bool trans_vfmul_vf(DisasContext *ctx, arg_vfmul_vf *a); +typedef arg_rmrr arg_vfdiv_vv; +static bool trans_vfdiv_vv(DisasContext *ctx, arg_vfdiv_vv *a); +typedef arg_rmrr arg_vfdiv_vf; +static bool trans_vfdiv_vf(DisasContext *ctx, arg_vfdiv_vf *a); +typedef arg_rmrr arg_vfrdiv_vf; +static bool trans_vfrdiv_vf(DisasContext *ctx, arg_vfrdiv_vf *a); +typedef arg_rmrr arg_vfwmul_vv; +static bool trans_vfwmul_vv(DisasContext *ctx, arg_vfwmul_vv *a); +typedef arg_rmrr arg_vfwmul_vf; +static bool trans_vfwmul_vf(DisasContext *ctx, arg_vfwmul_vf *a); +typedef arg_rmrr arg_vfmacc_vv; +static bool trans_vfmacc_vv(DisasContext *ctx, arg_vfmacc_vv *a); +typedef arg_rmrr arg_vfnmacc_vv; +static bool trans_vfnmacc_vv(DisasContext *ctx, arg_vfnmacc_vv *a); +typedef arg_rmrr arg_vfnmacc_vf; +static bool trans_vfnmacc_vf(DisasContext *ctx, arg_vfnmacc_vf *a); +typedef arg_rmrr arg_vfmacc_vf; +static bool trans_vfmacc_vf(DisasContext *ctx, arg_vfmacc_vf *a); +typedef arg_rmrr arg_vfmsac_vv; +static bool trans_vfmsac_vv(DisasContext *ctx, arg_vfmsac_vv *a); +typedef arg_rmrr arg_vfmsac_vf; +static bool trans_vfmsac_vf(DisasContext *ctx, arg_vfmsac_vf *a); +typedef arg_rmrr arg_vfnmsac_vv; +static bool trans_vfnmsac_vv(DisasContext *ctx, arg_vfnmsac_vv *a); +typedef arg_rmrr arg_vfnmsac_vf; +static bool trans_vfnmsac_vf(DisasContext *ctx, arg_vfnmsac_vf *a); +typedef arg_rmrr arg_vfmadd_vv; +static bool trans_vfmadd_vv(DisasContext *ctx, arg_vfmadd_vv *a); +typedef arg_rmrr arg_vfmadd_vf; +static bool trans_vfmadd_vf(DisasContext *ctx, arg_vfmadd_vf *a); +typedef arg_rmrr arg_vfnmadd_vv; +static bool trans_vfnmadd_vv(DisasContext *ctx, arg_vfnmadd_vv *a); +typedef arg_rmrr arg_vfnmadd_vf; +static bool trans_vfnmadd_vf(DisasContext *ctx, arg_vfnmadd_vf *a); +typedef arg_rmrr arg_vfmsub_vv; +static bool trans_vfmsub_vv(DisasContext *ctx, arg_vfmsub_vv *a); +typedef arg_rmrr arg_vfmsub_vf; +static bool trans_vfmsub_vf(DisasContext *ctx, arg_vfmsub_vf *a); +typedef arg_rmrr arg_vfnmsub_vv; +static bool trans_vfnmsub_vv(DisasContext *ctx, arg_vfnmsub_vv *a); +typedef arg_rmrr arg_vfnmsub_vf; +static bool trans_vfnmsub_vf(DisasContext *ctx, arg_vfnmsub_vf *a); +typedef arg_rmrr arg_vfwmacc_vv; +static bool trans_vfwmacc_vv(DisasContext *ctx, arg_vfwmacc_vv *a); +typedef arg_rmrr arg_vfwmacc_vf; +static bool trans_vfwmacc_vf(DisasContext *ctx, arg_vfwmacc_vf *a); +typedef arg_rmrr arg_vfwnmacc_vv; +static bool trans_vfwnmacc_vv(DisasContext *ctx, arg_vfwnmacc_vv *a); +typedef arg_rmrr arg_vfwnmacc_vf; +static bool trans_vfwnmacc_vf(DisasContext *ctx, arg_vfwnmacc_vf *a); +typedef arg_rmrr arg_vfwmsac_vv; +static bool trans_vfwmsac_vv(DisasContext *ctx, arg_vfwmsac_vv 
*a); +typedef arg_rmrr arg_vfwmsac_vf; +static bool trans_vfwmsac_vf(DisasContext *ctx, arg_vfwmsac_vf *a); +typedef arg_rmrr arg_vfwnmsac_vv; +static bool trans_vfwnmsac_vv(DisasContext *ctx, arg_vfwnmsac_vv *a); +typedef arg_rmrr arg_vfwnmsac_vf; +static bool trans_vfwnmsac_vf(DisasContext *ctx, arg_vfwnmsac_vf *a); +typedef arg_rmr arg_vfsqrt_v; +static bool trans_vfsqrt_v(DisasContext *ctx, arg_vfsqrt_v *a); +typedef arg_rmrr arg_vfmin_vv; +static bool trans_vfmin_vv(DisasContext *ctx, arg_vfmin_vv *a); +typedef arg_rmrr arg_vfmin_vf; +static bool trans_vfmin_vf(DisasContext *ctx, arg_vfmin_vf *a); +typedef arg_rmrr arg_vfmax_vv; +static bool trans_vfmax_vv(DisasContext *ctx, arg_vfmax_vv *a); +typedef arg_rmrr arg_vfmax_vf; +static bool trans_vfmax_vf(DisasContext *ctx, arg_vfmax_vf *a); +typedef arg_rmrr arg_vfsgnj_vv; +static bool trans_vfsgnj_vv(DisasContext *ctx, arg_vfsgnj_vv *a); +typedef arg_rmrr arg_vfsgnj_vf; +static bool trans_vfsgnj_vf(DisasContext *ctx, arg_vfsgnj_vf *a); +typedef arg_rmrr arg_vfsgnjn_vv; +static bool trans_vfsgnjn_vv(DisasContext *ctx, arg_vfsgnjn_vv *a); +typedef arg_rmrr arg_vfsgnjn_vf; +static bool trans_vfsgnjn_vf(DisasContext *ctx, arg_vfsgnjn_vf *a); +typedef arg_rmrr arg_vfsgnjx_vv; +static bool trans_vfsgnjx_vv(DisasContext *ctx, arg_vfsgnjx_vv *a); +typedef arg_rmrr arg_vfsgnjx_vf; +static bool trans_vfsgnjx_vf(DisasContext *ctx, arg_vfsgnjx_vf *a); +typedef arg_rmrr arg_vmfeq_vv; +static bool trans_vmfeq_vv(DisasContext *ctx, arg_vmfeq_vv *a); +typedef arg_rmrr arg_vmfeq_vf; +static bool trans_vmfeq_vf(DisasContext *ctx, arg_vmfeq_vf *a); +typedef arg_rmrr arg_vmfne_vv; +static bool trans_vmfne_vv(DisasContext *ctx, arg_vmfne_vv *a); +typedef arg_rmrr arg_vmfne_vf; +static bool trans_vmfne_vf(DisasContext *ctx, arg_vmfne_vf *a); +typedef arg_rmrr arg_vmflt_vv; +static bool trans_vmflt_vv(DisasContext *ctx, arg_vmflt_vv *a); +typedef arg_rmrr arg_vmflt_vf; +static bool trans_vmflt_vf(DisasContext *ctx, arg_vmflt_vf *a); +typedef arg_rmrr arg_vmfle_vv; +static bool trans_vmfle_vv(DisasContext *ctx, arg_vmfle_vv *a); +typedef arg_rmrr arg_vmfle_vf; +static bool trans_vmfle_vf(DisasContext *ctx, arg_vmfle_vf *a); +typedef arg_rmrr arg_vmfgt_vf; +static bool trans_vmfgt_vf(DisasContext *ctx, arg_vmfgt_vf *a); +typedef arg_rmrr arg_vmfge_vf; +static bool trans_vmfge_vf(DisasContext *ctx, arg_vmfge_vf *a); +typedef arg_rmrr arg_vmford_vv; +static bool trans_vmford_vv(DisasContext *ctx, arg_vmford_vv *a); +typedef arg_rmrr arg_vmford_vf; +static bool trans_vmford_vf(DisasContext *ctx, arg_vmford_vf *a); +typedef arg_rmr arg_vfclass_v; +static bool trans_vfclass_v(DisasContext *ctx, arg_vfclass_v *a); +typedef arg_rmrr arg_vfmerge_vfm; +static bool trans_vfmerge_vfm(DisasContext *ctx, arg_vfmerge_vfm *a); +typedef arg_decode_insn3218 arg_vfmv_v_f; +static bool trans_vfmv_v_f(DisasContext *ctx, arg_vfmv_v_f *a); +typedef arg_rmr arg_vfcvt_xu_f_v; +static bool trans_vfcvt_xu_f_v(DisasContext *ctx, arg_vfcvt_xu_f_v *a); +typedef arg_rmr arg_vfcvt_x_f_v; +static bool trans_vfcvt_x_f_v(DisasContext *ctx, arg_vfcvt_x_f_v *a); +typedef arg_rmr arg_vfcvt_f_xu_v; +static bool trans_vfcvt_f_xu_v(DisasContext *ctx, arg_vfcvt_f_xu_v *a); +typedef arg_rmr arg_vfcvt_f_x_v; +static bool trans_vfcvt_f_x_v(DisasContext *ctx, arg_vfcvt_f_x_v *a); +typedef arg_rmr arg_vfwcvt_xu_f_v; +static bool trans_vfwcvt_xu_f_v(DisasContext *ctx, arg_vfwcvt_xu_f_v *a); +typedef arg_rmr arg_vfwcvt_x_f_v; +static bool trans_vfwcvt_x_f_v(DisasContext *ctx, arg_vfwcvt_x_f_v *a); +typedef 
arg_rmr arg_vfwcvt_f_xu_v; +static bool trans_vfwcvt_f_xu_v(DisasContext *ctx, arg_vfwcvt_f_xu_v *a); +typedef arg_rmr arg_vfwcvt_f_x_v; +static bool trans_vfwcvt_f_x_v(DisasContext *ctx, arg_vfwcvt_f_x_v *a); +typedef arg_rmr arg_vfwcvt_f_f_v; +static bool trans_vfwcvt_f_f_v(DisasContext *ctx, arg_vfwcvt_f_f_v *a); +typedef arg_rmr arg_vfncvt_xu_f_v; +static bool trans_vfncvt_xu_f_v(DisasContext *ctx, arg_vfncvt_xu_f_v *a); +typedef arg_rmr arg_vfncvt_x_f_v; +static bool trans_vfncvt_x_f_v(DisasContext *ctx, arg_vfncvt_x_f_v *a); +typedef arg_rmr arg_vfncvt_f_xu_v; +static bool trans_vfncvt_f_xu_v(DisasContext *ctx, arg_vfncvt_f_xu_v *a); +typedef arg_rmr arg_vfncvt_f_x_v; +static bool trans_vfncvt_f_x_v(DisasContext *ctx, arg_vfncvt_f_x_v *a); +typedef arg_rmr arg_vfncvt_f_f_v; +static bool trans_vfncvt_f_f_v(DisasContext *ctx, arg_vfncvt_f_f_v *a); +typedef arg_rmrr arg_vredsum_vs; +static bool trans_vredsum_vs(DisasContext *ctx, arg_vredsum_vs *a); +typedef arg_rmrr arg_vredand_vs; +static bool trans_vredand_vs(DisasContext *ctx, arg_vredand_vs *a); +typedef arg_rmrr arg_vredor_vs; +static bool trans_vredor_vs(DisasContext *ctx, arg_vredor_vs *a); +typedef arg_rmrr arg_vredxor_vs; +static bool trans_vredxor_vs(DisasContext *ctx, arg_vredxor_vs *a); +typedef arg_rmrr arg_vredminu_vs; +static bool trans_vredminu_vs(DisasContext *ctx, arg_vredminu_vs *a); +typedef arg_rmrr arg_vredmin_vs; +static bool trans_vredmin_vs(DisasContext *ctx, arg_vredmin_vs *a); +typedef arg_rmrr arg_vredmaxu_vs; +static bool trans_vredmaxu_vs(DisasContext *ctx, arg_vredmaxu_vs *a); +typedef arg_rmrr arg_vredmax_vs; +static bool trans_vredmax_vs(DisasContext *ctx, arg_vredmax_vs *a); +typedef arg_rmrr arg_vwredsumu_vs; +static bool trans_vwredsumu_vs(DisasContext *ctx, arg_vwredsumu_vs *a); +typedef arg_rmrr arg_vwredsum_vs; +static bool trans_vwredsum_vs(DisasContext *ctx, arg_vwredsum_vs *a); +typedef arg_rmrr arg_vfredsum_vs; +static bool trans_vfredsum_vs(DisasContext *ctx, arg_vfredsum_vs *a); +typedef arg_rmrr arg_vfredmin_vs; +static bool trans_vfredmin_vs(DisasContext *ctx, arg_vfredmin_vs *a); +typedef arg_rmrr arg_vfredmax_vs; +static bool trans_vfredmax_vs(DisasContext *ctx, arg_vfredmax_vs *a); +typedef arg_rmrr arg_vfwredsum_vs; +static bool trans_vfwredsum_vs(DisasContext *ctx, arg_vfwredsum_vs *a); +typedef arg_r arg_vmand_mm; +static bool trans_vmand_mm(DisasContext *ctx, arg_vmand_mm *a); +typedef arg_r arg_vmnand_mm; +static bool trans_vmnand_mm(DisasContext *ctx, arg_vmnand_mm *a); +typedef arg_r arg_vmandnot_mm; +static bool trans_vmandnot_mm(DisasContext *ctx, arg_vmandnot_mm *a); +typedef arg_r arg_vmxor_mm; +static bool trans_vmxor_mm(DisasContext *ctx, arg_vmxor_mm *a); +typedef arg_r arg_vmor_mm; +static bool trans_vmor_mm(DisasContext *ctx, arg_vmor_mm *a); +typedef arg_r arg_vmnor_mm; +static bool trans_vmnor_mm(DisasContext *ctx, arg_vmnor_mm *a); +typedef arg_r arg_vmornot_mm; +static bool trans_vmornot_mm(DisasContext *ctx, arg_vmornot_mm *a); +typedef arg_r arg_vmxnor_mm; +static bool trans_vmxnor_mm(DisasContext *ctx, arg_vmxnor_mm *a); +typedef arg_rmr arg_vmpopc_m; +static bool trans_vmpopc_m(DisasContext *ctx, arg_vmpopc_m *a); +typedef arg_rmr arg_vmfirst_m; +static bool trans_vmfirst_m(DisasContext *ctx, arg_vmfirst_m *a); +typedef arg_rmr arg_vmsbf_m; +static bool trans_vmsbf_m(DisasContext *ctx, arg_vmsbf_m *a); +typedef arg_rmr arg_vmsif_m; +static bool trans_vmsif_m(DisasContext *ctx, arg_vmsif_m *a); +typedef arg_rmr arg_vmsof_m; +static bool trans_vmsof_m(DisasContext 
*ctx, arg_vmsof_m *a); +typedef arg_rmr arg_viota_m; +static bool trans_viota_m(DisasContext *ctx, arg_viota_m *a); +typedef arg_decode_insn3219 arg_vid_v; +static bool trans_vid_v(DisasContext *ctx, arg_vid_v *a); +typedef arg_r arg_vext_x_v; +static bool trans_vext_x_v(DisasContext *ctx, arg_vext_x_v *a); +typedef arg_decode_insn3218 arg_vmv_s_x; +static bool trans_vmv_s_x(DisasContext *ctx, arg_vmv_s_x *a); +typedef arg_decode_insn3220 arg_vfmv_f_s; +static bool trans_vfmv_f_s(DisasContext *ctx, arg_vfmv_f_s *a); +typedef arg_decode_insn3218 arg_vfmv_s_f; +static bool trans_vfmv_s_f(DisasContext *ctx, arg_vfmv_s_f *a); +typedef arg_rmrr arg_vslideup_vx; +static bool trans_vslideup_vx(DisasContext *ctx, arg_vslideup_vx *a); +typedef arg_rmrr arg_vslideup_vi; +static bool trans_vslideup_vi(DisasContext *ctx, arg_vslideup_vi *a); +typedef arg_rmrr arg_vslide1up_vx; +static bool trans_vslide1up_vx(DisasContext *ctx, arg_vslide1up_vx *a); +typedef arg_rmrr arg_vslidedown_vx; +static bool trans_vslidedown_vx(DisasContext *ctx, arg_vslidedown_vx *a); +typedef arg_rmrr arg_vslidedown_vi; +static bool trans_vslidedown_vi(DisasContext *ctx, arg_vslidedown_vi *a); +typedef arg_rmrr arg_vslide1down_vx; +static bool trans_vslide1down_vx(DisasContext *ctx, arg_vslide1down_vx *a); +typedef arg_rmrr arg_vrgather_vv; +static bool trans_vrgather_vv(DisasContext *ctx, arg_vrgather_vv *a); +typedef arg_rmrr arg_vrgather_vx; +static bool trans_vrgather_vx(DisasContext *ctx, arg_vrgather_vx *a); +typedef arg_rmrr arg_vrgather_vi; +static bool trans_vrgather_vi(DisasContext *ctx, arg_vrgather_vi *a); +typedef arg_r arg_vcompress_vm; +static bool trans_vcompress_vm(DisasContext *ctx, arg_vcompress_vm *a); +typedef arg_decode_insn3221 arg_vsetvli; +static bool trans_vsetvli(DisasContext *ctx, arg_vsetvli *a); +typedef arg_r arg_vsetvl; +static bool trans_vsetvl(DisasContext *ctx, arg_vsetvl *a); typedef arg_i arg_lwu; static bool trans_lwu(DisasContext *ctx, arg_lwu *a); typedef arg_i arg_ld; @@ -408,25 +1142,43 @@ typedef arg_atomic arg_amominu_d; static bool trans_amominu_d(DisasContext *ctx, arg_amominu_d *a); typedef arg_atomic arg_amomaxu_d; static bool trans_amomaxu_d(DisasContext *ctx, arg_amomaxu_d *a); -typedef arg_decode_insn3212 arg_fcvt_l_s; +typedef arg_rwdvm arg_vamoswapd_v; +static bool trans_vamoswapd_v(DisasContext *ctx, arg_vamoswapd_v *a); +typedef arg_rwdvm arg_vamoaddd_v; +static bool trans_vamoaddd_v(DisasContext *ctx, arg_vamoaddd_v *a); +typedef arg_rwdvm arg_vamoxord_v; +static bool trans_vamoxord_v(DisasContext *ctx, arg_vamoxord_v *a); +typedef arg_rwdvm arg_vamoandd_v; +static bool trans_vamoandd_v(DisasContext *ctx, arg_vamoandd_v *a); +typedef arg_rwdvm arg_vamoord_v; +static bool trans_vamoord_v(DisasContext *ctx, arg_vamoord_v *a); +typedef arg_rwdvm arg_vamomind_v; +static bool trans_vamomind_v(DisasContext *ctx, arg_vamomind_v *a); +typedef arg_rwdvm arg_vamomaxd_v; +static bool trans_vamomaxd_v(DisasContext *ctx, arg_vamomaxd_v *a); +typedef arg_rwdvm arg_vamominud_v; +static bool trans_vamominud_v(DisasContext *ctx, arg_vamominud_v *a); +typedef arg_rwdvm arg_vamomaxud_v; +static bool trans_vamomaxud_v(DisasContext *ctx, arg_vamomaxud_v *a); +typedef arg_decode_insn3217 arg_fcvt_l_s; static bool trans_fcvt_l_s(DisasContext *ctx, arg_fcvt_l_s *a); -typedef arg_decode_insn3212 arg_fcvt_lu_s; +typedef arg_decode_insn3217 arg_fcvt_lu_s; static bool trans_fcvt_lu_s(DisasContext *ctx, arg_fcvt_lu_s *a); -typedef arg_decode_insn3212 arg_fcvt_s_l; +typedef arg_decode_insn3217 
arg_fcvt_s_l; static bool trans_fcvt_s_l(DisasContext *ctx, arg_fcvt_s_l *a); -typedef arg_decode_insn3212 arg_fcvt_s_lu; +typedef arg_decode_insn3217 arg_fcvt_s_lu; static bool trans_fcvt_s_lu(DisasContext *ctx, arg_fcvt_s_lu *a); -typedef arg_decode_insn3212 arg_fcvt_l_d; +typedef arg_decode_insn3217 arg_fcvt_l_d; static bool trans_fcvt_l_d(DisasContext *ctx, arg_fcvt_l_d *a); -typedef arg_decode_insn3212 arg_fcvt_lu_d; +typedef arg_decode_insn3217 arg_fcvt_lu_d; static bool trans_fcvt_lu_d(DisasContext *ctx, arg_fcvt_lu_d *a); -typedef arg_decode_insn3213 arg_fmv_x_d; +typedef arg_decode_insn3218 arg_fmv_x_d; static bool trans_fmv_x_d(DisasContext *ctx, arg_fmv_x_d *a); -typedef arg_decode_insn3212 arg_fcvt_d_l; +typedef arg_decode_insn3217 arg_fcvt_d_l; static bool trans_fcvt_d_l(DisasContext *ctx, arg_fcvt_d_l *a); -typedef arg_decode_insn3212 arg_fcvt_d_lu; +typedef arg_decode_insn3217 arg_fcvt_d_lu; static bool trans_fcvt_d_lu(DisasContext *ctx, arg_fcvt_d_lu *a); -typedef arg_decode_insn3213 arg_fmv_d_x; +typedef arg_decode_insn3218 arg_fmv_d_x; static bool trans_fmv_d_x(DisasContext *ctx, arg_fmv_d_x *a); static void decode_insn32_extract_atom_ld(DisasContext *ctx, arg_atomic *a, uint32_t insn) @@ -454,30 +1206,30 @@ static void decode_insn32_extract_b(DisasContext *ctx, arg_b *a, uint32_t insn) a->rs1 = extract32(insn, 15, 5); } -static void decode_insn32_extract_csr(DisasContext *ctx, arg_decode_insn329 *a, uint32_t insn) +static void decode_insn32_extract_csr(DisasContext *ctx, arg_decode_insn3214 *a, uint32_t insn) { a->csr = extract32(insn, 20, 12); a->rs1 = extract32(insn, 15, 5); a->rd = extract32(insn, 7, 5); } -static void decode_insn32_extract_decode_insn32_Fmt_18(DisasContext *ctx, arg_empty *a, uint32_t insn) +static void decode_insn32_extract_decode_insn32_Fmt_28(DisasContext *ctx, arg_empty *a, uint32_t insn) { } -static void decode_insn32_extract_decode_insn32_Fmt_19(DisasContext *ctx, arg_decode_insn3216 *a, uint32_t insn) +static void decode_insn32_extract_decode_insn32_Fmt_29(DisasContext *ctx, arg_decode_insn3224 *a, uint32_t insn) { a->pred = extract32(insn, 24, 4); a->succ = extract32(insn, 20, 4); } -static void decode_insn32_extract_hfence_bvma(DisasContext *ctx, arg_decode_insn3214 *a, uint32_t insn) +static void decode_insn32_extract_hfence_gvma(DisasContext *ctx, arg_decode_insn3222 *a, uint32_t insn) { a->rs2 = extract32(insn, 20, 5); a->rs1 = extract32(insn, 15, 5); } -static void decode_insn32_extract_hfence_gvma(DisasContext *ctx, arg_decode_insn3214 *a, uint32_t insn) +static void decode_insn32_extract_hfence_vvma(DisasContext *ctx, arg_decode_insn3222 *a, uint32_t insn) { a->rs2 = extract32(insn, 20, 5); a->rs1 = extract32(insn, 15, 5); @@ -503,20 +1255,54 @@ static void decode_insn32_extract_r(DisasContext *ctx, arg_r *a, uint32_t insn) a->rd = extract32(insn, 7, 5); } -static void decode_insn32_extract_r2(DisasContext *ctx, arg_decode_insn3213 *a, uint32_t insn) +static void decode_insn32_extract_r1_vm(DisasContext *ctx, arg_decode_insn3219 *a, uint32_t insn) +{ + a->vm = extract32(insn, 25, 1); + a->rd = extract32(insn, 7, 5); +} + +static void decode_insn32_extract_r2(DisasContext *ctx, arg_decode_insn3218 *a, uint32_t insn) +{ + a->rs1 = extract32(insn, 15, 5); + a->rd = extract32(insn, 7, 5); +} + +static void decode_insn32_extract_r2_nfvm(DisasContext *ctx, arg_r2nfvm *a, uint32_t insn) { + a->vm = extract32(insn, 25, 1); + a->nf = ex_plus_1(ctx, extract32(insn, 29, 3)); a->rs1 = extract32(insn, 15, 5); a->rd = extract32(insn, 7, 5); } 
-static void decode_insn32_extract_r2_rm(DisasContext *ctx, arg_decode_insn3212 *a, uint32_t insn) +static void decode_insn32_extract_r2_rm(DisasContext *ctx, arg_decode_insn3217 *a, uint32_t insn) { a->rs1 = extract32(insn, 15, 5); a->rm = extract32(insn, 12, 3); a->rd = extract32(insn, 7, 5); } -static void decode_insn32_extract_r4_rm(DisasContext *ctx, arg_decode_insn3210 *a, uint32_t insn) +static void decode_insn32_extract_r2_vm(DisasContext *ctx, arg_rmr *a, uint32_t insn) +{ + a->vm = extract32(insn, 25, 1); + a->rs2 = extract32(insn, 20, 5); + a->rd = extract32(insn, 7, 5); +} + +static void decode_insn32_extract_r2_zimm(DisasContext *ctx, arg_decode_insn3221 *a, uint32_t insn) +{ + a->zimm = extract32(insn, 20, 11); + a->rs1 = extract32(insn, 15, 5); + a->rd = extract32(insn, 7, 5); +} + +static void decode_insn32_extract_r2rd(DisasContext *ctx, arg_decode_insn3220 *a, uint32_t insn) +{ + a->rs2 = extract32(insn, 20, 5); + a->rd = extract32(insn, 7, 5); +} + +static void decode_insn32_extract_r4_rm(DisasContext *ctx, arg_decode_insn3215 *a, uint32_t insn) { a->rs3 = extract32(insn, 27, 5); a->rs2 = extract32(insn, 20, 5); @@ -525,7 +1311,16 @@ static void decode_insn32_extract_r4_rm(DisasContext *ctx, arg_decode_insn3210 * a->rd = extract32(insn, 7, 5); } -static void decode_insn32_extract_r_rm(DisasContext *ctx, arg_decode_insn3211 *a, uint32_t insn) +static void decode_insn32_extract_r_nfvm(DisasContext *ctx, arg_rnfvm *a, uint32_t insn) +{ + a->vm = extract32(insn, 25, 1); + a->nf = ex_plus_1(ctx, extract32(insn, 29, 3)); + a->rs2 = extract32(insn, 20, 5); + a->rs1 = extract32(insn, 15, 5); + a->rd = extract32(insn, 7, 5); +} + +static void decode_insn32_extract_r_rm(DisasContext *ctx, arg_decode_insn3216 *a, uint32_t insn) { a->rs2 = extract32(insn, 20, 5); a->rs1 = extract32(insn, 15, 5); @@ -533,6 +1328,39 @@ static void decode_insn32_extract_r_rm(DisasContext *ctx, arg_decode_insn3211 *a a->rd = extract32(insn, 7, 5); } +static void decode_insn32_extract_r_vm(DisasContext *ctx, arg_rmrr *a, uint32_t insn) +{ + a->vm = extract32(insn, 25, 1); + a->rs2 = extract32(insn, 20, 5); + a->rs1 = extract32(insn, 15, 5); + a->rd = extract32(insn, 7, 5); +} + +static void decode_insn32_extract_r_vm_0(DisasContext *ctx, arg_rmrr *a, uint32_t insn) +{ + a->vm = 0; + a->rs2 = extract32(insn, 20, 5); + a->rs1 = extract32(insn, 15, 5); + a->rd = extract32(insn, 7, 5); +} + +static void decode_insn32_extract_r_vm_1(DisasContext *ctx, arg_rmrr *a, uint32_t insn) +{ + a->vm = 1; + a->rs2 = extract32(insn, 20, 5); + a->rs1 = extract32(insn, 15, 5); + a->rd = extract32(insn, 7, 5); +} + +static void decode_insn32_extract_r_wdvm(DisasContext *ctx, arg_rwdvm *a, uint32_t insn) +{ + a->wd = extract32(insn, 26, 1); + a->vm = extract32(insn, 25, 1); + a->rs2 = extract32(insn, 20, 5); + a->rs1 = extract32(insn, 15, 5); + a->rd = extract32(insn, 7, 5); +} + static void decode_insn32_extract_s(DisasContext *ctx, arg_s *a, uint32_t insn) { a->imm = deposit32(extract32(insn, 7, 5), 5, 27, sextract32(insn, 25, 7)); @@ -540,12 +1368,12 @@ static void decode_insn32_extract_s(DisasContext *ctx, arg_s *a, uint32_t insn) a->rs1 = extract32(insn, 15, 5); } -static void decode_insn32_extract_sfence_vm(DisasContext *ctx, arg_decode_insn3215 *a, uint32_t insn) +static void decode_insn32_extract_sfence_vm(DisasContext *ctx, arg_decode_insn3223 *a, uint32_t insn) { a->rs1 = extract32(insn, 15, 5); } -static void decode_insn32_extract_sfence_vma(DisasContext *ctx, arg_decode_insn3214 *a, uint32_t insn) +static void 
decode_insn32_extract_sfence_vma(DisasContext *ctx, arg_decode_insn3222 *a, uint32_t insn) { a->rs2 = extract32(insn, 20, 5); a->rs1 = extract32(insn, 15, 5); @@ -576,18 +1404,26 @@ static bool decode_insn32(DisasContext *ctx, uint32_t insn) union { arg_atomic f_atomic; arg_b f_b; - arg_decode_insn3210 f_decode_insn3210; - arg_decode_insn3211 f_decode_insn3211; - arg_decode_insn3212 f_decode_insn3212; - arg_decode_insn3213 f_decode_insn3213; arg_decode_insn3214 f_decode_insn3214; arg_decode_insn3215 f_decode_insn3215; arg_decode_insn3216 f_decode_insn3216; - arg_decode_insn329 f_decode_insn329; + arg_decode_insn3217 f_decode_insn3217; + arg_decode_insn3218 f_decode_insn3218; + arg_decode_insn3219 f_decode_insn3219; + arg_decode_insn3220 f_decode_insn3220; + arg_decode_insn3221 f_decode_insn3221; + arg_decode_insn3222 f_decode_insn3222; + arg_decode_insn3223 f_decode_insn3223; + arg_decode_insn3224 f_decode_insn3224; arg_empty f_empty; arg_i f_i; arg_j f_j; arg_r f_r; + arg_r2nfvm f_r2nfvm; + arg_rmr f_rmr; + arg_rmrr f_rmrr; + arg_rnfvm f_rnfvm; + arg_rwdvm f_rwdvm; arg_s f_s; arg_shift f_shift; arg_u f_u; @@ -600,55 +1436,235 @@ static bool decode_insn32(DisasContext *ctx, uint32_t insn) switch ((insn >> 12) & 0x7) { case 0x0: /* ........ ........ .000.... .0000011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:96 */ if (trans_lb(ctx, &u.f_i)) return true; return false; case 0x1: /* ........ ........ .001.... .0000011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:97 */ if (trans_lh(ctx, &u.f_i)) return true; return false; case 0x2: /* ........ ........ .010.... .0000011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:98 */ if (trans_lw(ctx, &u.f_i)) return true; return false; case 0x3: /* ........ ........ .011.... .0000011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32-64.decode:28 */ if (trans_ld(ctx, &u.f_i)) return true; return false; case 0x4: /* ........ ........ .100.... .0000011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:99 */ if (trans_lbu(ctx, &u.f_i)) return true; return false; case 0x5: /* ........ ........ .101.... .0000011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:100 */ if (trans_lhu(ctx, &u.f_i)) return true; return false; case 0x6: /* ........ ........ .110.... .0000011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32-64.decode:27 */ if (trans_lwu(ctx, &u.f_i)) return true; return false; } return false; case 0x00000007: /* ........ ........ ........ .0000111 */ - decode_insn32_extract_i(ctx, &u.f_i, insn); switch ((insn >> 12) & 0x7) { + case 0x0: + /* ........ ........ .000.... .0000111 */ + switch ((insn >> 26) & 0x7) { + case 0x0: + /* ...000.. ........ .000.... .0000111 */ + decode_insn32_extract_r2_nfvm(ctx, &u.f_r2nfvm, insn); + switch ((insn >> 20) & 0x1f) { + case 0x0: + /* ...000.0 0000.... .000.... .0000111 */ + if (trans_vlbu_v(ctx, &u.f_r2nfvm)) return true; + return false; + case 0x10: + /* ...000.1 0000.... .000.... .0000111 */ + if (trans_vlbuff_v(ctx, &u.f_r2nfvm)) return true; + return false; + } + return false; + case 0x2: + /* ...010.. ........ .000.... .0000111 */ + decode_insn32_extract_r_nfvm(ctx, &u.f_rnfvm, insn); + if (trans_vlsbu_v(ctx, &u.f_rnfvm)) return true; + return false; + case 0x3: + /* ...011.. ........ .000.... 
.0000111 */ + decode_insn32_extract_r_nfvm(ctx, &u.f_rnfvm, insn); + if (trans_vlxbu_v(ctx, &u.f_rnfvm)) return true; + return false; + case 0x4: + /* ...100.. ........ .000.... .0000111 */ + decode_insn32_extract_r2_nfvm(ctx, &u.f_r2nfvm, insn); + switch ((insn >> 20) & 0x1f) { + case 0x0: + /* ...100.0 0000.... .000.... .0000111 */ + if (trans_vlb_v(ctx, &u.f_r2nfvm)) return true; + return false; + case 0x10: + /* ...100.1 0000.... .000.... .0000111 */ + if (trans_vlbff_v(ctx, &u.f_r2nfvm)) return true; + return false; + } + return false; + case 0x6: + /* ...110.. ........ .000.... .0000111 */ + decode_insn32_extract_r_nfvm(ctx, &u.f_rnfvm, insn); + if (trans_vlsb_v(ctx, &u.f_rnfvm)) return true; + return false; + case 0x7: + /* ...111.. ........ .000.... .0000111 */ + decode_insn32_extract_r_nfvm(ctx, &u.f_rnfvm, insn); + if (trans_vlxb_v(ctx, &u.f_rnfvm)) return true; + return false; + } + return false; case 0x2: /* ........ ........ .010.... .0000111 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:156 */ + decode_insn32_extract_i(ctx, &u.f_i, insn); if (trans_flw(ctx, &u.f_i)) return true; return false; case 0x3: /* ........ ........ .011.... .0000111 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:184 */ + decode_insn32_extract_i(ctx, &u.f_i, insn); if (trans_fld(ctx, &u.f_i)) return true; return false; + case 0x5: + /* ........ ........ .101.... .0000111 */ + switch ((insn >> 26) & 0x7) { + case 0x0: + /* ...000.. ........ .101.... .0000111 */ + decode_insn32_extract_r2_nfvm(ctx, &u.f_r2nfvm, insn); + switch ((insn >> 20) & 0x1f) { + case 0x0: + /* ...000.0 0000.... .101.... .0000111 */ + if (trans_vlhu_v(ctx, &u.f_r2nfvm)) return true; + return false; + case 0x10: + /* ...000.1 0000.... .101.... .0000111 */ + if (trans_vlhuff_v(ctx, &u.f_r2nfvm)) return true; + return false; + } + return false; + case 0x2: + /* ...010.. ........ .101.... .0000111 */ + decode_insn32_extract_r_nfvm(ctx, &u.f_rnfvm, insn); + if (trans_vlshu_v(ctx, &u.f_rnfvm)) return true; + return false; + case 0x3: + /* ...011.. ........ .101.... .0000111 */ + decode_insn32_extract_r_nfvm(ctx, &u.f_rnfvm, insn); + if (trans_vlxhu_v(ctx, &u.f_rnfvm)) return true; + return false; + case 0x4: + /* ...100.. ........ .101.... .0000111 */ + decode_insn32_extract_r2_nfvm(ctx, &u.f_r2nfvm, insn); + switch ((insn >> 20) & 0x1f) { + case 0x0: + /* ...100.0 0000.... .101.... .0000111 */ + if (trans_vlh_v(ctx, &u.f_r2nfvm)) return true; + return false; + case 0x10: + /* ...100.1 0000.... .101.... .0000111 */ + if (trans_vlhff_v(ctx, &u.f_r2nfvm)) return true; + return false; + } + return false; + case 0x6: + /* ...110.. ........ .101.... .0000111 */ + decode_insn32_extract_r_nfvm(ctx, &u.f_rnfvm, insn); + if (trans_vlsh_v(ctx, &u.f_rnfvm)) return true; + return false; + case 0x7: + /* ...111.. ........ .101.... .0000111 */ + decode_insn32_extract_r_nfvm(ctx, &u.f_rnfvm, insn); + if (trans_vlxh_v(ctx, &u.f_rnfvm)) return true; + return false; + } + return false; + case 0x6: + /* ........ ........ .110.... .0000111 */ + switch ((insn >> 26) & 0x7) { + case 0x0: + /* ...000.. ........ .110.... .0000111 */ + decode_insn32_extract_r2_nfvm(ctx, &u.f_r2nfvm, insn); + switch ((insn >> 20) & 0x1f) { + case 0x0: + /* ...000.0 0000.... .110.... .0000111 */ + if (trans_vlwu_v(ctx, &u.f_r2nfvm)) return true; + return false; + case 0x10: + /* ...000.1 0000.... .110.... 
.0000111 */ + if (trans_vlwuff_v(ctx, &u.f_r2nfvm)) return true; + return false; + } + return false; + case 0x2: + /* ...010.. ........ .110.... .0000111 */ + decode_insn32_extract_r_nfvm(ctx, &u.f_rnfvm, insn); + if (trans_vlswu_v(ctx, &u.f_rnfvm)) return true; + return false; + case 0x3: + /* ...011.. ........ .110.... .0000111 */ + decode_insn32_extract_r_nfvm(ctx, &u.f_rnfvm, insn); + if (trans_vlxwu_v(ctx, &u.f_rnfvm)) return true; + return false; + case 0x4: + /* ...100.. ........ .110.... .0000111 */ + decode_insn32_extract_r2_nfvm(ctx, &u.f_r2nfvm, insn); + switch ((insn >> 20) & 0x1f) { + case 0x0: + /* ...100.0 0000.... .110.... .0000111 */ + if (trans_vlw_v(ctx, &u.f_r2nfvm)) return true; + return false; + case 0x10: + /* ...100.1 0000.... .110.... .0000111 */ + if (trans_vlwff_v(ctx, &u.f_r2nfvm)) return true; + return false; + } + return false; + case 0x6: + /* ...110.. ........ .110.... .0000111 */ + decode_insn32_extract_r_nfvm(ctx, &u.f_rnfvm, insn); + if (trans_vlsw_v(ctx, &u.f_rnfvm)) return true; + return false; + case 0x7: + /* ...111.. ........ .110.... .0000111 */ + decode_insn32_extract_r_nfvm(ctx, &u.f_rnfvm, insn); + if (trans_vlxw_v(ctx, &u.f_rnfvm)) return true; + return false; + } + return false; + case 0x7: + /* ........ ........ .111.... .0000111 */ + switch ((insn >> 26) & 0x7) { + case 0x0: + /* ...000.. ........ .111.... .0000111 */ + decode_insn32_extract_r2_nfvm(ctx, &u.f_r2nfvm, insn); + switch ((insn >> 20) & 0x1f) { + case 0x0: + /* ...000.0 0000.... .111.... .0000111 */ + if (trans_vle_v(ctx, &u.f_r2nfvm)) return true; + return false; + case 0x10: + /* ...000.1 0000.... .111.... .0000111 */ + if (trans_vleff_v(ctx, &u.f_r2nfvm)) return true; + return false; + } + return false; + case 0x2: + /* ...010.. ........ .111.... .0000111 */ + decode_insn32_extract_r_nfvm(ctx, &u.f_rnfvm, insn); + if (trans_vlse_v(ctx, &u.f_rnfvm)) return true; + return false; + case 0x3: + /* ...011.. ........ .111.... .0000111 */ + decode_insn32_extract_r_nfvm(ctx, &u.f_rnfvm, insn); + if (trans_vlxe_v(ctx, &u.f_rnfvm)) return true; + return false; + } + return false; } return false; case 0x0000000f: @@ -656,14 +1672,12 @@ static bool decode_insn32(DisasContext *ctx, uint32_t insn) switch ((insn >> 12) & 0x7) { case 0x0: /* ........ ........ .000.... .0001111 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:123 */ - decode_insn32_extract_decode_insn32_Fmt_19(ctx, &u.f_decode_insn3216, insn); - if (trans_fence(ctx, &u.f_decode_insn3216)) return true; + decode_insn32_extract_decode_insn32_Fmt_29(ctx, &u.f_decode_insn3224, insn); + if (trans_fence(ctx, &u.f_decode_insn3224)) return true; return false; case 0x1: /* ........ ........ .001.... .0001111 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:124 */ - decode_insn32_extract_decode_insn32_Fmt_18(ctx, &u.f_empty, insn); + decode_insn32_extract_decode_insn32_Fmt_28(ctx, &u.f_empty, insn); if (trans_fence_i(ctx, &u.f_empty)) return true; return false; } @@ -673,7 +1687,6 @@ static bool decode_insn32(DisasContext *ctx, uint32_t insn) switch ((insn >> 12) & 0x7) { case 0x0: /* ........ ........ .000.... .0010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:104 */ decode_insn32_extract_i(ctx, &u.f_i, insn); if (trans_addi(ctx, &u.f_i)) return true; return false; @@ -683,26 +1696,22 @@ static bool decode_insn32(DisasContext *ctx, uint32_t insn) switch ((insn >> 30) & 0x3) { case 0x0: /* 00...... ........ .001.... 
.0010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:110 */ if (trans_slli(ctx, &u.f_shift)) return true; return false; } return false; case 0x2: /* ........ ........ .010.... .0010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:105 */ decode_insn32_extract_i(ctx, &u.f_i, insn); if (trans_slti(ctx, &u.f_i)) return true; return false; case 0x3: /* ........ ........ .011.... .0010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:106 */ decode_insn32_extract_i(ctx, &u.f_i, insn); if (trans_sltiu(ctx, &u.f_i)) return true; return false; case 0x4: /* ........ ........ .100.... .0010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:107 */ decode_insn32_extract_i(ctx, &u.f_i, insn); if (trans_xori(ctx, &u.f_i)) return true; return false; @@ -712,25 +1721,21 @@ static bool decode_insn32(DisasContext *ctx, uint32_t insn) switch ((insn >> 30) & 0x3) { case 0x0: /* 00...... ........ .101.... .0010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:111 */ if (trans_srli(ctx, &u.f_shift)) return true; return false; case 0x1: /* 01...... ........ .101.... .0010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:112 */ if (trans_srai(ctx, &u.f_shift)) return true; return false; } return false; case 0x6: /* ........ ........ .110.... .0010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:108 */ decode_insn32_extract_i(ctx, &u.f_i, insn); if (trans_ori(ctx, &u.f_i)) return true; return false; case 0x7: /* ........ ........ .111.... .0010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:109 */ decode_insn32_extract_i(ctx, &u.f_i, insn); if (trans_andi(ctx, &u.f_i)) return true; return false; @@ -738,7 +1743,6 @@ static bool decode_insn32(DisasContext *ctx, uint32_t insn) return false; case 0x00000017: /* ........ ........ ........ .0010111 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:87 */ decode_insn32_extract_u(ctx, &u.f_u, insn); if (trans_auipc(ctx, &u.f_u)) return true; return false; @@ -747,7 +1751,6 @@ static bool decode_insn32(DisasContext *ctx, uint32_t insn) switch ((insn >> 12) & 0x7) { case 0x0: /* ........ ........ .000.... .0011011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32-64.decode:30 */ decode_insn32_extract_i(ctx, &u.f_i, insn); if (trans_addiw(ctx, &u.f_i)) return true; return false; @@ -757,7 +1760,6 @@ static bool decode_insn32(DisasContext *ctx, uint32_t insn) switch ((insn >> 25) & 0x7f) { case 0x0: /* 0000000. ........ .001.... .0011011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32-64.decode:31 */ if (trans_slliw(ctx, &u.f_shift)) return true; return false; } @@ -768,12 +1770,10 @@ static bool decode_insn32(DisasContext *ctx, uint32_t insn) switch ((insn >> 25) & 0x7f) { case 0x0: /* 0000000. ........ .101.... .0011011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32-64.decode:32 */ if (trans_srliw(ctx, &u.f_shift)) return true; return false; case 0x20: /* 0100000. ........ .101.... .0011011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32-64.decode:33 */ if (trans_sraiw(ctx, &u.f_shift)) return true; return false; } @@ -786,40 +1786,155 @@ static bool decode_insn32(DisasContext *ctx, uint32_t insn) switch ((insn >> 12) & 0x7) { case 0x0: /* ........ ........ .000.... 
.0100011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:101 */ if (trans_sb(ctx, &u.f_s)) return true; return false; case 0x1: /* ........ ........ .001.... .0100011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:102 */ if (trans_sh(ctx, &u.f_s)) return true; return false; case 0x2: /* ........ ........ .010.... .0100011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:103 */ if (trans_sw(ctx, &u.f_s)) return true; return false; case 0x3: /* ........ ........ .011.... .0100011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32-64.decode:29 */ if (trans_sd(ctx, &u.f_s)) return true; return false; } return false; case 0x00000027: /* ........ ........ ........ .0100111 */ - decode_insn32_extract_s(ctx, &u.f_s, insn); switch ((insn >> 12) & 0x7) { + case 0x0: + /* ........ ........ .000.... .0100111 */ + switch ((insn >> 26) & 0x3) { + case 0x0: + /* ....00.. ........ .000.... .0100111 */ + decode_insn32_extract_r2_nfvm(ctx, &u.f_r2nfvm, insn); + switch (insn & 0x11f00000) { + case 0x00000000: + /* ...000.0 0000.... .000.... .0100111 */ + if (trans_vsb_v(ctx, &u.f_r2nfvm)) return true; + return false; + } + return false; + case 0x2: + /* ....10.. ........ .000.... .0100111 */ + decode_insn32_extract_r_nfvm(ctx, &u.f_rnfvm, insn); + switch ((insn >> 28) & 0x1) { + case 0x0: + /* ...010.. ........ .000.... .0100111 */ + if (trans_vssb_v(ctx, &u.f_rnfvm)) return true; + return false; + } + return false; + case 0x3: + /* ....11.. ........ .000.... .0100111 */ + decode_insn32_extract_r_nfvm(ctx, &u.f_rnfvm, insn); + if (trans_vsxb_v(ctx, &u.f_rnfvm)) return true; + return false; + } + return false; case 0x2: /* ........ ........ .010.... .0100111 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:157 */ + decode_insn32_extract_s(ctx, &u.f_s, insn); if (trans_fsw(ctx, &u.f_s)) return true; return false; case 0x3: /* ........ ........ .011.... .0100111 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:185 */ + decode_insn32_extract_s(ctx, &u.f_s, insn); if (trans_fsd(ctx, &u.f_s)) return true; return false; + case 0x5: + /* ........ ........ .101.... .0100111 */ + switch ((insn >> 26) & 0x3) { + case 0x0: + /* ....00.. ........ .101.... .0100111 */ + decode_insn32_extract_r2_nfvm(ctx, &u.f_r2nfvm, insn); + switch (insn & 0x11f00000) { + case 0x00000000: + /* ...000.0 0000.... .101.... .0100111 */ + if (trans_vsh_v(ctx, &u.f_r2nfvm)) return true; + return false; + } + return false; + case 0x2: + /* ....10.. ........ .101.... .0100111 */ + decode_insn32_extract_r_nfvm(ctx, &u.f_rnfvm, insn); + switch ((insn >> 28) & 0x1) { + case 0x0: + /* ...010.. ........ .101.... .0100111 */ + if (trans_vssh_v(ctx, &u.f_rnfvm)) return true; + return false; + } + return false; + case 0x3: + /* ....11.. ........ .101.... .0100111 */ + decode_insn32_extract_r_nfvm(ctx, &u.f_rnfvm, insn); + if (trans_vsxh_v(ctx, &u.f_rnfvm)) return true; + return false; + } + return false; + case 0x6: + /* ........ ........ .110.... .0100111 */ + switch ((insn >> 26) & 0x3) { + case 0x0: + /* ....00.. ........ .110.... .0100111 */ + decode_insn32_extract_r2_nfvm(ctx, &u.f_r2nfvm, insn); + switch (insn & 0x11f00000) { + case 0x00000000: + /* ...000.0 0000.... .110.... .0100111 */ + if (trans_vsw_v(ctx, &u.f_r2nfvm)) return true; + return false; + } + return false; + case 0x2: + /* ....10.. ........ .110.... 
.0100111 */ + decode_insn32_extract_r_nfvm(ctx, &u.f_rnfvm, insn); + switch ((insn >> 28) & 0x1) { + case 0x0: + /* ...010.. ........ .110.... .0100111 */ + if (trans_vssw_v(ctx, &u.f_rnfvm)) return true; + return false; + } + return false; + case 0x3: + /* ....11.. ........ .110.... .0100111 */ + decode_insn32_extract_r_nfvm(ctx, &u.f_rnfvm, insn); + if (trans_vsxw_v(ctx, &u.f_rnfvm)) return true; + return false; + } + return false; + case 0x7: + /* ........ ........ .111.... .0100111 */ + switch ((insn >> 26) & 0x3) { + case 0x0: + /* ....00.. ........ .111.... .0100111 */ + decode_insn32_extract_r2_nfvm(ctx, &u.f_r2nfvm, insn); + switch (insn & 0x11f00000) { + case 0x00000000: + /* ...000.0 0000.... .111.... .0100111 */ + if (trans_vse_v(ctx, &u.f_r2nfvm)) return true; + return false; + } + return false; + case 0x2: + /* ....10.. ........ .111.... .0100111 */ + decode_insn32_extract_r_nfvm(ctx, &u.f_rnfvm, insn); + switch ((insn >> 28) & 0x1) { + case 0x0: + /* ...010.. ........ .111.... .0100111 */ + if (trans_vsse_v(ctx, &u.f_rnfvm)) return true; + return false; + } + return false; + case 0x3: + /* ....11.. ........ .111.... .0100111 */ + decode_insn32_extract_r_nfvm(ctx, &u.f_rnfvm, insn); + if (trans_vsxe_v(ctx, &u.f_rnfvm)) return true; + return false; + } + return false; } return false; case 0x0000002f: @@ -827,35 +1942,50 @@ static bool decode_insn32(DisasContext *ctx, uint32_t insn) switch (insn & 0xf8007000) { case 0x00002000: /* 00000... ........ .010.... .0101111 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:146 */ decode_insn32_extract_atom_st(ctx, &u.f_atomic, insn); if (trans_amoadd_w(ctx, &u.f_atomic)) return true; return false; case 0x00003000: /* 00000... ........ .011.... .0101111 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32-64.decode:51 */ decode_insn32_extract_atom_st(ctx, &u.f_atomic, insn); if (trans_amoadd_d(ctx, &u.f_atomic)) return true; return false; + case 0x00006000: + /* 00000... ........ .110.... .0101111 */ + decode_insn32_extract_r_wdvm(ctx, &u.f_rwdvm, insn); + if (trans_vamoaddw_v(ctx, &u.f_rwdvm)) return true; + return false; + case 0x00007000: + /* 00000... ........ .111.... .0101111 */ + decode_insn32_extract_r_wdvm(ctx, &u.f_rwdvm, insn); + if (trans_vamoaddd_v(ctx, &u.f_rwdvm)) return true; + return false; case 0x08002000: /* 00001... ........ .010.... .0101111 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:145 */ decode_insn32_extract_atom_st(ctx, &u.f_atomic, insn); if (trans_amoswap_w(ctx, &u.f_atomic)) return true; return false; case 0x08003000: /* 00001... ........ .011.... .0101111 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32-64.decode:50 */ decode_insn32_extract_atom_st(ctx, &u.f_atomic, insn); if (trans_amoswap_d(ctx, &u.f_atomic)) return true; return false; + case 0x08006000: + /* 00001... ........ .110.... .0101111 */ + decode_insn32_extract_r_wdvm(ctx, &u.f_rwdvm, insn); + if (trans_vamoswapw_v(ctx, &u.f_rwdvm)) return true; + return false; + case 0x08007000: + /* 00001... ........ .111.... .0101111 */ + decode_insn32_extract_r_wdvm(ctx, &u.f_rwdvm, insn); + if (trans_vamoswapd_v(ctx, &u.f_rwdvm)) return true; + return false; case 0x10002000: /* 00010... ........ .010.... .0101111 */ decode_insn32_extract_atom_ld(ctx, &u.f_atomic, insn); switch ((insn >> 20) & 0x1f) { case 0x0: /* 00010..0 0000.... .010.... 
.0101111 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:143 */ if (trans_lr_w(ctx, &u.f_atomic)) return true; return false; } @@ -866,107 +1996,160 @@ static bool decode_insn32(DisasContext *ctx, uint32_t insn) switch ((insn >> 20) & 0x1f) { case 0x0: /* 00010..0 0000.... .011.... .0101111 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32-64.decode:48 */ if (trans_lr_d(ctx, &u.f_atomic)) return true; return false; } return false; case 0x18002000: /* 00011... ........ .010.... .0101111 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:144 */ decode_insn32_extract_atom_st(ctx, &u.f_atomic, insn); if (trans_sc_w(ctx, &u.f_atomic)) return true; return false; case 0x18003000: /* 00011... ........ .011.... .0101111 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32-64.decode:49 */ decode_insn32_extract_atom_st(ctx, &u.f_atomic, insn); if (trans_sc_d(ctx, &u.f_atomic)) return true; return false; case 0x20002000: /* 00100... ........ .010.... .0101111 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:147 */ decode_insn32_extract_atom_st(ctx, &u.f_atomic, insn); if (trans_amoxor_w(ctx, &u.f_atomic)) return true; return false; case 0x20003000: /* 00100... ........ .011.... .0101111 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32-64.decode:52 */ decode_insn32_extract_atom_st(ctx, &u.f_atomic, insn); if (trans_amoxor_d(ctx, &u.f_atomic)) return true; return false; + case 0x20006000: + /* 00100... ........ .110.... .0101111 */ + decode_insn32_extract_r_wdvm(ctx, &u.f_rwdvm, insn); + if (trans_vamoxorw_v(ctx, &u.f_rwdvm)) return true; + return false; + case 0x20007000: + /* 00100... ........ .111.... .0101111 */ + decode_insn32_extract_r_wdvm(ctx, &u.f_rwdvm, insn); + if (trans_vamoxord_v(ctx, &u.f_rwdvm)) return true; + return false; case 0x40002000: /* 01000... ........ .010.... .0101111 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:149 */ decode_insn32_extract_atom_st(ctx, &u.f_atomic, insn); if (trans_amoor_w(ctx, &u.f_atomic)) return true; return false; case 0x40003000: /* 01000... ........ .011.... .0101111 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32-64.decode:54 */ decode_insn32_extract_atom_st(ctx, &u.f_atomic, insn); if (trans_amoor_d(ctx, &u.f_atomic)) return true; return false; + case 0x40006000: + /* 01000... ........ .110.... .0101111 */ + decode_insn32_extract_r_wdvm(ctx, &u.f_rwdvm, insn); + if (trans_vamoorw_v(ctx, &u.f_rwdvm)) return true; + return false; + case 0x40007000: + /* 01000... ........ .111.... .0101111 */ + decode_insn32_extract_r_wdvm(ctx, &u.f_rwdvm, insn); + if (trans_vamoord_v(ctx, &u.f_rwdvm)) return true; + return false; case 0x60002000: /* 01100... ........ .010.... .0101111 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:148 */ decode_insn32_extract_atom_st(ctx, &u.f_atomic, insn); if (trans_amoand_w(ctx, &u.f_atomic)) return true; return false; case 0x60003000: /* 01100... ........ .011.... .0101111 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32-64.decode:53 */ decode_insn32_extract_atom_st(ctx, &u.f_atomic, insn); if (trans_amoand_d(ctx, &u.f_atomic)) return true; return false; + case 0x60006000: + /* 01100... ........ .110.... .0101111 */ + decode_insn32_extract_r_wdvm(ctx, &u.f_rwdvm, insn); + if (trans_vamoandw_v(ctx, &u.f_rwdvm)) return true; + return false; + case 0x60007000: + /* 01100... 
........ .111.... .0101111 */ + decode_insn32_extract_r_wdvm(ctx, &u.f_rwdvm, insn); + if (trans_vamoandd_v(ctx, &u.f_rwdvm)) return true; + return false; case 0x80002000: /* 10000... ........ .010.... .0101111 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:150 */ decode_insn32_extract_atom_st(ctx, &u.f_atomic, insn); if (trans_amomin_w(ctx, &u.f_atomic)) return true; return false; case 0x80003000: /* 10000... ........ .011.... .0101111 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32-64.decode:55 */ decode_insn32_extract_atom_st(ctx, &u.f_atomic, insn); if (trans_amomin_d(ctx, &u.f_atomic)) return true; return false; + case 0x80006000: + /* 10000... ........ .110.... .0101111 */ + decode_insn32_extract_r_wdvm(ctx, &u.f_rwdvm, insn); + if (trans_vamominw_v(ctx, &u.f_rwdvm)) return true; + return false; + case 0x80007000: + /* 10000... ........ .111.... .0101111 */ + decode_insn32_extract_r_wdvm(ctx, &u.f_rwdvm, insn); + if (trans_vamomind_v(ctx, &u.f_rwdvm)) return true; + return false; case 0xa0002000: /* 10100... ........ .010.... .0101111 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:151 */ decode_insn32_extract_atom_st(ctx, &u.f_atomic, insn); if (trans_amomax_w(ctx, &u.f_atomic)) return true; return false; case 0xa0003000: /* 10100... ........ .011.... .0101111 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32-64.decode:56 */ decode_insn32_extract_atom_st(ctx, &u.f_atomic, insn); if (trans_amomax_d(ctx, &u.f_atomic)) return true; return false; + case 0xa0006000: + /* 10100... ........ .110.... .0101111 */ + decode_insn32_extract_r_wdvm(ctx, &u.f_rwdvm, insn); + if (trans_vamomaxw_v(ctx, &u.f_rwdvm)) return true; + return false; + case 0xa0007000: + /* 10100... ........ .111.... .0101111 */ + decode_insn32_extract_r_wdvm(ctx, &u.f_rwdvm, insn); + if (trans_vamomaxd_v(ctx, &u.f_rwdvm)) return true; + return false; case 0xc0002000: /* 11000... ........ .010.... .0101111 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:152 */ decode_insn32_extract_atom_st(ctx, &u.f_atomic, insn); if (trans_amominu_w(ctx, &u.f_atomic)) return true; return false; case 0xc0003000: /* 11000... ........ .011.... .0101111 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32-64.decode:57 */ decode_insn32_extract_atom_st(ctx, &u.f_atomic, insn); if (trans_amominu_d(ctx, &u.f_atomic)) return true; return false; + case 0xc0006000: + /* 11000... ........ .110.... .0101111 */ + decode_insn32_extract_r_wdvm(ctx, &u.f_rwdvm, insn); + if (trans_vamominuw_v(ctx, &u.f_rwdvm)) return true; + return false; + case 0xc0007000: + /* 11000... ........ .111.... .0101111 */ + decode_insn32_extract_r_wdvm(ctx, &u.f_rwdvm, insn); + if (trans_vamominud_v(ctx, &u.f_rwdvm)) return true; + return false; case 0xe0002000: /* 11100... ........ .010.... .0101111 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:153 */ decode_insn32_extract_atom_st(ctx, &u.f_atomic, insn); if (trans_amomaxu_w(ctx, &u.f_atomic)) return true; return false; case 0xe0003000: /* 11100... ........ .011.... .0101111 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32-64.decode:58 */ decode_insn32_extract_atom_st(ctx, &u.f_atomic, insn); if (trans_amomaxu_d(ctx, &u.f_atomic)) return true; return false; + case 0xe0006000: + /* 11100... ........ .110.... 
.0101111 */ + decode_insn32_extract_r_wdvm(ctx, &u.f_rwdvm, insn); + if (trans_vamomaxuw_v(ctx, &u.f_rwdvm)) return true; + return false; + case 0xe0007000: + /* 11100... ........ .111.... .0101111 */ + decode_insn32_extract_r_wdvm(ctx, &u.f_rwdvm, insn); + if (trans_vamomaxud_v(ctx, &u.f_rwdvm)) return true; + return false; } return false; case 0x00000033: @@ -975,99 +2158,80 @@ static bool decode_insn32(DisasContext *ctx, uint32_t insn) switch (insn & 0xfe007000) { case 0x00000000: /* 0000000. ........ .000.... .0110011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:113 */ if (trans_add(ctx, &u.f_r)) return true; return false; case 0x00001000: /* 0000000. ........ .001.... .0110011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:115 */ if (trans_sll(ctx, &u.f_r)) return true; return false; case 0x00002000: /* 0000000. ........ .010.... .0110011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:116 */ if (trans_slt(ctx, &u.f_r)) return true; return false; case 0x00003000: /* 0000000. ........ .011.... .0110011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:117 */ if (trans_sltu(ctx, &u.f_r)) return true; return false; case 0x00004000: /* 0000000. ........ .100.... .0110011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:118 */ if (trans_xor(ctx, &u.f_r)) return true; return false; case 0x00005000: /* 0000000. ........ .101.... .0110011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:119 */ if (trans_srl(ctx, &u.f_r)) return true; return false; case 0x00006000: /* 0000000. ........ .110.... .0110011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:121 */ if (trans_or(ctx, &u.f_r)) return true; return false; case 0x00007000: /* 0000000. ........ .111.... .0110011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:122 */ if (trans_and(ctx, &u.f_r)) return true; return false; case 0x02000000: /* 0000001. ........ .000.... .0110011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:133 */ if (trans_mul(ctx, &u.f_r)) return true; return false; case 0x02001000: /* 0000001. ........ .001.... .0110011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:134 */ if (trans_mulh(ctx, &u.f_r)) return true; return false; case 0x02002000: /* 0000001. ........ .010.... .0110011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:135 */ if (trans_mulhsu(ctx, &u.f_r)) return true; return false; case 0x02003000: /* 0000001. ........ .011.... .0110011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:136 */ if (trans_mulhu(ctx, &u.f_r)) return true; return false; case 0x02004000: /* 0000001. ........ .100.... .0110011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:137 */ if (trans_div(ctx, &u.f_r)) return true; return false; case 0x02005000: /* 0000001. ........ .101.... .0110011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:138 */ if (trans_divu(ctx, &u.f_r)) return true; return false; case 0x02006000: /* 0000001. ........ .110.... .0110011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:139 */ if (trans_rem(ctx, &u.f_r)) return true; return false; case 0x02007000: /* 0000001. ........ .111.... 
.0110011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:140 */ if (trans_remu(ctx, &u.f_r)) return true; return false; case 0x40000000: /* 0100000. ........ .000.... .0110011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:114 */ if (trans_sub(ctx, &u.f_r)) return true; return false; case 0x40005000: /* 0100000. ........ .101.... .0110011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:120 */ if (trans_sra(ctx, &u.f_r)) return true; return false; } return false; case 0x00000037: /* ........ ........ ........ .0110111 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:86 */ decode_insn32_extract_u(ctx, &u.f_u, insn); if (trans_lui(ctx, &u.f_u)) return true; return false; @@ -1077,117 +2241,99 @@ static bool decode_insn32(DisasContext *ctx, uint32_t insn) switch (insn & 0xfe007000) { case 0x00000000: /* 0000000. ........ .000.... .0111011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32-64.decode:34 */ if (trans_addw(ctx, &u.f_r)) return true; return false; case 0x00001000: /* 0000000. ........ .001.... .0111011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32-64.decode:36 */ if (trans_sllw(ctx, &u.f_r)) return true; return false; case 0x00005000: /* 0000000. ........ .101.... .0111011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32-64.decode:37 */ if (trans_srlw(ctx, &u.f_r)) return true; return false; case 0x02000000: /* 0000001. ........ .000.... .0111011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32-64.decode:41 */ if (trans_mulw(ctx, &u.f_r)) return true; return false; case 0x02004000: /* 0000001. ........ .100.... .0111011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32-64.decode:42 */ if (trans_divw(ctx, &u.f_r)) return true; return false; case 0x02005000: /* 0000001. ........ .101.... .0111011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32-64.decode:43 */ if (trans_divuw(ctx, &u.f_r)) return true; return false; case 0x02006000: /* 0000001. ........ .110.... .0111011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32-64.decode:44 */ if (trans_remw(ctx, &u.f_r)) return true; return false; case 0x02007000: /* 0000001. ........ .111.... .0111011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32-64.decode:45 */ if (trans_remuw(ctx, &u.f_r)) return true; return false; case 0x40000000: /* 0100000. ........ .000.... .0111011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32-64.decode:35 */ if (trans_subw(ctx, &u.f_r)) return true; return false; case 0x40005000: /* 0100000. ........ .101.... .0111011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32-64.decode:38 */ if (trans_sraw(ctx, &u.f_r)) return true; return false; } return false; case 0x00000043: /* ........ ........ ........ .1000011 */ - decode_insn32_extract_r4_rm(ctx, &u.f_decode_insn3210, insn); + decode_insn32_extract_r4_rm(ctx, &u.f_decode_insn3215, insn); switch ((insn >> 25) & 0x3) { case 0x0: /* .....00. ........ ........ .1000011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:158 */ - if (trans_fmadd_s(ctx, &u.f_decode_insn3210)) return true; + if (trans_fmadd_s(ctx, &u.f_decode_insn3215)) return true; return false; case 0x1: /* .....01. ........ ........ 
.1000011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:186 */ - if (trans_fmadd_d(ctx, &u.f_decode_insn3210)) return true; + if (trans_fmadd_d(ctx, &u.f_decode_insn3215)) return true; return false; } return false; case 0x00000047: /* ........ ........ ........ .1000111 */ - decode_insn32_extract_r4_rm(ctx, &u.f_decode_insn3210, insn); + decode_insn32_extract_r4_rm(ctx, &u.f_decode_insn3215, insn); switch ((insn >> 25) & 0x3) { case 0x0: /* .....00. ........ ........ .1000111 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:159 */ - if (trans_fmsub_s(ctx, &u.f_decode_insn3210)) return true; + if (trans_fmsub_s(ctx, &u.f_decode_insn3215)) return true; return false; case 0x1: /* .....01. ........ ........ .1000111 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:187 */ - if (trans_fmsub_d(ctx, &u.f_decode_insn3210)) return true; + if (trans_fmsub_d(ctx, &u.f_decode_insn3215)) return true; return false; } return false; case 0x0000004b: /* ........ ........ ........ .1001011 */ - decode_insn32_extract_r4_rm(ctx, &u.f_decode_insn3210, insn); + decode_insn32_extract_r4_rm(ctx, &u.f_decode_insn3215, insn); switch ((insn >> 25) & 0x3) { case 0x0: /* .....00. ........ ........ .1001011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:160 */ - if (trans_fnmsub_s(ctx, &u.f_decode_insn3210)) return true; + if (trans_fnmsub_s(ctx, &u.f_decode_insn3215)) return true; return false; case 0x1: /* .....01. ........ ........ .1001011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:188 */ - if (trans_fnmsub_d(ctx, &u.f_decode_insn3210)) return true; + if (trans_fnmsub_d(ctx, &u.f_decode_insn3215)) return true; return false; } return false; case 0x0000004f: /* ........ ........ ........ .1001111 */ - decode_insn32_extract_r4_rm(ctx, &u.f_decode_insn3210, insn); + decode_insn32_extract_r4_rm(ctx, &u.f_decode_insn3215, insn); switch ((insn >> 25) & 0x3) { case 0x0: /* .....00. ........ ........ .1001111 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:161 */ - if (trans_fnmadd_s(ctx, &u.f_decode_insn3210)) return true; + if (trans_fnmadd_s(ctx, &u.f_decode_insn3215)) return true; return false; case 0x1: /* .....01. ........ ........ .1001111 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:189 */ - if (trans_fnmadd_d(ctx, &u.f_decode_insn3210)) return true; + if (trans_fnmadd_d(ctx, &u.f_decode_insn3215)) return true; return false; } return false; @@ -1196,51 +2342,43 @@ static bool decode_insn32(DisasContext *ctx, uint32_t insn) switch ((insn >> 25) & 0x7f) { case 0x0: /* 0000000. ........ ........ .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:162 */ - decode_insn32_extract_r_rm(ctx, &u.f_decode_insn3211, insn); - if (trans_fadd_s(ctx, &u.f_decode_insn3211)) return true; + decode_insn32_extract_r_rm(ctx, &u.f_decode_insn3216, insn); + if (trans_fadd_s(ctx, &u.f_decode_insn3216)) return true; return false; case 0x1: /* 0000001. ........ ........ .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:190 */ - decode_insn32_extract_r_rm(ctx, &u.f_decode_insn3211, insn); - if (trans_fadd_d(ctx, &u.f_decode_insn3211)) return true; + decode_insn32_extract_r_rm(ctx, &u.f_decode_insn3216, insn); + if (trans_fadd_d(ctx, &u.f_decode_insn3216)) return true; return false; case 0x4: /* 0000100. ........ ........ 
.1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:163 */ - decode_insn32_extract_r_rm(ctx, &u.f_decode_insn3211, insn); - if (trans_fsub_s(ctx, &u.f_decode_insn3211)) return true; + decode_insn32_extract_r_rm(ctx, &u.f_decode_insn3216, insn); + if (trans_fsub_s(ctx, &u.f_decode_insn3216)) return true; return false; case 0x5: /* 0000101. ........ ........ .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:191 */ - decode_insn32_extract_r_rm(ctx, &u.f_decode_insn3211, insn); - if (trans_fsub_d(ctx, &u.f_decode_insn3211)) return true; + decode_insn32_extract_r_rm(ctx, &u.f_decode_insn3216, insn); + if (trans_fsub_d(ctx, &u.f_decode_insn3216)) return true; return false; case 0x8: /* 0001000. ........ ........ .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:164 */ - decode_insn32_extract_r_rm(ctx, &u.f_decode_insn3211, insn); - if (trans_fmul_s(ctx, &u.f_decode_insn3211)) return true; + decode_insn32_extract_r_rm(ctx, &u.f_decode_insn3216, insn); + if (trans_fmul_s(ctx, &u.f_decode_insn3216)) return true; return false; case 0x9: /* 0001001. ........ ........ .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:192 */ - decode_insn32_extract_r_rm(ctx, &u.f_decode_insn3211, insn); - if (trans_fmul_d(ctx, &u.f_decode_insn3211)) return true; + decode_insn32_extract_r_rm(ctx, &u.f_decode_insn3216, insn); + if (trans_fmul_d(ctx, &u.f_decode_insn3216)) return true; return false; case 0xc: /* 0001100. ........ ........ .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:165 */ - decode_insn32_extract_r_rm(ctx, &u.f_decode_insn3211, insn); - if (trans_fdiv_s(ctx, &u.f_decode_insn3211)) return true; + decode_insn32_extract_r_rm(ctx, &u.f_decode_insn3216, insn); + if (trans_fdiv_s(ctx, &u.f_decode_insn3216)) return true; return false; case 0xd: /* 0001101. ........ ........ .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:193 */ - decode_insn32_extract_r_rm(ctx, &u.f_decode_insn3211, insn); - if (trans_fdiv_d(ctx, &u.f_decode_insn3211)) return true; + decode_insn32_extract_r_rm(ctx, &u.f_decode_insn3216, insn); + if (trans_fdiv_d(ctx, &u.f_decode_insn3216)) return true; return false; case 0x10: /* 0010000. ........ ........ .1010011 */ @@ -1248,17 +2386,14 @@ static bool decode_insn32(DisasContext *ctx, uint32_t insn) switch ((insn >> 12) & 0x7) { case 0x0: /* 0010000. ........ .000.... .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:167 */ if (trans_fsgnj_s(ctx, &u.f_r)) return true; return false; case 0x1: /* 0010000. ........ .001.... .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:168 */ if (trans_fsgnjn_s(ctx, &u.f_r)) return true; return false; case 0x2: /* 0010000. ........ .010.... .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:169 */ if (trans_fsgnjx_s(ctx, &u.f_r)) return true; return false; } @@ -1269,17 +2404,14 @@ static bool decode_insn32(DisasContext *ctx, uint32_t insn) switch ((insn >> 12) & 0x7) { case 0x0: /* 0010001. ........ .000.... .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:195 */ if (trans_fsgnj_d(ctx, &u.f_r)) return true; return false; case 0x1: /* 0010001. ........ .001.... 
.1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:196 */ if (trans_fsgnjn_d(ctx, &u.f_r)) return true; return false; case 0x2: /* 0010001. ........ .010.... .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:197 */ if (trans_fsgnjx_d(ctx, &u.f_r)) return true; return false; } @@ -1290,12 +2422,10 @@ static bool decode_insn32(DisasContext *ctx, uint32_t insn) switch ((insn >> 12) & 0x7) { case 0x0: /* 0010100. ........ .000.... .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:170 */ if (trans_fmin_s(ctx, &u.f_r)) return true; return false; case 0x1: /* 0010100. ........ .001.... .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:171 */ if (trans_fmax_s(ctx, &u.f_r)) return true; return false; } @@ -1306,57 +2436,51 @@ static bool decode_insn32(DisasContext *ctx, uint32_t insn) switch ((insn >> 12) & 0x7) { case 0x0: /* 0010101. ........ .000.... .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:198 */ if (trans_fmin_d(ctx, &u.f_r)) return true; return false; case 0x1: /* 0010101. ........ .001.... .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:199 */ if (trans_fmax_d(ctx, &u.f_r)) return true; return false; } return false; case 0x20: /* 0100000. ........ ........ .1010011 */ - decode_insn32_extract_r2_rm(ctx, &u.f_decode_insn3212, insn); + decode_insn32_extract_r2_rm(ctx, &u.f_decode_insn3217, insn); switch ((insn >> 20) & 0x1f) { case 0x1: /* 01000000 0001.... ........ .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:200 */ - if (trans_fcvt_s_d(ctx, &u.f_decode_insn3212)) return true; + if (trans_fcvt_s_d(ctx, &u.f_decode_insn3217)) return true; return false; } return false; case 0x21: /* 0100001. ........ ........ .1010011 */ - decode_insn32_extract_r2_rm(ctx, &u.f_decode_insn3212, insn); + decode_insn32_extract_r2_rm(ctx, &u.f_decode_insn3217, insn); switch ((insn >> 20) & 0x1f) { case 0x0: /* 01000010 0000.... ........ .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:201 */ - if (trans_fcvt_d_s(ctx, &u.f_decode_insn3212)) return true; + if (trans_fcvt_d_s(ctx, &u.f_decode_insn3217)) return true; return false; } return false; case 0x2c: /* 0101100. ........ ........ .1010011 */ - decode_insn32_extract_r2_rm(ctx, &u.f_decode_insn3212, insn); + decode_insn32_extract_r2_rm(ctx, &u.f_decode_insn3217, insn); switch ((insn >> 20) & 0x1f) { case 0x0: /* 01011000 0000.... ........ .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:166 */ - if (trans_fsqrt_s(ctx, &u.f_decode_insn3212)) return true; + if (trans_fsqrt_s(ctx, &u.f_decode_insn3217)) return true; return false; } return false; case 0x2d: /* 0101101. ........ ........ .1010011 */ - decode_insn32_extract_r2_rm(ctx, &u.f_decode_insn3212, insn); + decode_insn32_extract_r2_rm(ctx, &u.f_decode_insn3217, insn); switch ((insn >> 20) & 0x1f) { case 0x0: /* 01011010 0000.... ........ .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:194 */ - if (trans_fsqrt_d(ctx, &u.f_decode_insn3212)) return true; + if (trans_fsqrt_d(ctx, &u.f_decode_insn3217)) return true; return false; } return false; @@ -1366,17 +2490,14 @@ static bool decode_insn32(DisasContext *ctx, uint32_t insn) switch ((insn >> 12) & 0x7) { case 0x0: /* 1010000. ........ .000.... 
.1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:177 */ if (trans_fle_s(ctx, &u.f_r)) return true; return false; case 0x1: /* 1010000. ........ .001.... .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:176 */ if (trans_flt_s(ctx, &u.f_r)) return true; return false; case 0x2: /* 1010000. ........ .010.... .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:175 */ if (trans_feq_s(ctx, &u.f_r)) return true; return false; } @@ -1387,176 +2508,1772 @@ static bool decode_insn32(DisasContext *ctx, uint32_t insn) switch ((insn >> 12) & 0x7) { case 0x0: /* 1010001. ........ .000.... .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:204 */ if (trans_fle_d(ctx, &u.f_r)) return true; return false; case 0x1: /* 1010001. ........ .001.... .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:203 */ if (trans_flt_d(ctx, &u.f_r)) return true; return false; case 0x2: /* 1010001. ........ .010.... .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:202 */ if (trans_feq_d(ctx, &u.f_r)) return true; return false; } return false; case 0x60: /* 1100000. ........ ........ .1010011 */ - decode_insn32_extract_r2_rm(ctx, &u.f_decode_insn3212, insn); + decode_insn32_extract_r2_rm(ctx, &u.f_decode_insn3217, insn); switch ((insn >> 20) & 0x1f) { case 0x0: /* 11000000 0000.... ........ .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:172 */ - if (trans_fcvt_w_s(ctx, &u.f_decode_insn3212)) return true; + if (trans_fcvt_w_s(ctx, &u.f_decode_insn3217)) return true; return false; case 0x1: /* 11000000 0001.... ........ .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:173 */ - if (trans_fcvt_wu_s(ctx, &u.f_decode_insn3212)) return true; + if (trans_fcvt_wu_s(ctx, &u.f_decode_insn3217)) return true; return false; case 0x2: /* 11000000 0010.... ........ .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32-64.decode:61 */ - if (trans_fcvt_l_s(ctx, &u.f_decode_insn3212)) return true; + if (trans_fcvt_l_s(ctx, &u.f_decode_insn3217)) return true; return false; case 0x3: /* 11000000 0011.... ........ .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32-64.decode:62 */ - if (trans_fcvt_lu_s(ctx, &u.f_decode_insn3212)) return true; + if (trans_fcvt_lu_s(ctx, &u.f_decode_insn3217)) return true; return false; } return false; case 0x61: /* 1100001. ........ ........ .1010011 */ - decode_insn32_extract_r2_rm(ctx, &u.f_decode_insn3212, insn); + decode_insn32_extract_r2_rm(ctx, &u.f_decode_insn3217, insn); switch ((insn >> 20) & 0x1f) { case 0x0: /* 11000010 0000.... ........ .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:206 */ - if (trans_fcvt_w_d(ctx, &u.f_decode_insn3212)) return true; + if (trans_fcvt_w_d(ctx, &u.f_decode_insn3217)) return true; return false; case 0x1: /* 11000010 0001.... ........ .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:207 */ - if (trans_fcvt_wu_d(ctx, &u.f_decode_insn3212)) return true; + if (trans_fcvt_wu_d(ctx, &u.f_decode_insn3217)) return true; return false; case 0x2: /* 11000010 0010.... ........ 
.1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32-64.decode:67 */ - if (trans_fcvt_l_d(ctx, &u.f_decode_insn3212)) return true; + if (trans_fcvt_l_d(ctx, &u.f_decode_insn3217)) return true; return false; case 0x3: /* 11000010 0011.... ........ .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32-64.decode:68 */ - if (trans_fcvt_lu_d(ctx, &u.f_decode_insn3212)) return true; + if (trans_fcvt_lu_d(ctx, &u.f_decode_insn3217)) return true; return false; } return false; case 0x68: /* 1101000. ........ ........ .1010011 */ - decode_insn32_extract_r2_rm(ctx, &u.f_decode_insn3212, insn); + decode_insn32_extract_r2_rm(ctx, &u.f_decode_insn3217, insn); switch ((insn >> 20) & 0x1f) { case 0x0: /* 11010000 0000.... ........ .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:179 */ - if (trans_fcvt_s_w(ctx, &u.f_decode_insn3212)) return true; + if (trans_fcvt_s_w(ctx, &u.f_decode_insn3217)) return true; return false; case 0x1: /* 11010000 0001.... ........ .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:180 */ - if (trans_fcvt_s_wu(ctx, &u.f_decode_insn3212)) return true; + if (trans_fcvt_s_wu(ctx, &u.f_decode_insn3217)) return true; return false; case 0x2: /* 11010000 0010.... ........ .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32-64.decode:63 */ - if (trans_fcvt_s_l(ctx, &u.f_decode_insn3212)) return true; + if (trans_fcvt_s_l(ctx, &u.f_decode_insn3217)) return true; return false; case 0x3: /* 11010000 0011.... ........ .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32-64.decode:64 */ - if (trans_fcvt_s_lu(ctx, &u.f_decode_insn3212)) return true; + if (trans_fcvt_s_lu(ctx, &u.f_decode_insn3217)) return true; return false; } return false; case 0x69: /* 1101001. ........ ........ .1010011 */ - decode_insn32_extract_r2_rm(ctx, &u.f_decode_insn3212, insn); + decode_insn32_extract_r2_rm(ctx, &u.f_decode_insn3217, insn); switch ((insn >> 20) & 0x1f) { case 0x0: /* 11010010 0000.... ........ .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:208 */ - if (trans_fcvt_d_w(ctx, &u.f_decode_insn3212)) return true; + if (trans_fcvt_d_w(ctx, &u.f_decode_insn3217)) return true; return false; case 0x1: /* 11010010 0001.... ........ .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:209 */ - if (trans_fcvt_d_wu(ctx, &u.f_decode_insn3212)) return true; + if (trans_fcvt_d_wu(ctx, &u.f_decode_insn3217)) return true; return false; case 0x2: /* 11010010 0010.... ........ .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32-64.decode:70 */ - if (trans_fcvt_d_l(ctx, &u.f_decode_insn3212)) return true; + if (trans_fcvt_d_l(ctx, &u.f_decode_insn3217)) return true; return false; case 0x3: /* 11010010 0011.... ........ .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32-64.decode:71 */ - if (trans_fcvt_d_lu(ctx, &u.f_decode_insn3212)) return true; + if (trans_fcvt_d_lu(ctx, &u.f_decode_insn3217)) return true; return false; } return false; case 0x70: /* 1110000. ........ ........ .1010011 */ - decode_insn32_extract_r2(ctx, &u.f_decode_insn3213, insn); + decode_insn32_extract_r2(ctx, &u.f_decode_insn3218, insn); switch (insn & 0x01f07000) { case 0x00000000: /* 11100000 0000.... .000.... 
.1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:174 */ - if (trans_fmv_x_w(ctx, &u.f_decode_insn3213)) return true; + if (trans_fmv_x_w(ctx, &u.f_decode_insn3218)) return true; return false; case 0x00001000: /* 11100000 0000.... .001.... .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:178 */ - if (trans_fclass_s(ctx, &u.f_decode_insn3213)) return true; + if (trans_fclass_s(ctx, &u.f_decode_insn3218)) return true; return false; } return false; case 0x71: /* 1110001. ........ ........ .1010011 */ - decode_insn32_extract_r2(ctx, &u.f_decode_insn3213, insn); + decode_insn32_extract_r2(ctx, &u.f_decode_insn3218, insn); switch (insn & 0x01f07000) { case 0x00000000: /* 11100010 0000.... .000.... .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32-64.decode:69 */ - if (trans_fmv_x_d(ctx, &u.f_decode_insn3213)) return true; + if (trans_fmv_x_d(ctx, &u.f_decode_insn3218)) return true; return false; case 0x00001000: /* 11100010 0000.... .001.... .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:205 */ - if (trans_fclass_d(ctx, &u.f_decode_insn3213)) return true; + if (trans_fclass_d(ctx, &u.f_decode_insn3218)) return true; return false; } return false; case 0x78: /* 1111000. ........ ........ .1010011 */ - decode_insn32_extract_r2(ctx, &u.f_decode_insn3213, insn); + decode_insn32_extract_r2(ctx, &u.f_decode_insn3218, insn); switch (insn & 0x01f07000) { case 0x00000000: /* 11110000 0000.... .000.... .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:181 */ - if (trans_fmv_w_x(ctx, &u.f_decode_insn3213)) return true; + if (trans_fmv_w_x(ctx, &u.f_decode_insn3218)) return true; return false; } return false; case 0x79: /* 1111001. ........ ........ .1010011 */ - decode_insn32_extract_r2(ctx, &u.f_decode_insn3213, insn); + decode_insn32_extract_r2(ctx, &u.f_decode_insn3218, insn); switch (insn & 0x01f07000) { case 0x00000000: /* 11110010 0000.... .000.... .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32-64.decode:72 */ - if (trans_fmv_d_x(ctx, &u.f_decode_insn3213)) return true; + if (trans_fmv_d_x(ctx, &u.f_decode_insn3218)) return true; + return false; + } + return false; + } + return false; + case 0x00000057: + /* ........ ........ ........ .1010111 */ + switch (insn & 0x80007000) { + case 0x00000000: + /* 0....... ........ .000.... .1010111 */ + switch ((insn >> 26) & 0x1f) { + case 0x0: + /* 000000.. ........ .000.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vadd_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x2: + /* 000010.. ........ .000.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vsub_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x4: + /* 000100.. ........ .000.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vminu_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x5: + /* 000101.. ........ .000.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vmin_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x6: + /* 000110.. ........ .000.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vmaxu_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x7: + /* 000111.. ........ .000.... 
.1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vmax_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x9: + /* 001001.. ........ .000.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vand_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0xa: + /* 001010.. ........ .000.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vor_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0xb: + /* 001011.. ........ .000.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vxor_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0xc: + /* 001100.. ........ .000.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vrgather_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x10: + /* 010000.. ........ .000.... .1010111 */ + decode_insn32_extract_r_vm_1(ctx, &u.f_rmrr, insn); + switch ((insn >> 25) & 0x1) { + case 0x1: + /* 0100001. ........ .000.... .1010111 */ + if (trans_vadc_vvm(ctx, &u.f_rmrr)) return true; + return false; + } + return false; + case 0x11: + /* 010001.. ........ .000.... .1010111 */ + decode_insn32_extract_r_vm_1(ctx, &u.f_rmrr, insn); + switch ((insn >> 25) & 0x1) { + case 0x1: + /* 0100011. ........ .000.... .1010111 */ + if (trans_vmadc_vvm(ctx, &u.f_rmrr)) return true; + return false; + } + return false; + case 0x12: + /* 010010.. ........ .000.... .1010111 */ + decode_insn32_extract_r_vm_1(ctx, &u.f_rmrr, insn); + switch ((insn >> 25) & 0x1) { + case 0x1: + /* 0100101. ........ .000.... .1010111 */ + if (trans_vsbc_vvm(ctx, &u.f_rmrr)) return true; + return false; + } + return false; + case 0x13: + /* 010011.. ........ .000.... .1010111 */ + decode_insn32_extract_r_vm_1(ctx, &u.f_rmrr, insn); + switch ((insn >> 25) & 0x1) { + case 0x1: + /* 0100111. ........ .000.... .1010111 */ + if (trans_vmsbc_vvm(ctx, &u.f_rmrr)) return true; + return false; + } + return false; + case 0x17: + /* 010111.. ........ .000.... .1010111 */ + switch ((insn >> 25) & 0x1) { + case 0x0: + /* 0101110. ........ .000.... .1010111 */ + decode_insn32_extract_r_vm_0(ctx, &u.f_rmrr, insn); + if (trans_vmerge_vvm(ctx, &u.f_rmrr)) return true; + return false; + case 0x1: + /* 0101111. ........ .000.... .1010111 */ + decode_insn32_extract_r2(ctx, &u.f_decode_insn3218, insn); + switch ((insn >> 20) & 0x1f) { + case 0x0: + /* 01011110 0000.... .000.... .1010111 */ + if (trans_vmv_v_v(ctx, &u.f_decode_insn3218)) return true; + return false; + } + return false; + } + return false; + case 0x18: + /* 011000.. ........ .000.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vmseq_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x19: + /* 011001.. ........ .000.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vmsne_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x1a: + /* 011010.. ........ .000.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vmsltu_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x1b: + /* 011011.. ........ .000.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vmslt_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x1c: + /* 011100.. ........ .000.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vmsleu_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x1d: + /* 011101.. ........ .000.... 
.1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vmsle_vv(ctx, &u.f_rmrr)) return true; + return false; + } + return false; + case 0x00001000: + /* 0....... ........ .001.... .1010111 */ + switch (insn & 0x74000000) { + case 0x00000000: + /* 0000.0.. ........ .001.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + switch ((insn >> 27) & 0x1) { + case 0x0: + /* 000000.. ........ .001.... .1010111 */ + if (trans_vfadd_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x1: + /* 000010.. ........ .001.... .1010111 */ + if (trans_vfsub_vv(ctx, &u.f_rmrr)) return true; + return false; + } + return false; + case 0x04000000: + /* 0000.1.. ........ .001.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vfredsum_vs(ctx, &u.f_rmrr)) return true; + return false; + case 0x10000000: + /* 0001.0.. ........ .001.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + switch ((insn >> 27) & 0x1) { + case 0x0: + /* 000100.. ........ .001.... .1010111 */ + if (trans_vfmin_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x1: + /* 000110.. ........ .001.... .1010111 */ + if (trans_vfmax_vv(ctx, &u.f_rmrr)) return true; + return false; + } + return false; + case 0x14000000: + /* 0001.1.. ........ .001.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + switch ((insn >> 27) & 0x1) { + case 0x0: + /* 000101.. ........ .001.... .1010111 */ + if (trans_vfredmin_vs(ctx, &u.f_rmrr)) return true; + return false; + case 0x1: + /* 000111.. ........ .001.... .1010111 */ + if (trans_vfredmax_vs(ctx, &u.f_rmrr)) return true; + return false; + } + return false; + case 0x20000000: + /* 0010.0.. ........ .001.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + switch ((insn >> 27) & 0x1) { + case 0x0: + /* 001000.. ........ .001.... .1010111 */ + if (trans_vfsgnj_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x1: + /* 001010.. ........ .001.... .1010111 */ + if (trans_vfsgnjx_vv(ctx, &u.f_rmrr)) return true; + return false; + } + return false; + case 0x24000000: + /* 0010.1.. ........ .001.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + switch ((insn >> 27) & 0x1) { + case 0x0: + /* 001001.. ........ .001.... .1010111 */ + if (trans_vfsgnjn_vv(ctx, &u.f_rmrr)) return true; + return false; + } + return false; + case 0x30000000: + /* 0011.0.. ........ .001.... .1010111 */ + decode_insn32_extract_r2rd(ctx, &u.f_decode_insn3220, insn); + switch (insn & 0x0a0f8000) { + case 0x02000000: + /* 0011001. ....0000 0001.... .1010111 */ + if (trans_vfmv_f_s(ctx, &u.f_decode_insn3220)) return true; + return false; + } + return false; + case 0x60000000: + /* 0110.0.. ........ .001.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + switch ((insn >> 27) & 0x1) { + case 0x0: + /* 011000.. ........ .001.... .1010111 */ + if (trans_vmfeq_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x1: + /* 011010.. ........ .001.... .1010111 */ + if (trans_vmford_vv(ctx, &u.f_rmrr)) return true; + return false; + } + return false; + case 0x64000000: + /* 0110.1.. ........ .001.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + switch ((insn >> 27) & 0x1) { + case 0x0: + /* 011001.. ........ .001.... .1010111 */ + if (trans_vmfle_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x1: + /* 011011.. ........ .001.... 
.1010111 */ + if (trans_vmflt_vv(ctx, &u.f_rmrr)) return true; + return false; + } + return false; + case 0x70000000: + /* 0111.0.. ........ .001.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + switch ((insn >> 27) & 0x1) { + case 0x0: + /* 011100.. ........ .001.... .1010111 */ + if (trans_vmfne_vv(ctx, &u.f_rmrr)) return true; + return false; + } + return false; + } + return false; + case 0x00002000: + /* 0....... ........ .010.... .1010111 */ + switch ((insn >> 26) & 0x1f) { + case 0x0: + /* 000000.. ........ .010.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vredsum_vs(ctx, &u.f_rmrr)) return true; + return false; + case 0x1: + /* 000001.. ........ .010.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vredand_vs(ctx, &u.f_rmrr)) return true; + return false; + case 0x2: + /* 000010.. ........ .010.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vredor_vs(ctx, &u.f_rmrr)) return true; + return false; + case 0x3: + /* 000011.. ........ .010.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vredxor_vs(ctx, &u.f_rmrr)) return true; + return false; + case 0x4: + /* 000100.. ........ .010.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vredminu_vs(ctx, &u.f_rmrr)) return true; + return false; + case 0x5: + /* 000101.. ........ .010.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vredmin_vs(ctx, &u.f_rmrr)) return true; + return false; + case 0x6: + /* 000110.. ........ .010.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vredmaxu_vs(ctx, &u.f_rmrr)) return true; + return false; + case 0x7: + /* 000111.. ........ .010.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vredmax_vs(ctx, &u.f_rmrr)) return true; + return false; + case 0xc: + /* 001100.. ........ .010.... .1010111 */ + decode_insn32_extract_r(ctx, &u.f_r, insn); + switch ((insn >> 25) & 0x1) { + case 0x1: + /* 0011001. ........ .010.... .1010111 */ + if (trans_vext_x_v(ctx, &u.f_r)) return true; + return false; + } + return false; + case 0x14: + /* 010100.. ........ .010.... .1010111 */ + decode_insn32_extract_r2_vm(ctx, &u.f_rmr, insn); + if (trans_vmpopc_m(ctx, &u.f_rmr)) return true; + return false; + case 0x15: + /* 010101.. ........ .010.... .1010111 */ + decode_insn32_extract_r2_vm(ctx, &u.f_rmr, insn); + if (trans_vmfirst_m(ctx, &u.f_rmr)) return true; + return false; + case 0x16: + /* 010110.. ........ .010.... .1010111 */ + switch ((insn >> 15) & 0x1f) { + case 0x1: + /* 010110.. ....0000 1010.... .1010111 */ + decode_insn32_extract_r2_vm(ctx, &u.f_rmr, insn); + if (trans_vmsbf_m(ctx, &u.f_rmr)) return true; + return false; + case 0x2: + /* 010110.. ....0001 0010.... .1010111 */ + decode_insn32_extract_r2_vm(ctx, &u.f_rmr, insn); + if (trans_vmsof_m(ctx, &u.f_rmr)) return true; + return false; + case 0x3: + /* 010110.. ....0001 1010.... .1010111 */ + decode_insn32_extract_r2_vm(ctx, &u.f_rmr, insn); + if (trans_vmsif_m(ctx, &u.f_rmr)) return true; + return false; + case 0x10: + /* 010110.. ....1000 0010.... .1010111 */ + decode_insn32_extract_r2_vm(ctx, &u.f_rmr, insn); + if (trans_viota_m(ctx, &u.f_rmr)) return true; + return false; + case 0x11: + /* 010110.. ....1000 1010.... .1010111 */ + decode_insn32_extract_r1_vm(ctx, &u.f_decode_insn3219, insn); + switch ((insn >> 20) & 0x1f) { + case 0x0: + /* 010110.0 00001000 1010.... 
.1010111 */ + if (trans_vid_v(ctx, &u.f_decode_insn3219)) return true; + return false; + } + return false; + } + return false; + case 0x17: + /* 010111.. ........ .010.... .1010111 */ + decode_insn32_extract_r(ctx, &u.f_r, insn); + if (trans_vcompress_vm(ctx, &u.f_r)) return true; + return false; + case 0x18: + /* 011000.. ........ .010.... .1010111 */ + decode_insn32_extract_r(ctx, &u.f_r, insn); + if (trans_vmandnot_mm(ctx, &u.f_r)) return true; + return false; + case 0x19: + /* 011001.. ........ .010.... .1010111 */ + decode_insn32_extract_r(ctx, &u.f_r, insn); + if (trans_vmand_mm(ctx, &u.f_r)) return true; + return false; + case 0x1a: + /* 011010.. ........ .010.... .1010111 */ + decode_insn32_extract_r(ctx, &u.f_r, insn); + if (trans_vmor_mm(ctx, &u.f_r)) return true; + return false; + case 0x1b: + /* 011011.. ........ .010.... .1010111 */ + decode_insn32_extract_r(ctx, &u.f_r, insn); + if (trans_vmxor_mm(ctx, &u.f_r)) return true; + return false; + case 0x1c: + /* 011100.. ........ .010.... .1010111 */ + decode_insn32_extract_r(ctx, &u.f_r, insn); + if (trans_vmornot_mm(ctx, &u.f_r)) return true; + return false; + case 0x1d: + /* 011101.. ........ .010.... .1010111 */ + decode_insn32_extract_r(ctx, &u.f_r, insn); + if (trans_vmnand_mm(ctx, &u.f_r)) return true; + return false; + case 0x1e: + /* 011110.. ........ .010.... .1010111 */ + decode_insn32_extract_r(ctx, &u.f_r, insn); + if (trans_vmnor_mm(ctx, &u.f_r)) return true; + return false; + case 0x1f: + /* 011111.. ........ .010.... .1010111 */ + decode_insn32_extract_r(ctx, &u.f_r, insn); + if (trans_vmxnor_mm(ctx, &u.f_r)) return true; + return false; + } + return false; + case 0x00003000: + /* 0....... ........ .011.... .1010111 */ + switch ((insn >> 26) & 0x1f) { + case 0x0: + /* 000000.. ........ .011.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vadd_vi(ctx, &u.f_rmrr)) return true; + return false; + case 0x3: + /* 000011.. ........ .011.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vrsub_vi(ctx, &u.f_rmrr)) return true; + return false; + case 0x9: + /* 001001.. ........ .011.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vand_vi(ctx, &u.f_rmrr)) return true; + return false; + case 0xa: + /* 001010.. ........ .011.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vor_vi(ctx, &u.f_rmrr)) return true; + return false; + case 0xb: + /* 001011.. ........ .011.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vxor_vi(ctx, &u.f_rmrr)) return true; + return false; + case 0xc: + /* 001100.. ........ .011.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vrgather_vi(ctx, &u.f_rmrr)) return true; + return false; + case 0xe: + /* 001110.. ........ .011.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vslideup_vi(ctx, &u.f_rmrr)) return true; + return false; + case 0xf: + /* 001111.. ........ .011.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vslidedown_vi(ctx, &u.f_rmrr)) return true; + return false; + case 0x10: + /* 010000.. ........ .011.... .1010111 */ + decode_insn32_extract_r_vm_1(ctx, &u.f_rmrr, insn); + switch ((insn >> 25) & 0x1) { + case 0x1: + /* 0100001. ........ .011.... .1010111 */ + if (trans_vadc_vim(ctx, &u.f_rmrr)) return true; + return false; + } + return false; + case 0x11: + /* 010001.. ........ .011.... 
.1010111 */ + decode_insn32_extract_r_vm_1(ctx, &u.f_rmrr, insn); + switch ((insn >> 25) & 0x1) { + case 0x1: + /* 0100011. ........ .011.... .1010111 */ + if (trans_vmadc_vim(ctx, &u.f_rmrr)) return true; + return false; + } + return false; + case 0x17: + /* 010111.. ........ .011.... .1010111 */ + switch ((insn >> 25) & 0x1) { + case 0x0: + /* 0101110. ........ .011.... .1010111 */ + decode_insn32_extract_r_vm_0(ctx, &u.f_rmrr, insn); + if (trans_vmerge_vim(ctx, &u.f_rmrr)) return true; + return false; + case 0x1: + /* 0101111. ........ .011.... .1010111 */ + decode_insn32_extract_r2(ctx, &u.f_decode_insn3218, insn); + switch ((insn >> 20) & 0x1f) { + case 0x0: + /* 01011110 0000.... .011.... .1010111 */ + if (trans_vmv_v_i(ctx, &u.f_decode_insn3218)) return true; + return false; + } + return false; + } + return false; + case 0x18: + /* 011000.. ........ .011.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vmseq_vi(ctx, &u.f_rmrr)) return true; + return false; + case 0x19: + /* 011001.. ........ .011.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vmsne_vi(ctx, &u.f_rmrr)) return true; + return false; + case 0x1c: + /* 011100.. ........ .011.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vmsleu_vi(ctx, &u.f_rmrr)) return true; + return false; + case 0x1d: + /* 011101.. ........ .011.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vmsle_vi(ctx, &u.f_rmrr)) return true; + return false; + case 0x1e: + /* 011110.. ........ .011.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vmsgtu_vi(ctx, &u.f_rmrr)) return true; + return false; + case 0x1f: + /* 011111.. ........ .011.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vmsgt_vi(ctx, &u.f_rmrr)) return true; + return false; + } + return false; + case 0x00004000: + /* 0....... ........ .100.... .1010111 */ + switch ((insn >> 26) & 0x1f) { + case 0x0: + /* 000000.. ........ .100.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vadd_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x2: + /* 000010.. ........ .100.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vsub_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x3: + /* 000011.. ........ .100.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vrsub_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x4: + /* 000100.. ........ .100.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vminu_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x5: + /* 000101.. ........ .100.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vmin_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x6: + /* 000110.. ........ .100.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vmaxu_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x7: + /* 000111.. ........ .100.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vmax_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x9: + /* 001001.. ........ .100.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vand_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0xa: + /* 001010.. ........ .100.... 
.1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vor_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0xb: + /* 001011.. ........ .100.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vxor_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0xc: + /* 001100.. ........ .100.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vrgather_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0xe: + /* 001110.. ........ .100.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vslideup_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0xf: + /* 001111.. ........ .100.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vslidedown_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x10: + /* 010000.. ........ .100.... .1010111 */ + decode_insn32_extract_r_vm_1(ctx, &u.f_rmrr, insn); + switch ((insn >> 25) & 0x1) { + case 0x1: + /* 0100001. ........ .100.... .1010111 */ + if (trans_vadc_vxm(ctx, &u.f_rmrr)) return true; + return false; + } + return false; + case 0x11: + /* 010001.. ........ .100.... .1010111 */ + decode_insn32_extract_r_vm_1(ctx, &u.f_rmrr, insn); + switch ((insn >> 25) & 0x1) { + case 0x1: + /* 0100011. ........ .100.... .1010111 */ + if (trans_vmadc_vxm(ctx, &u.f_rmrr)) return true; + return false; + } + return false; + case 0x12: + /* 010010.. ........ .100.... .1010111 */ + decode_insn32_extract_r_vm_1(ctx, &u.f_rmrr, insn); + switch ((insn >> 25) & 0x1) { + case 0x1: + /* 0100101. ........ .100.... .1010111 */ + if (trans_vsbc_vxm(ctx, &u.f_rmrr)) return true; + return false; + } + return false; + case 0x13: + /* 010011.. ........ .100.... .1010111 */ + decode_insn32_extract_r_vm_1(ctx, &u.f_rmrr, insn); + switch ((insn >> 25) & 0x1) { + case 0x1: + /* 0100111. ........ .100.... .1010111 */ + if (trans_vmsbc_vxm(ctx, &u.f_rmrr)) return true; + return false; + } + return false; + case 0x17: + /* 010111.. ........ .100.... .1010111 */ + switch ((insn >> 25) & 0x1) { + case 0x0: + /* 0101110. ........ .100.... .1010111 */ + decode_insn32_extract_r_vm_0(ctx, &u.f_rmrr, insn); + if (trans_vmerge_vxm(ctx, &u.f_rmrr)) return true; + return false; + case 0x1: + /* 0101111. ........ .100.... .1010111 */ + decode_insn32_extract_r2(ctx, &u.f_decode_insn3218, insn); + switch ((insn >> 20) & 0x1f) { + case 0x0: + /* 01011110 0000.... .100.... .1010111 */ + if (trans_vmv_v_x(ctx, &u.f_decode_insn3218)) return true; + return false; + } + return false; + } + return false; + case 0x18: + /* 011000.. ........ .100.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vmseq_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x19: + /* 011001.. ........ .100.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vmsne_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x1a: + /* 011010.. ........ .100.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vmsltu_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x1b: + /* 011011.. ........ .100.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vmslt_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x1c: + /* 011100.. ........ .100.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vmsleu_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x1d: + /* 011101.. ........ .100.... 
.1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vmsle_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x1e: + /* 011110.. ........ .100.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vmsgtu_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x1f: + /* 011111.. ........ .100.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vmsgt_vx(ctx, &u.f_rmrr)) return true; + return false; + } + return false; + case 0x00005000: + /* 0....... ........ .101.... .1010111 */ + switch ((insn >> 26) & 0x1f) { + case 0x0: + /* 000000.. ........ .101.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vfadd_vf(ctx, &u.f_rmrr)) return true; + return false; + case 0x2: + /* 000010.. ........ .101.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vfsub_vf(ctx, &u.f_rmrr)) return true; + return false; + case 0x4: + /* 000100.. ........ .101.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vfmin_vf(ctx, &u.f_rmrr)) return true; + return false; + case 0x6: + /* 000110.. ........ .101.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vfmax_vf(ctx, &u.f_rmrr)) return true; + return false; + case 0x8: + /* 001000.. ........ .101.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vfsgnj_vf(ctx, &u.f_rmrr)) return true; + return false; + case 0x9: + /* 001001.. ........ .101.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vfsgnjn_vf(ctx, &u.f_rmrr)) return true; + return false; + case 0xa: + /* 001010.. ........ .101.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vfsgnjx_vf(ctx, &u.f_rmrr)) return true; + return false; + case 0xd: + /* 001101.. ........ .101.... .1010111 */ + decode_insn32_extract_r2(ctx, &u.f_decode_insn3218, insn); + switch ((insn >> 20) & 0x3f) { + case 0x20: + /* 00110110 0000.... .101.... .1010111 */ + if (trans_vfmv_s_f(ctx, &u.f_decode_insn3218)) return true; + return false; + } + return false; + case 0x17: + /* 010111.. ........ .101.... .1010111 */ + switch ((insn >> 25) & 0x1) { + case 0x0: + /* 0101110. ........ .101.... .1010111 */ + decode_insn32_extract_r_vm_0(ctx, &u.f_rmrr, insn); + if (trans_vfmerge_vfm(ctx, &u.f_rmrr)) return true; + return false; + case 0x1: + /* 0101111. ........ .101.... .1010111 */ + decode_insn32_extract_r2(ctx, &u.f_decode_insn3218, insn); + switch ((insn >> 20) & 0x1f) { + case 0x0: + /* 01011110 0000.... .101.... .1010111 */ + if (trans_vfmv_v_f(ctx, &u.f_decode_insn3218)) return true; + return false; + } + return false; + } + return false; + case 0x18: + /* 011000.. ........ .101.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vmfeq_vf(ctx, &u.f_rmrr)) return true; + return false; + case 0x19: + /* 011001.. ........ .101.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vmfle_vf(ctx, &u.f_rmrr)) return true; + return false; + case 0x1a: + /* 011010.. ........ .101.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vmford_vf(ctx, &u.f_rmrr)) return true; + return false; + case 0x1b: + /* 011011.. ........ .101.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vmflt_vf(ctx, &u.f_rmrr)) return true; + return false; + case 0x1c: + /* 011100.. ........ .101.... 
.1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vmfne_vf(ctx, &u.f_rmrr)) return true; + return false; + case 0x1d: + /* 011101.. ........ .101.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vmfgt_vf(ctx, &u.f_rmrr)) return true; + return false; + case 0x1f: + /* 011111.. ........ .101.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vmfge_vf(ctx, &u.f_rmrr)) return true; + return false; + } + return false; + case 0x00006000: + /* 0....... ........ .110.... .1010111 */ + switch ((insn >> 26) & 0x1f) { + case 0xd: + /* 001101.. ........ .110.... .1010111 */ + decode_insn32_extract_r2(ctx, &u.f_decode_insn3218, insn); + switch ((insn >> 20) & 0x3f) { + case 0x20: + /* 00110110 0000.... .110.... .1010111 */ + if (trans_vmv_s_x(ctx, &u.f_decode_insn3218)) return true; + return false; + } + return false; + case 0xe: + /* 001110.. ........ .110.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vslide1up_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0xf: + /* 001111.. ........ .110.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vslide1down_vx(ctx, &u.f_rmrr)) return true; + return false; + } + return false; + case 0x00007000: + /* 0....... ........ .111.... .1010111 */ + decode_insn32_extract_r2_zimm(ctx, &u.f_decode_insn3221, insn); + if (trans_vsetvli(ctx, &u.f_decode_insn3221)) return true; + return false; + case 0x80000000: + /* 1....... ........ .000.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + switch ((insn >> 26) & 0x1f) { + case 0x0: + /* 100000.. ........ .000.... .1010111 */ + if (trans_vsaddu_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x1: + /* 100001.. ........ .000.... .1010111 */ + if (trans_vsadd_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x2: + /* 100010.. ........ .000.... .1010111 */ + if (trans_vssubu_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x3: + /* 100011.. ........ .000.... .1010111 */ + if (trans_vssub_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x4: + /* 100100.. ........ .000.... .1010111 */ + if (trans_vaadd_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x5: + /* 100101.. ........ .000.... .1010111 */ + if (trans_vsll_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x6: + /* 100110.. ........ .000.... .1010111 */ + if (trans_vasub_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x7: + /* 100111.. ........ .000.... .1010111 */ + if (trans_vsmul_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x8: + /* 101000.. ........ .000.... .1010111 */ + if (trans_vsrl_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x9: + /* 101001.. ........ .000.... .1010111 */ + if (trans_vsra_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0xa: + /* 101010.. ........ .000.... .1010111 */ + if (trans_vssrl_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0xb: + /* 101011.. ........ .000.... .1010111 */ + if (trans_vssra_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0xc: + /* 101100.. ........ .000.... .1010111 */ + if (trans_vnsrl_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0xd: + /* 101101.. ........ .000.... .1010111 */ + if (trans_vnsra_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0xe: + /* 101110.. ........ .000.... .1010111 */ + if (trans_vnclipu_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0xf: + /* 101111.. ........ .000.... 
.1010111 */ + if (trans_vnclip_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x10: + /* 110000.. ........ .000.... .1010111 */ + if (trans_vwredsumu_vs(ctx, &u.f_rmrr)) return true; + return false; + case 0x11: + /* 110001.. ........ .000.... .1010111 */ + if (trans_vwredsum_vs(ctx, &u.f_rmrr)) return true; + return false; + case 0x1c: + /* 111100.. ........ .000.... .1010111 */ + if (trans_vwsmaccu_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x1d: + /* 111101.. ........ .000.... .1010111 */ + if (trans_vwsmacc_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x1e: + /* 111110.. ........ .000.... .1010111 */ + if (trans_vwsmaccsu_vv(ctx, &u.f_rmrr)) return true; + return false; + } + return false; + case 0x80001000: + /* 1....... ........ .001.... .1010111 */ + switch (insn & 0x74000000) { + case 0x00000000: + /* 1000.0.. ........ .001.... .1010111 */ + switch ((insn >> 27) & 0x1) { + case 0x0: + /* 100000.. ........ .001.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vfdiv_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x1: + /* 100010.. ........ .001.... .1010111 */ + decode_insn32_extract_r2_vm(ctx, &u.f_rmr, insn); + switch ((insn >> 15) & 0x1f) { + case 0x0: + /* 100010.. ....0000 0001.... .1010111 */ + if (trans_vfcvt_xu_f_v(ctx, &u.f_rmr)) return true; + return false; + case 0x1: + /* 100010.. ....0000 1001.... .1010111 */ + if (trans_vfcvt_x_f_v(ctx, &u.f_rmr)) return true; + return false; + case 0x2: + /* 100010.. ....0001 0001.... .1010111 */ + if (trans_vfcvt_f_xu_v(ctx, &u.f_rmr)) return true; + return false; + case 0x3: + /* 100010.. ....0001 1001.... .1010111 */ + if (trans_vfcvt_f_x_v(ctx, &u.f_rmr)) return true; + return false; + case 0x8: + /* 100010.. ....0100 0001.... .1010111 */ + if (trans_vfwcvt_xu_f_v(ctx, &u.f_rmr)) return true; + return false; + case 0x9: + /* 100010.. ....0100 1001.... .1010111 */ + if (trans_vfwcvt_x_f_v(ctx, &u.f_rmr)) return true; + return false; + case 0xa: + /* 100010.. ....0101 0001.... .1010111 */ + if (trans_vfwcvt_f_xu_v(ctx, &u.f_rmr)) return true; + return false; + case 0xb: + /* 100010.. ....0101 1001.... .1010111 */ + if (trans_vfwcvt_f_x_v(ctx, &u.f_rmr)) return true; + return false; + case 0xc: + /* 100010.. ....0110 0001.... .1010111 */ + if (trans_vfwcvt_f_f_v(ctx, &u.f_rmr)) return true; + return false; + case 0x10: + /* 100010.. ....1000 0001.... .1010111 */ + if (trans_vfncvt_xu_f_v(ctx, &u.f_rmr)) return true; + return false; + case 0x11: + /* 100010.. ....1000 1001.... .1010111 */ + if (trans_vfncvt_x_f_v(ctx, &u.f_rmr)) return true; + return false; + case 0x12: + /* 100010.. ....1001 0001.... .1010111 */ + if (trans_vfncvt_f_xu_v(ctx, &u.f_rmr)) return true; + return false; + case 0x13: + /* 100010.. ....1001 1001.... .1010111 */ + if (trans_vfncvt_f_x_v(ctx, &u.f_rmr)) return true; + return false; + case 0x14: + /* 100010.. ....1010 0001.... .1010111 */ + if (trans_vfncvt_f_f_v(ctx, &u.f_rmr)) return true; + return false; + } + return false; + } + return false; + case 0x04000000: + /* 1000.1.. ........ .001.... .1010111 */ + decode_insn32_extract_r2_vm(ctx, &u.f_rmr, insn); + switch (insn & 0x080f8000) { + case 0x08000000: + /* 100011.. ....0000 0001.... .1010111 */ + if (trans_vfsqrt_v(ctx, &u.f_rmr)) return true; + return false; + case 0x08080000: + /* 100011.. ....1000 0001.... .1010111 */ + if (trans_vfclass_v(ctx, &u.f_rmr)) return true; + return false; + } + return false; + case 0x10000000: + /* 1001.0.. ........ .001.... 
.1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + switch ((insn >> 27) & 0x1) { + case 0x0: + /* 100100.. ........ .001.... .1010111 */ + if (trans_vfmul_vv(ctx, &u.f_rmrr)) return true; + return false; + } + return false; + case 0x20000000: + /* 1010.0.. ........ .001.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + switch ((insn >> 27) & 0x1) { + case 0x0: + /* 101000.. ........ .001.... .1010111 */ + if (trans_vfmadd_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x1: + /* 101010.. ........ .001.... .1010111 */ + if (trans_vfmsub_vv(ctx, &u.f_rmrr)) return true; + return false; + } + return false; + case 0x24000000: + /* 1010.1.. ........ .001.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + switch ((insn >> 27) & 0x1) { + case 0x0: + /* 101001.. ........ .001.... .1010111 */ + if (trans_vfnmadd_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x1: + /* 101011.. ........ .001.... .1010111 */ + if (trans_vfnmsub_vv(ctx, &u.f_rmrr)) return true; + return false; + } + return false; + case 0x30000000: + /* 1011.0.. ........ .001.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + switch ((insn >> 27) & 0x1) { + case 0x0: + /* 101100.. ........ .001.... .1010111 */ + if (trans_vfmacc_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x1: + /* 101110.. ........ .001.... .1010111 */ + if (trans_vfmsac_vv(ctx, &u.f_rmrr)) return true; + return false; + } + return false; + case 0x34000000: + /* 1011.1.. ........ .001.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + switch ((insn >> 27) & 0x1) { + case 0x0: + /* 101101.. ........ .001.... .1010111 */ + if (trans_vfnmacc_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x1: + /* 101111.. ........ .001.... .1010111 */ + if (trans_vfnmsac_vv(ctx, &u.f_rmrr)) return true; + return false; + } + return false; + case 0x40000000: + /* 1100.0.. ........ .001.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + switch ((insn >> 27) & 0x1) { + case 0x0: + /* 110000.. ........ .001.... .1010111 */ + if (trans_vfwadd_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x1: + /* 110010.. ........ .001.... .1010111 */ + if (trans_vfwsub_vv(ctx, &u.f_rmrr)) return true; + return false; + } + return false; + case 0x44000000: + /* 1100.1.. ........ .001.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vfwredsum_vs(ctx, &u.f_rmrr)) return true; + return false; + case 0x50000000: + /* 1101.0.. ........ .001.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + switch ((insn >> 27) & 0x1) { + case 0x0: + /* 110100.. ........ .001.... .1010111 */ + if (trans_vfwadd_wv(ctx, &u.f_rmrr)) return true; + return false; + case 0x1: + /* 110110.. ........ .001.... .1010111 */ + if (trans_vfwsub_wv(ctx, &u.f_rmrr)) return true; + return false; + } + return false; + case 0x60000000: + /* 1110.0.. ........ .001.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + switch ((insn >> 27) & 0x1) { + case 0x0: + /* 111000.. ........ .001.... .1010111 */ + if (trans_vfwmul_vv(ctx, &u.f_rmrr)) return true; + return false; + } + return false; + case 0x70000000: + /* 1111.0.. ........ .001.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + switch ((insn >> 27) & 0x1) { + case 0x0: + /* 111100.. ........ .001.... .1010111 */ + if (trans_vfwmacc_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x1: + /* 111110.. ........ .001.... 
.1010111 */ + if (trans_vfwmsac_vv(ctx, &u.f_rmrr)) return true; + return false; + } + return false; + case 0x74000000: + /* 1111.1.. ........ .001.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + switch ((insn >> 27) & 0x1) { + case 0x0: + /* 111101.. ........ .001.... .1010111 */ + if (trans_vfwnmacc_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x1: + /* 111111.. ........ .001.... .1010111 */ + if (trans_vfwnmsac_vv(ctx, &u.f_rmrr)) return true; + return false; + } + return false; + } + return false; + case 0x80002000: + /* 1....... ........ .010.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + switch ((insn >> 26) & 0x1f) { + case 0x0: + /* 100000.. ........ .010.... .1010111 */ + if (trans_vdivu_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x1: + /* 100001.. ........ .010.... .1010111 */ + if (trans_vdiv_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x2: + /* 100010.. ........ .010.... .1010111 */ + if (trans_vremu_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x3: + /* 100011.. ........ .010.... .1010111 */ + if (trans_vrem_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x4: + /* 100100.. ........ .010.... .1010111 */ + if (trans_vmulhu_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x5: + /* 100101.. ........ .010.... .1010111 */ + if (trans_vmul_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x6: + /* 100110.. ........ .010.... .1010111 */ + if (trans_vmulhsu_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x7: + /* 100111.. ........ .010.... .1010111 */ + if (trans_vmulh_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x9: + /* 101001.. ........ .010.... .1010111 */ + if (trans_vmadd_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0xb: + /* 101011.. ........ .010.... .1010111 */ + if (trans_vnmsub_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0xd: + /* 101101.. ........ .010.... .1010111 */ + if (trans_vmacc_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0xf: + /* 101111.. ........ .010.... .1010111 */ + if (trans_vnmsac_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x10: + /* 110000.. ........ .010.... .1010111 */ + if (trans_vwaddu_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x11: + /* 110001.. ........ .010.... .1010111 */ + if (trans_vwadd_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x12: + /* 110010.. ........ .010.... .1010111 */ + if (trans_vwsubu_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x13: + /* 110011.. ........ .010.... .1010111 */ + if (trans_vwsub_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x14: + /* 110100.. ........ .010.... .1010111 */ + if (trans_vwaddu_wv(ctx, &u.f_rmrr)) return true; + return false; + case 0x15: + /* 110101.. ........ .010.... .1010111 */ + if (trans_vwadd_wv(ctx, &u.f_rmrr)) return true; + return false; + case 0x16: + /* 110110.. ........ .010.... .1010111 */ + if (trans_vwsubu_wv(ctx, &u.f_rmrr)) return true; + return false; + case 0x17: + /* 110111.. ........ .010.... .1010111 */ + if (trans_vwsub_wv(ctx, &u.f_rmrr)) return true; + return false; + case 0x18: + /* 111000.. ........ .010.... .1010111 */ + if (trans_vwmulu_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x1a: + /* 111010.. ........ .010.... .1010111 */ + if (trans_vwmulsu_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x1b: + /* 111011.. ........ .010.... 
.1010111 */ + if (trans_vwmul_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x1c: + /* 111100.. ........ .010.... .1010111 */ + if (trans_vwmaccu_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x1d: + /* 111101.. ........ .010.... .1010111 */ + if (trans_vwmacc_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x1e: + /* 111110.. ........ .010.... .1010111 */ + if (trans_vwmaccsu_vv(ctx, &u.f_rmrr)) return true; + return false; + } + return false; + case 0x80003000: + /* 1....... ........ .011.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + switch ((insn >> 26) & 0x1f) { + case 0x0: + /* 100000.. ........ .011.... .1010111 */ + if (trans_vsaddu_vi(ctx, &u.f_rmrr)) return true; + return false; + case 0x1: + /* 100001.. ........ .011.... .1010111 */ + if (trans_vsadd_vi(ctx, &u.f_rmrr)) return true; + return false; + case 0x4: + /* 100100.. ........ .011.... .1010111 */ + if (trans_vaadd_vi(ctx, &u.f_rmrr)) return true; + return false; + case 0x5: + /* 100101.. ........ .011.... .1010111 */ + if (trans_vsll_vi(ctx, &u.f_rmrr)) return true; + return false; + case 0x8: + /* 101000.. ........ .011.... .1010111 */ + if (trans_vsrl_vi(ctx, &u.f_rmrr)) return true; + return false; + case 0x9: + /* 101001.. ........ .011.... .1010111 */ + if (trans_vsra_vi(ctx, &u.f_rmrr)) return true; + return false; + case 0xa: + /* 101010.. ........ .011.... .1010111 */ + if (trans_vssrl_vi(ctx, &u.f_rmrr)) return true; + return false; + case 0xb: + /* 101011.. ........ .011.... .1010111 */ + if (trans_vssra_vi(ctx, &u.f_rmrr)) return true; + return false; + case 0xc: + /* 101100.. ........ .011.... .1010111 */ + if (trans_vnsrl_vi(ctx, &u.f_rmrr)) return true; + return false; + case 0xd: + /* 101101.. ........ .011.... .1010111 */ + if (trans_vnsra_vi(ctx, &u.f_rmrr)) return true; + return false; + case 0xe: + /* 101110.. ........ .011.... .1010111 */ + if (trans_vnclipu_vi(ctx, &u.f_rmrr)) return true; + return false; + case 0xf: + /* 101111.. ........ .011.... .1010111 */ + if (trans_vnclip_vi(ctx, &u.f_rmrr)) return true; + return false; + } + return false; + case 0x80004000: + /* 1....... ........ .100.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + switch ((insn >> 26) & 0x1f) { + case 0x0: + /* 100000.. ........ .100.... .1010111 */ + if (trans_vsaddu_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x1: + /* 100001.. ........ .100.... .1010111 */ + if (trans_vsadd_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x2: + /* 100010.. ........ .100.... .1010111 */ + if (trans_vssubu_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x3: + /* 100011.. ........ .100.... .1010111 */ + if (trans_vssub_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x4: + /* 100100.. ........ .100.... .1010111 */ + if (trans_vaadd_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x5: + /* 100101.. ........ .100.... .1010111 */ + if (trans_vsll_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x6: + /* 100110.. ........ .100.... .1010111 */ + if (trans_vasub_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x7: + /* 100111.. ........ .100.... .1010111 */ + if (trans_vsmul_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x8: + /* 101000.. ........ .100.... .1010111 */ + if (trans_vsrl_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x9: + /* 101001.. ........ .100.... .1010111 */ + if (trans_vsra_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0xa: + /* 101010.. ........ .100.... 
.1010111 */ + if (trans_vssrl_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0xb: + /* 101011.. ........ .100.... .1010111 */ + if (trans_vssra_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0xc: + /* 101100.. ........ .100.... .1010111 */ + if (trans_vnsrl_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0xd: + /* 101101.. ........ .100.... .1010111 */ + if (trans_vnsra_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0xe: + /* 101110.. ........ .100.... .1010111 */ + if (trans_vnclipu_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0xf: + /* 101111.. ........ .100.... .1010111 */ + if (trans_vnclip_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x1c: + /* 111100.. ........ .100.... .1010111 */ + if (trans_vwsmaccu_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x1d: + /* 111101.. ........ .100.... .1010111 */ + if (trans_vwsmacc_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x1e: + /* 111110.. ........ .100.... .1010111 */ + if (trans_vwsmaccsu_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x1f: + /* 111111.. ........ .100.... .1010111 */ + if (trans_vwsmaccus_vx(ctx, &u.f_rmrr)) return true; + return false; + } + return false; + case 0x80005000: + /* 1....... ........ .101.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + switch ((insn >> 26) & 0x1f) { + case 0x0: + /* 100000.. ........ .101.... .1010111 */ + if (trans_vfdiv_vf(ctx, &u.f_rmrr)) return true; + return false; + case 0x1: + /* 100001.. ........ .101.... .1010111 */ + if (trans_vfrdiv_vf(ctx, &u.f_rmrr)) return true; + return false; + case 0x4: + /* 100100.. ........ .101.... .1010111 */ + if (trans_vfmul_vf(ctx, &u.f_rmrr)) return true; + return false; + case 0x7: + /* 100111.. ........ .101.... .1010111 */ + if (trans_vfrsub_vf(ctx, &u.f_rmrr)) return true; + return false; + case 0x8: + /* 101000.. ........ .101.... .1010111 */ + if (trans_vfmadd_vf(ctx, &u.f_rmrr)) return true; + return false; + case 0x9: + /* 101001.. ........ .101.... .1010111 */ + if (trans_vfnmadd_vf(ctx, &u.f_rmrr)) return true; + return false; + case 0xa: + /* 101010.. ........ .101.... .1010111 */ + if (trans_vfmsub_vf(ctx, &u.f_rmrr)) return true; + return false; + case 0xb: + /* 101011.. ........ .101.... .1010111 */ + if (trans_vfnmsub_vf(ctx, &u.f_rmrr)) return true; + return false; + case 0xc: + /* 101100.. ........ .101.... .1010111 */ + if (trans_vfmacc_vf(ctx, &u.f_rmrr)) return true; + return false; + case 0xd: + /* 101101.. ........ .101.... .1010111 */ + if (trans_vfnmacc_vf(ctx, &u.f_rmrr)) return true; + return false; + case 0xe: + /* 101110.. ........ .101.... .1010111 */ + if (trans_vfmsac_vf(ctx, &u.f_rmrr)) return true; + return false; + case 0xf: + /* 101111.. ........ .101.... .1010111 */ + if (trans_vfnmsac_vf(ctx, &u.f_rmrr)) return true; + return false; + case 0x10: + /* 110000.. ........ .101.... .1010111 */ + if (trans_vfwadd_vf(ctx, &u.f_rmrr)) return true; + return false; + case 0x12: + /* 110010.. ........ .101.... .1010111 */ + if (trans_vfwsub_vf(ctx, &u.f_rmrr)) return true; + return false; + case 0x14: + /* 110100.. ........ .101.... .1010111 */ + if (trans_vfwadd_wf(ctx, &u.f_rmrr)) return true; + return false; + case 0x16: + /* 110110.. ........ .101.... .1010111 */ + if (trans_vfwsub_wf(ctx, &u.f_rmrr)) return true; + return false; + case 0x18: + /* 111000.. ........ .101.... .1010111 */ + if (trans_vfwmul_vf(ctx, &u.f_rmrr)) return true; + return false; + case 0x1c: + /* 111100.. ........ .101.... 
.1010111 */ + if (trans_vfwmacc_vf(ctx, &u.f_rmrr)) return true; + return false; + case 0x1d: + /* 111101.. ........ .101.... .1010111 */ + if (trans_vfwnmacc_vf(ctx, &u.f_rmrr)) return true; + return false; + case 0x1e: + /* 111110.. ........ .101.... .1010111 */ + if (trans_vfwmsac_vf(ctx, &u.f_rmrr)) return true; + return false; + case 0x1f: + /* 111111.. ........ .101.... .1010111 */ + if (trans_vfwnmsac_vf(ctx, &u.f_rmrr)) return true; + return false; + } + return false; + case 0x80006000: + /* 1....... ........ .110.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + switch ((insn >> 26) & 0x1f) { + case 0x0: + /* 100000.. ........ .110.... .1010111 */ + if (trans_vdivu_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x1: + /* 100001.. ........ .110.... .1010111 */ + if (trans_vdiv_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x2: + /* 100010.. ........ .110.... .1010111 */ + if (trans_vremu_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x3: + /* 100011.. ........ .110.... .1010111 */ + if (trans_vrem_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x4: + /* 100100.. ........ .110.... .1010111 */ + if (trans_vmulhu_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x5: + /* 100101.. ........ .110.... .1010111 */ + if (trans_vmul_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x6: + /* 100110.. ........ .110.... .1010111 */ + if (trans_vmulhsu_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x7: + /* 100111.. ........ .110.... .1010111 */ + if (trans_vmulh_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x9: + /* 101001.. ........ .110.... .1010111 */ + if (trans_vmadd_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0xb: + /* 101011.. ........ .110.... .1010111 */ + if (trans_vnmsub_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0xd: + /* 101101.. ........ .110.... .1010111 */ + if (trans_vmacc_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0xf: + /* 101111.. ........ .110.... .1010111 */ + if (trans_vnmsac_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x10: + /* 110000.. ........ .110.... .1010111 */ + if (trans_vwaddu_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x11: + /* 110001.. ........ .110.... .1010111 */ + if (trans_vwadd_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x12: + /* 110010.. ........ .110.... .1010111 */ + if (trans_vwsubu_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x13: + /* 110011.. ........ .110.... .1010111 */ + if (trans_vwsub_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x14: + /* 110100.. ........ .110.... .1010111 */ + if (trans_vwaddu_wx(ctx, &u.f_rmrr)) return true; + return false; + case 0x15: + /* 110101.. ........ .110.... .1010111 */ + if (trans_vwadd_wx(ctx, &u.f_rmrr)) return true; + return false; + case 0x16: + /* 110110.. ........ .110.... .1010111 */ + if (trans_vwsubu_wx(ctx, &u.f_rmrr)) return true; + return false; + case 0x17: + /* 110111.. ........ .110.... .1010111 */ + if (trans_vwsub_wx(ctx, &u.f_rmrr)) return true; + return false; + case 0x18: + /* 111000.. ........ .110.... .1010111 */ + if (trans_vwmulu_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x1a: + /* 111010.. ........ .110.... .1010111 */ + if (trans_vwmulsu_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x1b: + /* 111011.. ........ .110.... .1010111 */ + if (trans_vwmul_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x1c: + /* 111100.. ........ .110.... 
.1010111 */ + if (trans_vwmaccu_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x1d: + /* 111101.. ........ .110.... .1010111 */ + if (trans_vwmacc_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x1e: + /* 111110.. ........ .110.... .1010111 */ + if (trans_vwmaccsu_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x1f: + /* 111111.. ........ .110.... .1010111 */ + if (trans_vwmaccus_vx(ctx, &u.f_rmrr)) return true; + return false; + } + return false; + case 0x80007000: + /* 1....... ........ .111.... .1010111 */ + decode_insn32_extract_r(ctx, &u.f_r, insn); + switch ((insn >> 25) & 0x3f) { + case 0x0: + /* 1000000. ........ .111.... .1010111 */ + if (trans_vsetvl(ctx, &u.f_r)) return true; return false; } return false; @@ -1568,32 +4285,26 @@ static bool decode_insn32(DisasContext *ctx, uint32_t insn) switch ((insn >> 12) & 0x7) { case 0x0: /* ........ ........ .000.... .1100011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:90 */ if (trans_beq(ctx, &u.f_b)) return true; return false; case 0x1: /* ........ ........ .001.... .1100011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:91 */ if (trans_bne(ctx, &u.f_b)) return true; return false; case 0x4: /* ........ ........ .100.... .1100011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:92 */ if (trans_blt(ctx, &u.f_b)) return true; return false; case 0x5: /* ........ ........ .101.... .1100011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:93 */ if (trans_bge(ctx, &u.f_b)) return true; return false; case 0x6: /* ........ ........ .110.... .1100011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:94 */ if (trans_bltu(ctx, &u.f_b)) return true; return false; case 0x7: /* ........ ........ .111.... .1100011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:95 */ if (trans_bgeu(ctx, &u.f_b)) return true; return false; } @@ -1604,14 +4315,12 @@ static bool decode_insn32(DisasContext *ctx, uint32_t insn) switch ((insn >> 12) & 0x7) { case 0x0: /* ........ ........ .000.... .1100111 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:89 */ if (trans_jalr(ctx, &u.f_i)) return true; return false; } return false; case 0x0000006f: /* ........ ........ ........ .1101111 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:88 */ decode_insn32_extract_j(ctx, &u.f_j, insn); if (trans_jal(ctx, &u.f_j)) return true; return false; @@ -1623,21 +4332,18 @@ static bool decode_insn32(DisasContext *ctx, uint32_t insn) switch (insn & 0xfe000f80) { case 0x00000000: /* 0000000. ........ 
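
The case values above suggest the enclosing switch masks bit 31 together with the funct3 field (insn & 0x80007000) after the OP-V major opcode has already been matched, and the innermost switch then keys on bits 31:25. A minimal standalone sketch, assuming that masking scheme and using vsetvl x1, x2, x3 as the test word (the encoding is computed here, not taken from the generated file):

    /* Standalone sketch, not part of the generated decoder: how an OP-V word
     * is narrowed step by step.  vsetvl x1, x2, x3 is built from its fields:
     * funct7=1000000, rs2=3, rs1=2, funct3=111, rd=1, opcode=1010111. */
    #include <inttypes.h>
    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        uint32_t insn = (0x40u << 25) | (3u << 20) | (2u << 15) |
                        (7u << 12) | (1u << 7) | 0x57u;

        printf("insn                = 0x%08" PRIx32 "\n", insn);        /* 0x803170d7 */
        printf("opcode              = 0x%02" PRIx32 "\n", insn & 0x7f); /* 0x57 (OP-V) */
        printf("insn & 0x80007000   = 0x%08" PRIx32 "\n",
               insn & 0x80007000);                      /* 0x80007000 case above */
        printf("(insn >> 25) & 0x3f = 0x%" PRIx32 "\n",
               (insn >> 25) & 0x3f);                    /* 0x0 -> trans_vsetvl */
        return 0;
    }
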
.0000000 01110011 */ - decode_insn32_extract_decode_insn32_Fmt_18(ctx, &u.f_empty, insn); + decode_insn32_extract_decode_insn32_Fmt_28(ctx, &u.f_empty, insn); switch ((insn >> 15) & 0x3ff) { case 0x0: /* 00000000 00000000 00000000 01110011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:74 */ if (trans_ecall(ctx, &u.f_empty)) return true; return false; case 0x20: /* 00000000 00010000 00000000 01110011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:75 */ if (trans_ebreak(ctx, &u.f_empty)) return true; return false; case 0x40: /* 00000000 00100000 00000000 01110011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:76 */ if (trans_uret(ctx, &u.f_empty)) return true; return false; } @@ -1647,28 +4353,25 @@ static bool decode_insn32(DisasContext *ctx, uint32_t insn) switch ((insn >> 20) & 0x1f) { case 0x2: /* 00010000 0010.... .0000000 01110011 */ - decode_insn32_extract_decode_insn32_Fmt_18(ctx, &u.f_empty, insn); + decode_insn32_extract_decode_insn32_Fmt_28(ctx, &u.f_empty, insn); switch ((insn >> 15) & 0x1f) { case 0x0: /* 00010000 00100000 00000000 01110011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:77 */ if (trans_sret(ctx, &u.f_empty)) return true; return false; } return false; case 0x4: /* 00010000 0100.... .0000000 01110011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:83 */ - decode_insn32_extract_sfence_vm(ctx, &u.f_decode_insn3215, insn); - if (trans_sfence_vm(ctx, &u.f_decode_insn3215)) return true; + decode_insn32_extract_sfence_vm(ctx, &u.f_decode_insn3223, insn); + if (trans_sfence_vm(ctx, &u.f_decode_insn3223)) return true; return false; case 0x5: /* 00010000 0101.... .0000000 01110011 */ - decode_insn32_extract_decode_insn32_Fmt_18(ctx, &u.f_empty, insn); + decode_insn32_extract_decode_insn32_Fmt_28(ctx, &u.f_empty, insn); switch ((insn >> 15) & 0x1f) { case 0x0: /* 00010000 01010000 00000000 01110011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:79 */ if (trans_wfi(ctx, &u.f_empty)) return true; return false; } @@ -1677,70 +4380,60 @@ static bool decode_insn32(DisasContext *ctx, uint32_t insn) return false; case 0x12000000: /* 0001001. ........ .0000000 01110011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:82 */ - decode_insn32_extract_sfence_vma(ctx, &u.f_decode_insn3214, insn); - if (trans_sfence_vma(ctx, &u.f_decode_insn3214)) return true; + decode_insn32_extract_sfence_vma(ctx, &u.f_decode_insn3222, insn); + if (trans_sfence_vma(ctx, &u.f_decode_insn3222)) return true; return false; case 0x22000000: /* 0010001. ........ .0000000 01110011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:81 */ - decode_insn32_extract_hfence_bvma(ctx, &u.f_decode_insn3214, insn); - if (trans_hfence_bvma(ctx, &u.f_decode_insn3214)) return true; + decode_insn32_extract_hfence_vvma(ctx, &u.f_decode_insn3222, insn); + if (trans_hfence_vvma(ctx, &u.f_decode_insn3222)) return true; return false; case 0x30000000: /* 0011000. ........ 
.0000000 01110011 */ - decode_insn32_extract_decode_insn32_Fmt_18(ctx, &u.f_empty, insn); + decode_insn32_extract_decode_insn32_Fmt_28(ctx, &u.f_empty, insn); switch ((insn >> 15) & 0x3ff) { case 0x40: /* 00110000 00100000 00000000 01110011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:78 */ if (trans_mret(ctx, &u.f_empty)) return true; return false; } return false; case 0x62000000: /* 0110001. ........ .0000000 01110011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:80 */ - decode_insn32_extract_hfence_gvma(ctx, &u.f_decode_insn3214, insn); - if (trans_hfence_gvma(ctx, &u.f_decode_insn3214)) return true; + decode_insn32_extract_hfence_gvma(ctx, &u.f_decode_insn3222, insn); + if (trans_hfence_gvma(ctx, &u.f_decode_insn3222)) return true; return false; } return false; case 0x1: /* ........ ........ .001.... .1110011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:125 */ - decode_insn32_extract_csr(ctx, &u.f_decode_insn329, insn); - if (trans_csrrw(ctx, &u.f_decode_insn329)) return true; + decode_insn32_extract_csr(ctx, &u.f_decode_insn3214, insn); + if (trans_csrrw(ctx, &u.f_decode_insn3214)) return true; return false; case 0x2: /* ........ ........ .010.... .1110011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:126 */ - decode_insn32_extract_csr(ctx, &u.f_decode_insn329, insn); - if (trans_csrrs(ctx, &u.f_decode_insn329)) return true; + decode_insn32_extract_csr(ctx, &u.f_decode_insn3214, insn); + if (trans_csrrs(ctx, &u.f_decode_insn3214)) return true; return false; case 0x3: /* ........ ........ .011.... .1110011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:127 */ - decode_insn32_extract_csr(ctx, &u.f_decode_insn329, insn); - if (trans_csrrc(ctx, &u.f_decode_insn329)) return true; + decode_insn32_extract_csr(ctx, &u.f_decode_insn3214, insn); + if (trans_csrrc(ctx, &u.f_decode_insn3214)) return true; return false; case 0x5: /* ........ ........ .101.... .1110011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:128 */ - decode_insn32_extract_csr(ctx, &u.f_decode_insn329, insn); - if (trans_csrrwi(ctx, &u.f_decode_insn329)) return true; + decode_insn32_extract_csr(ctx, &u.f_decode_insn3214, insn); + if (trans_csrrwi(ctx, &u.f_decode_insn3214)) return true; return false; case 0x6: /* ........ ........ .110.... .1110011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:129 */ - decode_insn32_extract_csr(ctx, &u.f_decode_insn329, insn); - if (trans_csrrsi(ctx, &u.f_decode_insn329)) return true; + decode_insn32_extract_csr(ctx, &u.f_decode_insn3214, insn); + if (trans_csrrsi(ctx, &u.f_decode_insn3214)) return true; return false; case 0x7: /* ........ ........ .111.... .1110011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:130 */ - decode_insn32_extract_csr(ctx, &u.f_decode_insn329, insn); - if (trans_csrrci(ctx, &u.f_decode_insn329)) return true; + decode_insn32_extract_csr(ctx, &u.f_decode_insn3214, insn); + if (trans_csrrci(ctx, &u.f_decode_insn3214)) return true; return false; } return false; diff --git a/qemu/target/riscv/translate.c b/qemu/target/riscv/translate.c index 792bc12fd0..37f11cc481 100644 --- a/qemu/target/riscv/translate.c +++ b/qemu/target/riscv/translate.c @@ -56,6 +56,13 @@ typedef struct DisasContext { to reset this known value. 
*/ int frm; bool ext_ifencei; + /* vector extension */ + bool vill; + uint8_t lmul; + uint8_t sew; + uint16_t vlen; + uint16_t mlen; + bool vl_eq_vlmax; // Unicorn struct uc_struct *uc; @@ -557,6 +564,11 @@ static void decode_RV32_64C(DisasContext *ctx, uint16_t opcode) } } +static int ex_plus_1(DisasContext *ctx, int nf) +{ + return nf + 1; +} + #define EX_SH(amount) \ static int ex_shift_##amount(DisasContext *ctx, int imm) \ { \ @@ -733,6 +745,8 @@ static bool gen_shift(DisasContext *ctx, arg_r *a, #include "insn_trans/trans_rva.inc.c" #include "insn_trans/trans_rvf.inc.c" #include "insn_trans/trans_rvd.inc.c" +#include "insn_trans/trans_rvh.inc.c" +#include "insn_trans/trans_rvv.inc.c" #include "insn_trans/trans_privileged.inc.c" /* Include the auto-generated decoder for 16 bit insn */ @@ -779,13 +793,14 @@ static void riscv_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs) DisasContext *ctx = container_of(dcbase, DisasContext, base); CPURISCVState *env = cs->env_ptr; RISCVCPU *cpu = RISCV_CPU(cs); + uint32_t tb_flags = ctx->base.tb->flags; // unicorn setup ctx->uc = cs->uc; ctx->pc_succ_insn = ctx->base.pc_first; - ctx->mem_idx = ctx->base.tb->flags & TB_FLAGS_MMU_MASK; - ctx->mstatus_fs = ctx->base.tb->flags & TB_FLAGS_MSTATUS_FS; + ctx->mem_idx = tb_flags & TB_FLAGS_MMU_MASK; + ctx->mstatus_fs = tb_flags & TB_FLAGS_MSTATUS_FS; ctx->priv_ver = env->priv_ver; if (riscv_has_ext(env, RVH)) { @@ -807,6 +822,12 @@ static void riscv_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs) ctx->misa = env->misa; ctx->frm = -1; /* unknown rounding mode */ ctx->ext_ifencei = cpu->cfg.ext_ifencei; + ctx->vlen = cpu->cfg.vlen; + ctx->vill = FIELD_EX32(tb_flags, TB_FLAGS, VILL); + ctx->sew = FIELD_EX32(tb_flags, TB_FLAGS, SEW); + ctx->lmul = FIELD_EX32(tb_flags, TB_FLAGS, LMUL); + ctx->mlen = 1 << (ctx->sew + 3 - ctx->lmul); + ctx->vl_eq_vlmax = FIELD_EX32(tb_flags, TB_FLAGS, VL_EQ_VLMAX); } static void riscv_tr_tb_start(DisasContextBase *db, CPUState *cpu) @@ -965,6 +986,7 @@ void riscv_translate_init(struct uc_struct *uc) } tcg_ctx->cpu_pc = tcg_global_mem_new(tcg_ctx, tcg_ctx->cpu_env, offsetof(CPURISCVState, pc), "pc"); + tcg_ctx->cpu_vl = tcg_global_mem_new(tcg_ctx, tcg_ctx->cpu_env, offsetof(CPURISCVState, vl), "vl"); tcg_ctx->load_res = tcg_global_mem_new(tcg_ctx, tcg_ctx->cpu_env, offsetof(CPURISCVState, load_res), "load_res"); tcg_ctx->load_val = tcg_global_mem_new(tcg_ctx, tcg_ctx->cpu_env, offsetof(CPURISCVState, load_val), diff --git a/qemu/target/riscv/vector_helper.c b/qemu/target/riscv/vector_helper.c new file mode 100644 index 0000000000..1c726edf0a --- /dev/null +++ b/qemu/target/riscv/vector_helper.c @@ -0,0 +1,4913 @@ +/* + * RISC-V Vector Extension Helpers for QEMU. + * + * Copyright (c) 2020 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2 or later, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see . 
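
The new DisasContext fields cache VILL/SEW/LMUL from tb->flags and derive mlen as 1 << (sew + 3 - lmul). Since the encoded fields mean SEW = 8 << sew and LMUL = 1 << lmul, that expression is just SEW/LMUL, the mask-element width from the v0.7.1 vector spec. A small standalone sketch of the arithmetic (illustrative only, no QEMU types):

    /* Standalone sketch of the MLEN derivation used above: with SEW = 8 << sew
     * and LMUL = 1 << lmul, mlen = 1 << (sew + 3 - lmul) is exactly SEW/LMUL. */
    #include <stdio.h>

    int main(void)
    {
        for (unsigned sew = 0; sew <= 3; sew++) {        /* 8..64-bit elements */
            for (unsigned lmul = 0; lmul <= 3; lmul++) { /* LMUL 1..8 */
                unsigned mlen = 1u << (sew + 3 - lmul);
                printf("SEW=%2u LMUL=%u -> MLEN=%2u\n",
                       8u << sew, 1u << lmul, mlen);
            }
        }
        return 0;
    }
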
+ */ + +#include "qemu/osdep.h" +#include "cpu.h" +#include "exec/memop.h" +#include "exec/exec-all.h" +#include "exec/helper-proto.h" +#include "fpu/softfloat.h" +#include "tcg/tcg-gvec-desc.h" +#include "internals.h" +#include + +target_ulong HELPER(vsetvl)(CPURISCVState *env, target_ulong s1, + target_ulong s2) +{ + int vlmax, vl; + RISCVCPU *cpu = env_archcpu(env); + uint16_t sew = 8 << FIELD_EX64(s2, VTYPE, VSEW); + uint8_t ediv = FIELD_EX64(s2, VTYPE, VEDIV); + bool vill = FIELD_EX64(s2, VTYPE, VILL); + target_ulong reserved = FIELD_EX64(s2, VTYPE, RESERVED); + + if ((sew > cpu->cfg.elen) || vill || (ediv != 0) || (reserved != 0)) { + /* only set vill bit. */ + FIELD_DP64(0, VTYPE, VILL, 1, env->vtype); + env->vl = 0; + env->vstart = 0; + return 0; + } + + vlmax = vext_get_vlmax(cpu, s2); + if (s1 <= vlmax) { + vl = s1; + } else { + vl = vlmax; + } + env->vl = vl; + env->vtype = s2; + env->vstart = 0; + return vl; +} + +/* + * Note that vector data is stored in host-endian 64-bit chunks, + * so addressing units smaller than that needs a host-endian fixup. + */ +#ifdef HOST_WORDS_BIGENDIAN +#define H1(x) ((x) ^ 7) +#define H1_2(x) ((x) ^ 6) +#define H1_4(x) ((x) ^ 4) +#define H2(x) ((x) ^ 3) +#define H4(x) ((x) ^ 1) +#define H8(x) ((x)) +#else +#define H1(x) (x) +#define H1_2(x) (x) +#define H1_4(x) (x) +#define H2(x) (x) +#define H4(x) (x) +#define H8(x) (x) +#endif + +static inline uint32_t vext_nf(uint32_t desc) +{ + return FIELD_EX32(simd_data(desc), VDATA, NF); +} + +static inline uint32_t vext_mlen(uint32_t desc) +{ + return FIELD_EX32(simd_data(desc), VDATA, MLEN); +} + +static inline uint32_t vext_vm(uint32_t desc) +{ + return FIELD_EX32(simd_data(desc), VDATA, VM); +} + +static inline uint32_t vext_lmul(uint32_t desc) +{ + return FIELD_EX32(simd_data(desc), VDATA, LMUL); +} + +static uint32_t vext_wd(uint32_t desc) +{ + return (simd_data(desc) >> 11) & 0x1; +} + +/* + * Get vector group length in bytes. Its range is [64, 2048]. + * + * As simd_desc support at most 256, the max vlen is 512 bits. + * So vlen in bytes is encoded as maxsz. + */ +static inline uint32_t vext_maxsz(uint32_t desc) +{ + return simd_maxsz(desc) << vext_lmul(desc); +} + +/* + * This function checks watchpoint before real load operation. + * + * In softmmu mode, the TLB API probe_access is enough for watchpoint check. + * In user mode, there is no watchpoint support now. + * + * It will trigger an exception if there is no mapping in TLB + * and page table walk can't fill the TLB entry. Then the guest + * software can return here after process the exception or never return. + */ +static void probe_pages(CPURISCVState *env, target_ulong addr, + target_ulong len, uintptr_t ra, + MMUAccessType access_type) +{ + target_ulong pagelen = -(addr | TARGET_PAGE_MASK); + target_ulong curlen = MIN(pagelen, len); + + probe_access(env, addr, curlen, access_type, + cpu_mmu_index(env, false), ra); + if (len > curlen) { + addr += curlen; + curlen = len - curlen; + probe_access(env, addr, curlen, access_type, + cpu_mmu_index(env, false), ra); + } +} + +#ifdef HOST_WORDS_BIGENDIAN +static void vext_clear(void *tail, uint32_t cnt, uint32_t tot) +{ + /* + * Split the remaining range to two parts. + * The first part is in the last uint64_t unit. + * The second part start from the next uint64_t unit. 
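
The H1/H2/H4/H8 macros above implement the host-endian fixup described in the comment: vector registers are stored as host-endian uint64_t chunks, and XOR-ing the index on a big-endian host makes sub-64-bit elements land where a little-endian layout would put them. A standalone sketch of the effect, restating only the H1 definition (the HOST_WORDS_BIGENDIAN convention is the patch's; the rest is illustrative):

    /* Standalone sketch of the host-endian fixup for byte-sized elements. */
    #include <inttypes.h>
    #include <stdint.h>
    #include <stdio.h>

    #ifdef HOST_WORDS_BIGENDIAN
    #define H1(x) ((x) ^ 7)
    #else
    #define H1(x) (x)
    #endif

    int main(void)
    {
        uint64_t chunk = 0;
        uint8_t *bytes = (uint8_t *)&chunk;

        /* store byte element 0 through the fixed-up index ... */
        bytes[H1(0)] = 0xAA;

        /* ... and it lands in the least-significant byte of the 64-bit chunk
         * on either host endianness. */
        printf("chunk = 0x%016" PRIx64 "\n", chunk);   /* 0x00000000000000aa */
        return 0;
    }
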
+ */ + int part1 = 0, part2 = tot - cnt; + if (cnt % 8) { + part1 = 8 - (cnt % 8); + part2 = tot - cnt - part1; + memset(QEMU_ALIGN_PTR_DOWN(tail, 8), 0, part1); + memset(QEMU_ALIGN_PTR_UP(tail, 8), 0, part2); + } else { + memset(tail, 0, part2); + } +} +#else +static void vext_clear(void *tail, uint32_t cnt, uint32_t tot) +{ + memset(tail, 0, tot - cnt); +} +#endif + +static void clearb(void *vd, uint32_t idx, uint32_t cnt, uint32_t tot) +{ + int8_t *cur = ((int8_t *)vd + H1(idx)); + vext_clear(cur, cnt, tot); +} + +static void clearh(void *vd, uint32_t idx, uint32_t cnt, uint32_t tot) +{ + int16_t *cur = ((int16_t *)vd + H2(idx)); + vext_clear(cur, cnt, tot); +} + +static void clearl(void *vd, uint32_t idx, uint32_t cnt, uint32_t tot) +{ + int32_t *cur = ((int32_t *)vd + H4(idx)); + vext_clear(cur, cnt, tot); +} + +static void clearq(void *vd, uint32_t idx, uint32_t cnt, uint32_t tot) +{ + int64_t *cur = (int64_t *)vd + idx; + vext_clear(cur, cnt, tot); +} + +static inline void vext_set_elem_mask(void *v0, int mlen, int index, + uint8_t value) +{ + int idx = (index * mlen) / 64; + int pos = (index * mlen) % 64; + uint64_t old = ((uint64_t *)v0)[idx]; + ((uint64_t *)v0)[idx] = deposit64(old, pos, mlen, value); +} + +static inline int vext_elem_mask(void *v0, int mlen, int index) +{ + int idx = (index * mlen) / 64; + int pos = (index * mlen) % 64; + return (((uint64_t *)v0)[idx] >> pos) & 1; +} + +/* elements operations for load and store */ +typedef void vext_ldst_elem_fn(CPURISCVState *env, target_ulong addr, + uint32_t idx, void *vd, uintptr_t retaddr); +typedef void clear_fn(void *vd, uint32_t idx, uint32_t cnt, uint32_t tot); + +#define GEN_VEXT_LD_ELEM(NAME, MTYPE, ETYPE, H, LDSUF) \ +static void NAME(CPURISCVState *env, abi_ptr addr, \ + uint32_t idx, void *vd, uintptr_t retaddr)\ +{ \ + MTYPE data; \ + ETYPE *cur = ((ETYPE *)vd + H(idx)); \ + data = cpu_##LDSUF##_data_ra(env, addr, retaddr); \ + *cur = data; \ +} \ + +GEN_VEXT_LD_ELEM(ldb_b, int8_t, int8_t, H1, ldsb) +GEN_VEXT_LD_ELEM(ldb_h, int8_t, int16_t, H2, ldsb) +GEN_VEXT_LD_ELEM(ldb_w, int8_t, int32_t, H4, ldsb) +GEN_VEXT_LD_ELEM(ldb_d, int8_t, int64_t, H8, ldsb) +GEN_VEXT_LD_ELEM(ldh_h, int16_t, int16_t, H2, ldsw) +GEN_VEXT_LD_ELEM(ldh_w, int16_t, int32_t, H4, ldsw) +GEN_VEXT_LD_ELEM(ldh_d, int16_t, int64_t, H8, ldsw) +GEN_VEXT_LD_ELEM(ldw_w, int32_t, int32_t, H4, ldl) +GEN_VEXT_LD_ELEM(ldw_d, int32_t, int64_t, H8, ldl) +GEN_VEXT_LD_ELEM(lde_b, int8_t, int8_t, H1, ldsb) +GEN_VEXT_LD_ELEM(lde_h, int16_t, int16_t, H2, ldsw) +GEN_VEXT_LD_ELEM(lde_w, int32_t, int32_t, H4, ldl) +GEN_VEXT_LD_ELEM(lde_d, int64_t, int64_t, H8, ldq) +GEN_VEXT_LD_ELEM(ldbu_b, uint8_t, uint8_t, H1, ldub) +GEN_VEXT_LD_ELEM(ldbu_h, uint8_t, uint16_t, H2, ldub) +GEN_VEXT_LD_ELEM(ldbu_w, uint8_t, uint32_t, H4, ldub) +GEN_VEXT_LD_ELEM(ldbu_d, uint8_t, uint64_t, H8, ldub) +GEN_VEXT_LD_ELEM(ldhu_h, uint16_t, uint16_t, H2, lduw) +GEN_VEXT_LD_ELEM(ldhu_w, uint16_t, uint32_t, H4, lduw) +GEN_VEXT_LD_ELEM(ldhu_d, uint16_t, uint64_t, H8, lduw) +GEN_VEXT_LD_ELEM(ldwu_w, uint32_t, uint32_t, H4, ldl) +GEN_VEXT_LD_ELEM(ldwu_d, uint32_t, uint64_t, H8, ldl) + +#define GEN_VEXT_ST_ELEM(NAME, ETYPE, H, STSUF) \ +static void NAME(CPURISCVState *env, abi_ptr addr, \ + uint32_t idx, void *vd, uintptr_t retaddr)\ +{ \ + ETYPE data = *((ETYPE *)vd + H(idx)); \ + cpu_##STSUF##_data_ra(env, addr, data, retaddr); \ +} + +GEN_VEXT_ST_ELEM(stb_b, int8_t, H1, stb) +GEN_VEXT_ST_ELEM(stb_h, int16_t, H2, stb) +GEN_VEXT_ST_ELEM(stb_w, int32_t, H4, stb) +GEN_VEXT_ST_ELEM(stb_d, int64_t, 
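
vext_set_elem_mask()/vext_elem_mask() pack one mlen-bit mask element per lane into an array of uint64_t words: element i lives at bit (i*mlen) % 64 of word (i*mlen) / 64. A standalone sketch of that indexing, with a hand-rolled stand-in for QEMU's deposit64() (deposit64_sketch is an illustrative name, not from the patch):

    /* Standalone sketch of the mask-element packing used above. */
    #include <stdint.h>
    #include <stdio.h>

    static uint64_t deposit64_sketch(uint64_t old, int pos, int len, uint64_t val)
    {
        uint64_t mask = (len == 64 ? ~0ull : ((1ull << len) - 1)) << pos;
        return (old & ~mask) | ((val << pos) & mask);
    }

    int main(void)
    {
        uint64_t v0[4] = { 0 };          /* a small mask register */
        int mlen = 4;                    /* e.g. SEW=32, LMUL=8 -> MLEN=4 */

        /* set mask element 20: bit (20*4)%64 = 16 of word (20*4)/64 = 1 */
        int index = 20;
        int idx = (index * mlen) / 64;
        int pos = (index * mlen) % 64;
        v0[idx] = deposit64_sketch(v0[idx], pos, mlen, 1);

        /* read it back the way vext_elem_mask() does */
        int active = (v0[idx] >> pos) & 1;
        printf("word %d = 0x%016llx, element %d active = %d\n",
               idx, (unsigned long long)v0[idx], index, active);
        return 0;
    }
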
H8, stb) +GEN_VEXT_ST_ELEM(sth_h, int16_t, H2, stw) +GEN_VEXT_ST_ELEM(sth_w, int32_t, H4, stw) +GEN_VEXT_ST_ELEM(sth_d, int64_t, H8, stw) +GEN_VEXT_ST_ELEM(stw_w, int32_t, H4, stl) +GEN_VEXT_ST_ELEM(stw_d, int64_t, H8, stl) +GEN_VEXT_ST_ELEM(ste_b, int8_t, H1, stb) +GEN_VEXT_ST_ELEM(ste_h, int16_t, H2, stw) +GEN_VEXT_ST_ELEM(ste_w, int32_t, H4, stl) +GEN_VEXT_ST_ELEM(ste_d, int64_t, H8, stq) + +/* + *** stride: access vector element from strided memory + */ +static void +vext_ldst_stride(void *vd, void *v0, target_ulong base, + target_ulong stride, CPURISCVState *env, + uint32_t desc, uint32_t vm, + vext_ldst_elem_fn *ldst_elem, clear_fn *clear_elem, + uint32_t esz, uint32_t msz, uintptr_t ra, + MMUAccessType access_type) +{ + uint32_t i, k; + uint32_t nf = vext_nf(desc); + uint32_t mlen = vext_mlen(desc); + uint32_t vlmax = vext_maxsz(desc) / esz; + + /* probe every access*/ + for (i = 0; i < env->vl; i++) { + if (!vm && !vext_elem_mask(v0, mlen, i)) { + continue; + } + probe_pages(env, base + stride * i, nf * msz, ra, access_type); + } + /* do real access */ + for (i = 0; i < env->vl; i++) { + k = 0; + if (!vm && !vext_elem_mask(v0, mlen, i)) { + continue; + } + while (k < nf) { + target_ulong addr = base + stride * i + k * msz; + ldst_elem(env, addr, i + k * vlmax, vd, ra); + k++; + } + } + /* clear tail elements */ + if (clear_elem) { + for (k = 0; k < nf; k++) { + clear_elem(vd, env->vl + k * vlmax, env->vl * esz, vlmax * esz); + } + } +} + +#define GEN_VEXT_LD_STRIDE(NAME, MTYPE, ETYPE, LOAD_FN, CLEAR_FN) \ +void HELPER(NAME)(void *vd, void * v0, target_ulong base, \ + target_ulong stride, CPURISCVState *env, \ + uint32_t desc) \ +{ \ + uint32_t vm = vext_vm(desc); \ + vext_ldst_stride(vd, v0, base, stride, env, desc, vm, LOAD_FN, \ + CLEAR_FN, sizeof(ETYPE), sizeof(MTYPE), \ + GETPC(), MMU_DATA_LOAD); \ +} + +GEN_VEXT_LD_STRIDE(vlsb_v_b, int8_t, int8_t, ldb_b, clearb) +GEN_VEXT_LD_STRIDE(vlsb_v_h, int8_t, int16_t, ldb_h, clearh) +GEN_VEXT_LD_STRIDE(vlsb_v_w, int8_t, int32_t, ldb_w, clearl) +GEN_VEXT_LD_STRIDE(vlsb_v_d, int8_t, int64_t, ldb_d, clearq) +GEN_VEXT_LD_STRIDE(vlsh_v_h, int16_t, int16_t, ldh_h, clearh) +GEN_VEXT_LD_STRIDE(vlsh_v_w, int16_t, int32_t, ldh_w, clearl) +GEN_VEXT_LD_STRIDE(vlsh_v_d, int16_t, int64_t, ldh_d, clearq) +GEN_VEXT_LD_STRIDE(vlsw_v_w, int32_t, int32_t, ldw_w, clearl) +GEN_VEXT_LD_STRIDE(vlsw_v_d, int32_t, int64_t, ldw_d, clearq) +GEN_VEXT_LD_STRIDE(vlse_v_b, int8_t, int8_t, lde_b, clearb) +GEN_VEXT_LD_STRIDE(vlse_v_h, int16_t, int16_t, lde_h, clearh) +GEN_VEXT_LD_STRIDE(vlse_v_w, int32_t, int32_t, lde_w, clearl) +GEN_VEXT_LD_STRIDE(vlse_v_d, int64_t, int64_t, lde_d, clearq) +GEN_VEXT_LD_STRIDE(vlsbu_v_b, uint8_t, uint8_t, ldbu_b, clearb) +GEN_VEXT_LD_STRIDE(vlsbu_v_h, uint8_t, uint16_t, ldbu_h, clearh) +GEN_VEXT_LD_STRIDE(vlsbu_v_w, uint8_t, uint32_t, ldbu_w, clearl) +GEN_VEXT_LD_STRIDE(vlsbu_v_d, uint8_t, uint64_t, ldbu_d, clearq) +GEN_VEXT_LD_STRIDE(vlshu_v_h, uint16_t, uint16_t, ldhu_h, clearh) +GEN_VEXT_LD_STRIDE(vlshu_v_w, uint16_t, uint32_t, ldhu_w, clearl) +GEN_VEXT_LD_STRIDE(vlshu_v_d, uint16_t, uint64_t, ldhu_d, clearq) +GEN_VEXT_LD_STRIDE(vlswu_v_w, uint32_t, uint32_t, ldwu_w, clearl) +GEN_VEXT_LD_STRIDE(vlswu_v_d, uint32_t, uint64_t, ldwu_d, clearq) + +#define GEN_VEXT_ST_STRIDE(NAME, MTYPE, ETYPE, STORE_FN) \ +void HELPER(NAME)(void *vd, void *v0, target_ulong base, \ + target_ulong stride, CPURISCVState *env, \ + uint32_t desc) \ +{ \ + uint32_t vm = vext_vm(desc); \ + vext_ldst_stride(vd, v0, base, stride, env, desc, vm, STORE_FN, \ + 
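
For the strided segment loads and stores above, vext_ldst_stride() probes every active element's pages first and then fetches field k of element i from base + stride*i + k*msz, placing it at register-file slot i + k*vlmax. A standalone sketch of that address and slot arithmetic (all values illustrative):

    /* Standalone sketch of the addressing in vext_ldst_stride(). */
    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        uint64_t base = 0x1000;
        uint64_t stride = 32;     /* bytes between consecutive elements */
        uint32_t msz = 4;         /* 32-bit memory elements */
        uint32_t nf = 2;          /* two fields per segment */
        uint32_t vl = 3;          /* only the first three elements are active */
        uint32_t vlmax = 8;       /* illustrative group length in elements */

        for (uint32_t i = 0; i < vl; i++) {
            for (uint32_t k = 0; k < nf; k++) {
                uint64_t addr = base + stride * i + k * msz;
                uint32_t slot = i + k * vlmax;
                printf("element %u field %u: mem 0x%04llx -> slot %u\n",
                       i, k, (unsigned long long)addr, slot);
            }
        }
        return 0;
    }
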
NULL, sizeof(ETYPE), sizeof(MTYPE), \ + GETPC(), MMU_DATA_STORE); \ +} + +GEN_VEXT_ST_STRIDE(vssb_v_b, int8_t, int8_t, stb_b) +GEN_VEXT_ST_STRIDE(vssb_v_h, int8_t, int16_t, stb_h) +GEN_VEXT_ST_STRIDE(vssb_v_w, int8_t, int32_t, stb_w) +GEN_VEXT_ST_STRIDE(vssb_v_d, int8_t, int64_t, stb_d) +GEN_VEXT_ST_STRIDE(vssh_v_h, int16_t, int16_t, sth_h) +GEN_VEXT_ST_STRIDE(vssh_v_w, int16_t, int32_t, sth_w) +GEN_VEXT_ST_STRIDE(vssh_v_d, int16_t, int64_t, sth_d) +GEN_VEXT_ST_STRIDE(vssw_v_w, int32_t, int32_t, stw_w) +GEN_VEXT_ST_STRIDE(vssw_v_d, int32_t, int64_t, stw_d) +GEN_VEXT_ST_STRIDE(vsse_v_b, int8_t, int8_t, ste_b) +GEN_VEXT_ST_STRIDE(vsse_v_h, int16_t, int16_t, ste_h) +GEN_VEXT_ST_STRIDE(vsse_v_w, int32_t, int32_t, ste_w) +GEN_VEXT_ST_STRIDE(vsse_v_d, int64_t, int64_t, ste_d) + +/* + *** unit-stride: access elements stored contiguously in memory + */ + +/* unmasked unit-stride load and store operation*/ +static void +vext_ldst_us(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc, + vext_ldst_elem_fn *ldst_elem, clear_fn *clear_elem, + uint32_t esz, uint32_t msz, uintptr_t ra, + MMUAccessType access_type) +{ + uint32_t i, k; + uint32_t nf = vext_nf(desc); + uint32_t vlmax = vext_maxsz(desc) / esz; + + /* probe every access */ + probe_pages(env, base, env->vl * nf * msz, ra, access_type); + /* load bytes from guest memory */ + for (i = 0; i < env->vl; i++) { + k = 0; + while (k < nf) { + target_ulong addr = base + (i * nf + k) * msz; + ldst_elem(env, addr, i + k * vlmax, vd, ra); + k++; + } + } + /* clear tail elements */ + if (clear_elem) { + for (k = 0; k < nf; k++) { + clear_elem(vd, env->vl + k * vlmax, env->vl * esz, vlmax * esz); + } + } +} + +/* + * masked unit-stride load and store operation will be a special case of stride, + * stride = NF * sizeof (MTYPE) + */ + +#define GEN_VEXT_LD_US(NAME, MTYPE, ETYPE, LOAD_FN, CLEAR_FN) \ +void HELPER(NAME##_mask)(void *vd, void *v0, target_ulong base, \ + CPURISCVState *env, uint32_t desc) \ +{ \ + uint32_t stride = vext_nf(desc) * sizeof(MTYPE); \ + vext_ldst_stride(vd, v0, base, stride, env, desc, false, LOAD_FN, \ + CLEAR_FN, sizeof(ETYPE), sizeof(MTYPE), \ + GETPC(), MMU_DATA_LOAD); \ +} \ + \ +void HELPER(NAME)(void *vd, void *v0, target_ulong base, \ + CPURISCVState *env, uint32_t desc) \ +{ \ + vext_ldst_us(vd, base, env, desc, LOAD_FN, CLEAR_FN, \ + sizeof(ETYPE), sizeof(MTYPE), GETPC(), MMU_DATA_LOAD); \ +} + +GEN_VEXT_LD_US(vlb_v_b, int8_t, int8_t, ldb_b, clearb) +GEN_VEXT_LD_US(vlb_v_h, int8_t, int16_t, ldb_h, clearh) +GEN_VEXT_LD_US(vlb_v_w, int8_t, int32_t, ldb_w, clearl) +GEN_VEXT_LD_US(vlb_v_d, int8_t, int64_t, ldb_d, clearq) +GEN_VEXT_LD_US(vlh_v_h, int16_t, int16_t, ldh_h, clearh) +GEN_VEXT_LD_US(vlh_v_w, int16_t, int32_t, ldh_w, clearl) +GEN_VEXT_LD_US(vlh_v_d, int16_t, int64_t, ldh_d, clearq) +GEN_VEXT_LD_US(vlw_v_w, int32_t, int32_t, ldw_w, clearl) +GEN_VEXT_LD_US(vlw_v_d, int32_t, int64_t, ldw_d, clearq) +GEN_VEXT_LD_US(vle_v_b, int8_t, int8_t, lde_b, clearb) +GEN_VEXT_LD_US(vle_v_h, int16_t, int16_t, lde_h, clearh) +GEN_VEXT_LD_US(vle_v_w, int32_t, int32_t, lde_w, clearl) +GEN_VEXT_LD_US(vle_v_d, int64_t, int64_t, lde_d, clearq) +GEN_VEXT_LD_US(vlbu_v_b, uint8_t, uint8_t, ldbu_b, clearb) +GEN_VEXT_LD_US(vlbu_v_h, uint8_t, uint16_t, ldbu_h, clearh) +GEN_VEXT_LD_US(vlbu_v_w, uint8_t, uint32_t, ldbu_w, clearl) +GEN_VEXT_LD_US(vlbu_v_d, uint8_t, uint64_t, ldbu_d, clearq) +GEN_VEXT_LD_US(vlhu_v_h, uint16_t, uint16_t, ldhu_h, clearh) +GEN_VEXT_LD_US(vlhu_v_w, uint16_t, uint32_t, ldhu_w, clearl) +GEN_VEXT_LD_US(vlhu_v_d, 
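
The masked unit-stride helpers simply call vext_ldst_stride() with stride = nf * sizeof(MTYPE), while the unmasked fast path in vext_ldst_us() walks base + (i*nf + k)*msz directly; both produce the same address sequence, since base + (i*nf + k)*msz == base + (nf*msz)*i + k*msz. A tiny standalone check of that identity (values illustrative):

    /* Standalone sketch: the unit-stride walk and the strided walk with
     * stride = nf * msz visit exactly the same addresses. */
    #include <assert.h>
    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        uint64_t base = 0x2000;
        uint32_t nf = 3, msz = 2;              /* 3 fields of 16-bit elements */

        for (uint32_t i = 0; i < 16; i++) {
            for (uint32_t k = 0; k < nf; k++) {
                uint64_t unit    = base + (i * nf + k) * msz;
                uint64_t strided = base + (uint64_t)nf * msz * i + k * msz;
                assert(unit == strided);
            }
        }
        printf("address sequences match\n");
        return 0;
    }
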
uint16_t, uint64_t, ldhu_d, clearq) +GEN_VEXT_LD_US(vlwu_v_w, uint32_t, uint32_t, ldwu_w, clearl) +GEN_VEXT_LD_US(vlwu_v_d, uint32_t, uint64_t, ldwu_d, clearq) + +#define GEN_VEXT_ST_US(NAME, MTYPE, ETYPE, STORE_FN) \ +void HELPER(NAME##_mask)(void *vd, void *v0, target_ulong base, \ + CPURISCVState *env, uint32_t desc) \ +{ \ + uint32_t stride = vext_nf(desc) * sizeof(MTYPE); \ + vext_ldst_stride(vd, v0, base, stride, env, desc, false, STORE_FN, \ + NULL, sizeof(ETYPE), sizeof(MTYPE), \ + GETPC(), MMU_DATA_STORE); \ +} \ + \ +void HELPER(NAME)(void *vd, void *v0, target_ulong base, \ + CPURISCVState *env, uint32_t desc) \ +{ \ + vext_ldst_us(vd, base, env, desc, STORE_FN, NULL, \ + sizeof(ETYPE), sizeof(MTYPE), GETPC(), MMU_DATA_STORE);\ +} + +GEN_VEXT_ST_US(vsb_v_b, int8_t, int8_t , stb_b) +GEN_VEXT_ST_US(vsb_v_h, int8_t, int16_t, stb_h) +GEN_VEXT_ST_US(vsb_v_w, int8_t, int32_t, stb_w) +GEN_VEXT_ST_US(vsb_v_d, int8_t, int64_t, stb_d) +GEN_VEXT_ST_US(vsh_v_h, int16_t, int16_t, sth_h) +GEN_VEXT_ST_US(vsh_v_w, int16_t, int32_t, sth_w) +GEN_VEXT_ST_US(vsh_v_d, int16_t, int64_t, sth_d) +GEN_VEXT_ST_US(vsw_v_w, int32_t, int32_t, stw_w) +GEN_VEXT_ST_US(vsw_v_d, int32_t, int64_t, stw_d) +GEN_VEXT_ST_US(vse_v_b, int8_t, int8_t , ste_b) +GEN_VEXT_ST_US(vse_v_h, int16_t, int16_t, ste_h) +GEN_VEXT_ST_US(vse_v_w, int32_t, int32_t, ste_w) +GEN_VEXT_ST_US(vse_v_d, int64_t, int64_t, ste_d) + +/* + *** index: access vector element from indexed memory + */ +typedef target_ulong vext_get_index_addr(target_ulong base, + uint32_t idx, void *vs2); + +#define GEN_VEXT_GET_INDEX_ADDR(NAME, ETYPE, H) \ +static target_ulong NAME(target_ulong base, \ + uint32_t idx, void *vs2) \ +{ \ + return (base + *((ETYPE *)vs2 + H(idx))); \ +} + +GEN_VEXT_GET_INDEX_ADDR(idx_b, int8_t, H1) +GEN_VEXT_GET_INDEX_ADDR(idx_h, int16_t, H2) +GEN_VEXT_GET_INDEX_ADDR(idx_w, int32_t, H4) +GEN_VEXT_GET_INDEX_ADDR(idx_d, int64_t, H8) + +static inline void +vext_ldst_index(void *vd, void *v0, target_ulong base, + void *vs2, CPURISCVState *env, uint32_t desc, + vext_get_index_addr get_index_addr, + vext_ldst_elem_fn *ldst_elem, + clear_fn *clear_elem, + uint32_t esz, uint32_t msz, uintptr_t ra, + MMUAccessType access_type) +{ + uint32_t i, k; + uint32_t nf = vext_nf(desc); + uint32_t vm = vext_vm(desc); + uint32_t mlen = vext_mlen(desc); + uint32_t vlmax = vext_maxsz(desc) / esz; + + /* probe every access*/ + for (i = 0; i < env->vl; i++) { + if (!vm && !vext_elem_mask(v0, mlen, i)) { + continue; + } + probe_pages(env, get_index_addr(base, i, vs2), nf * msz, ra, + access_type); + } + /* load bytes from guest memory */ + for (i = 0; i < env->vl; i++) { + k = 0; + if (!vm && !vext_elem_mask(v0, mlen, i)) { + continue; + } + while (k < nf) { + abi_ptr addr = get_index_addr(base, i, vs2) + k * msz; + ldst_elem(env, addr, i + k * vlmax, vd, ra); + k++; + } + } + /* clear tail elements */ + if (clear_elem) { + for (k = 0; k < nf; k++) { + clear_elem(vd, env->vl + k * vlmax, env->vl * esz, vlmax * esz); + } + } +} + +#define GEN_VEXT_LD_INDEX(NAME, MTYPE, ETYPE, INDEX_FN, LOAD_FN, CLEAR_FN) \ +void HELPER(NAME)(void *vd, void *v0, target_ulong base, \ + void *vs2, CPURISCVState *env, uint32_t desc) \ +{ \ + vext_ldst_index(vd, v0, base, vs2, env, desc, INDEX_FN, \ + LOAD_FN, CLEAR_FN, sizeof(ETYPE), sizeof(MTYPE), \ + GETPC(), MMU_DATA_LOAD); \ +} + +GEN_VEXT_LD_INDEX(vlxb_v_b, int8_t, int8_t, idx_b, ldb_b, clearb) +GEN_VEXT_LD_INDEX(vlxb_v_h, int8_t, int16_t, idx_h, ldb_h, clearh) +GEN_VEXT_LD_INDEX(vlxb_v_w, int8_t, int32_t, idx_w, ldb_w, 
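
For the indexed (gather/scatter) forms, GEN_VEXT_GET_INDEX_ADDR builds the effective address of element i as base plus the i-th element of the index vector vs2, read at its natural width and sign-extended. A standalone sketch for an int16_t index vector (idx_h_sketch is an illustrative name; a little-endian host is assumed, so the H2() fixup is the identity):

    /* Standalone sketch of the indexed address computation. */
    #include <stdint.h>
    #include <stdio.h>

    static uint64_t idx_h_sketch(uint64_t base, uint32_t i, const int16_t *vs2)
    {
        return base + vs2[i];          /* int16_t index, sign-extended */
    }

    int main(void)
    {
        const int16_t vs2[4] = { 0, 64, -32, 1024 };   /* index vector */
        uint64_t base = 0x4000;

        for (uint32_t i = 0; i < 4; i++) {
            printf("element %u -> 0x%llx\n",
                   i, (unsigned long long)idx_h_sketch(base, i, vs2));
        }
        return 0;
    }
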
clearl) +GEN_VEXT_LD_INDEX(vlxb_v_d, int8_t, int64_t, idx_d, ldb_d, clearq) +GEN_VEXT_LD_INDEX(vlxh_v_h, int16_t, int16_t, idx_h, ldh_h, clearh) +GEN_VEXT_LD_INDEX(vlxh_v_w, int16_t, int32_t, idx_w, ldh_w, clearl) +GEN_VEXT_LD_INDEX(vlxh_v_d, int16_t, int64_t, idx_d, ldh_d, clearq) +GEN_VEXT_LD_INDEX(vlxw_v_w, int32_t, int32_t, idx_w, ldw_w, clearl) +GEN_VEXT_LD_INDEX(vlxw_v_d, int32_t, int64_t, idx_d, ldw_d, clearq) +GEN_VEXT_LD_INDEX(vlxe_v_b, int8_t, int8_t, idx_b, lde_b, clearb) +GEN_VEXT_LD_INDEX(vlxe_v_h, int16_t, int16_t, idx_h, lde_h, clearh) +GEN_VEXT_LD_INDEX(vlxe_v_w, int32_t, int32_t, idx_w, lde_w, clearl) +GEN_VEXT_LD_INDEX(vlxe_v_d, int64_t, int64_t, idx_d, lde_d, clearq) +GEN_VEXT_LD_INDEX(vlxbu_v_b, uint8_t, uint8_t, idx_b, ldbu_b, clearb) +GEN_VEXT_LD_INDEX(vlxbu_v_h, uint8_t, uint16_t, idx_h, ldbu_h, clearh) +GEN_VEXT_LD_INDEX(vlxbu_v_w, uint8_t, uint32_t, idx_w, ldbu_w, clearl) +GEN_VEXT_LD_INDEX(vlxbu_v_d, uint8_t, uint64_t, idx_d, ldbu_d, clearq) +GEN_VEXT_LD_INDEX(vlxhu_v_h, uint16_t, uint16_t, idx_h, ldhu_h, clearh) +GEN_VEXT_LD_INDEX(vlxhu_v_w, uint16_t, uint32_t, idx_w, ldhu_w, clearl) +GEN_VEXT_LD_INDEX(vlxhu_v_d, uint16_t, uint64_t, idx_d, ldhu_d, clearq) +GEN_VEXT_LD_INDEX(vlxwu_v_w, uint32_t, uint32_t, idx_w, ldwu_w, clearl) +GEN_VEXT_LD_INDEX(vlxwu_v_d, uint32_t, uint64_t, idx_d, ldwu_d, clearq) + +#define GEN_VEXT_ST_INDEX(NAME, MTYPE, ETYPE, INDEX_FN, STORE_FN)\ +void HELPER(NAME)(void *vd, void *v0, target_ulong base, \ + void *vs2, CPURISCVState *env, uint32_t desc) \ +{ \ + vext_ldst_index(vd, v0, base, vs2, env, desc, INDEX_FN, \ + STORE_FN, NULL, sizeof(ETYPE), sizeof(MTYPE),\ + GETPC(), MMU_DATA_STORE); \ +} + +GEN_VEXT_ST_INDEX(vsxb_v_b, int8_t, int8_t, idx_b, stb_b) +GEN_VEXT_ST_INDEX(vsxb_v_h, int8_t, int16_t, idx_h, stb_h) +GEN_VEXT_ST_INDEX(vsxb_v_w, int8_t, int32_t, idx_w, stb_w) +GEN_VEXT_ST_INDEX(vsxb_v_d, int8_t, int64_t, idx_d, stb_d) +GEN_VEXT_ST_INDEX(vsxh_v_h, int16_t, int16_t, idx_h, sth_h) +GEN_VEXT_ST_INDEX(vsxh_v_w, int16_t, int32_t, idx_w, sth_w) +GEN_VEXT_ST_INDEX(vsxh_v_d, int16_t, int64_t, idx_d, sth_d) +GEN_VEXT_ST_INDEX(vsxw_v_w, int32_t, int32_t, idx_w, stw_w) +GEN_VEXT_ST_INDEX(vsxw_v_d, int32_t, int64_t, idx_d, stw_d) +GEN_VEXT_ST_INDEX(vsxe_v_b, int8_t, int8_t, idx_b, ste_b) +GEN_VEXT_ST_INDEX(vsxe_v_h, int16_t, int16_t, idx_h, ste_h) +GEN_VEXT_ST_INDEX(vsxe_v_w, int32_t, int32_t, idx_w, ste_w) +GEN_VEXT_ST_INDEX(vsxe_v_d, int64_t, int64_t, idx_d, ste_d) + +/* + *** unit-stride fault-only-fisrt load instructions + */ +static inline void +vext_ldff(void *vd, void *v0, target_ulong base, + CPURISCVState *env, uint32_t desc, + vext_ldst_elem_fn *ldst_elem, + clear_fn *clear_elem, + uint32_t esz, uint32_t msz, uintptr_t ra) +{ + void *host; + uint32_t i, k, vl = 0; + uint32_t mlen = vext_mlen(desc); + uint32_t nf = vext_nf(desc); + uint32_t vm = vext_vm(desc); + uint32_t vlmax = vext_maxsz(desc) / esz; + target_ulong addr, offset, remain; + + /* probe every access*/ + for (i = 0; i < env->vl; i++) { + if (!vm && !vext_elem_mask(v0, mlen, i)) { + continue; + } + addr = base + nf * i * msz; + if (i == 0) { + probe_pages(env, addr, nf * msz, ra, MMU_DATA_LOAD); + } else { + /* if it triggers an exception, no need to check watchpoint */ + remain = nf * msz; + while (remain > 0) { + offset = -(addr | TARGET_PAGE_MASK); + host = tlb_vaddr_to_host(env, addr, MMU_DATA_LOAD, + cpu_mmu_index(env, false)); + if (host) { +#ifdef CONFIG_USER_ONLY + if (page_check_range(addr, nf * msz, PAGE_READ) < 0) { + vl = i; + goto ProbeSuccess; + } 
+#else + probe_pages(env, addr, nf * msz, ra, MMU_DATA_LOAD); +#endif + } else { + vl = i; + goto ProbeSuccess; + } + if (remain <= offset) { + break; + } + remain -= offset; + addr += offset; + } + } + } +ProbeSuccess: + /* load bytes from guest memory */ + if (vl != 0) { + env->vl = vl; + } + for (i = 0; i < env->vl; i++) { + k = 0; + if (!vm && !vext_elem_mask(v0, mlen, i)) { + continue; + } + while (k < nf) { + target_ulong addr = base + (i * nf + k) * msz; + ldst_elem(env, addr, i + k * vlmax, vd, ra); + k++; + } + } + /* clear tail elements */ + if (vl != 0) { + return; + } + for (k = 0; k < nf; k++) { + clear_elem(vd, env->vl + k * vlmax, env->vl * esz, vlmax * esz); + } +} + +#define GEN_VEXT_LDFF(NAME, MTYPE, ETYPE, LOAD_FN, CLEAR_FN) \ +void HELPER(NAME)(void *vd, void *v0, target_ulong base, \ + CPURISCVState *env, uint32_t desc) \ +{ \ + vext_ldff(vd, v0, base, env, desc, LOAD_FN, CLEAR_FN, \ + sizeof(ETYPE), sizeof(MTYPE), GETPC()); \ +} + +GEN_VEXT_LDFF(vlbff_v_b, int8_t, int8_t, ldb_b, clearb) +GEN_VEXT_LDFF(vlbff_v_h, int8_t, int16_t, ldb_h, clearh) +GEN_VEXT_LDFF(vlbff_v_w, int8_t, int32_t, ldb_w, clearl) +GEN_VEXT_LDFF(vlbff_v_d, int8_t, int64_t, ldb_d, clearq) +GEN_VEXT_LDFF(vlhff_v_h, int16_t, int16_t, ldh_h, clearh) +GEN_VEXT_LDFF(vlhff_v_w, int16_t, int32_t, ldh_w, clearl) +GEN_VEXT_LDFF(vlhff_v_d, int16_t, int64_t, ldh_d, clearq) +GEN_VEXT_LDFF(vlwff_v_w, int32_t, int32_t, ldw_w, clearl) +GEN_VEXT_LDFF(vlwff_v_d, int32_t, int64_t, ldw_d, clearq) +GEN_VEXT_LDFF(vleff_v_b, int8_t, int8_t, lde_b, clearb) +GEN_VEXT_LDFF(vleff_v_h, int16_t, int16_t, lde_h, clearh) +GEN_VEXT_LDFF(vleff_v_w, int32_t, int32_t, lde_w, clearl) +GEN_VEXT_LDFF(vleff_v_d, int64_t, int64_t, lde_d, clearq) +GEN_VEXT_LDFF(vlbuff_v_b, uint8_t, uint8_t, ldbu_b, clearb) +GEN_VEXT_LDFF(vlbuff_v_h, uint8_t, uint16_t, ldbu_h, clearh) +GEN_VEXT_LDFF(vlbuff_v_w, uint8_t, uint32_t, ldbu_w, clearl) +GEN_VEXT_LDFF(vlbuff_v_d, uint8_t, uint64_t, ldbu_d, clearq) +GEN_VEXT_LDFF(vlhuff_v_h, uint16_t, uint16_t, ldhu_h, clearh) +GEN_VEXT_LDFF(vlhuff_v_w, uint16_t, uint32_t, ldhu_w, clearl) +GEN_VEXT_LDFF(vlhuff_v_d, uint16_t, uint64_t, ldhu_d, clearq) +GEN_VEXT_LDFF(vlwuff_v_w, uint32_t, uint32_t, ldwu_w, clearl) +GEN_VEXT_LDFF(vlwuff_v_d, uint32_t, uint64_t, ldwu_d, clearq) + +/* + *** Vector AMO Operations (Zvamo) + */ +typedef void vext_amo_noatomic_fn(void *vs3, target_ulong addr, + uint32_t wd, uint32_t idx, CPURISCVState *env, + uintptr_t retaddr); + +/* no atomic opreation for vector atomic insructions */ +#define DO_SWAP(N, M) (M) +#define DO_AND(N, M) (N & M) +#define DO_XOR(N, M) (N ^ M) +#define DO_OR(N, M) (N | M) +#define DO_ADD(N, M) (N + M) + +#define GEN_VEXT_AMO_NOATOMIC_OP(NAME, ESZ, MSZ, H, DO_OP, SUF) \ +static void \ +vext_##NAME##_noatomic_op(void *vs3, target_ulong addr, \ + uint32_t wd, uint32_t idx, \ + CPURISCVState *env, uintptr_t retaddr)\ +{ \ + typedef int##ESZ##_t ETYPE; \ + typedef int##MSZ##_t MTYPE; \ + typedef uint##MSZ##_t UMTYPE UNICORN_UNUSED; \ + ETYPE *pe3 = (ETYPE *)vs3 + H(idx); \ + MTYPE a = cpu_ld##SUF##_data(env, addr), b = *pe3; \ + \ + cpu_st##SUF##_data(env, addr, DO_OP(a, b)); \ + if (wd) { \ + *pe3 = a; \ + } \ +} + +/* Signed min/max */ +#define DO_MAX(N, M) ((N) >= (M) ? (N) : (M)) +#define DO_MIN(N, M) ((N) >= (M) ? 
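
vext_ldff() implements the fault-only-first policy: element 0 is probed normally and may trap, but if a later element's page turns out to be unmapped the helper just truncates vl to that element index and returns what it has loaded so far. A behavioural sketch of that truncation (page_ok() is a toy stand-in for the page probe, not the patch's API):

    /* Standalone sketch of the fault-only-first truncation in vext_ldff(). */
    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    /* toy stand-in for the page probe: pretend addresses >= 0x3000 are unmapped */
    static bool page_ok(uint64_t addr) { return addr < 0x3000; }

    int main(void)
    {
        uint64_t base = 0x2FF0;
        uint32_t msz = 8, vl = 8;

        for (uint32_t i = 1; i < vl; i++) {        /* element 0 checked separately */
            if (!page_ok(base + (uint64_t)i * msz)) {
                vl = i;                            /* truncate instead of trapping */
                break;
            }
        }
        printf("vl truncated to %u\n", vl);        /* 2: elements 0 and 1 survive */
        return 0;
    }
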
(M) : (N)) + +/* Unsigned min/max */ +#define DO_MAXU(N, M) DO_MAX((UMTYPE)N, (UMTYPE)M) +#define DO_MINU(N, M) DO_MIN((UMTYPE)N, (UMTYPE)M) + +GEN_VEXT_AMO_NOATOMIC_OP(vamoswapw_v_w, 32, 32, H4, DO_SWAP, l) +GEN_VEXT_AMO_NOATOMIC_OP(vamoaddw_v_w, 32, 32, H4, DO_ADD, l) +GEN_VEXT_AMO_NOATOMIC_OP(vamoxorw_v_w, 32, 32, H4, DO_XOR, l) +GEN_VEXT_AMO_NOATOMIC_OP(vamoandw_v_w, 32, 32, H4, DO_AND, l) +GEN_VEXT_AMO_NOATOMIC_OP(vamoorw_v_w, 32, 32, H4, DO_OR, l) +GEN_VEXT_AMO_NOATOMIC_OP(vamominw_v_w, 32, 32, H4, DO_MIN, l) +GEN_VEXT_AMO_NOATOMIC_OP(vamomaxw_v_w, 32, 32, H4, DO_MAX, l) +GEN_VEXT_AMO_NOATOMIC_OP(vamominuw_v_w, 32, 32, H4, DO_MINU, l) +GEN_VEXT_AMO_NOATOMIC_OP(vamomaxuw_v_w, 32, 32, H4, DO_MAXU, l) +#ifdef TARGET_RISCV64 +GEN_VEXT_AMO_NOATOMIC_OP(vamoswapw_v_d, 64, 32, H8, DO_SWAP, l) +GEN_VEXT_AMO_NOATOMIC_OP(vamoswapd_v_d, 64, 64, H8, DO_SWAP, q) +GEN_VEXT_AMO_NOATOMIC_OP(vamoaddw_v_d, 64, 32, H8, DO_ADD, l) +GEN_VEXT_AMO_NOATOMIC_OP(vamoaddd_v_d, 64, 64, H8, DO_ADD, q) +GEN_VEXT_AMO_NOATOMIC_OP(vamoxorw_v_d, 64, 32, H8, DO_XOR, l) +GEN_VEXT_AMO_NOATOMIC_OP(vamoxord_v_d, 64, 64, H8, DO_XOR, q) +GEN_VEXT_AMO_NOATOMIC_OP(vamoandw_v_d, 64, 32, H8, DO_AND, l) +GEN_VEXT_AMO_NOATOMIC_OP(vamoandd_v_d, 64, 64, H8, DO_AND, q) +GEN_VEXT_AMO_NOATOMIC_OP(vamoorw_v_d, 64, 32, H8, DO_OR, l) +GEN_VEXT_AMO_NOATOMIC_OP(vamoord_v_d, 64, 64, H8, DO_OR, q) +GEN_VEXT_AMO_NOATOMIC_OP(vamominw_v_d, 64, 32, H8, DO_MIN, l) +GEN_VEXT_AMO_NOATOMIC_OP(vamomind_v_d, 64, 64, H8, DO_MIN, q) +GEN_VEXT_AMO_NOATOMIC_OP(vamomaxw_v_d, 64, 32, H8, DO_MAX, l) +GEN_VEXT_AMO_NOATOMIC_OP(vamomaxd_v_d, 64, 64, H8, DO_MAX, q) +GEN_VEXT_AMO_NOATOMIC_OP(vamominuw_v_d, 64, 32, H8, DO_MINU, l) +GEN_VEXT_AMO_NOATOMIC_OP(vamominud_v_d, 64, 64, H8, DO_MINU, q) +GEN_VEXT_AMO_NOATOMIC_OP(vamomaxuw_v_d, 64, 32, H8, DO_MAXU, l) +GEN_VEXT_AMO_NOATOMIC_OP(vamomaxud_v_d, 64, 64, H8, DO_MAXU, q) +#endif + +static inline void +vext_amo_noatomic(void *vs3, void *v0, target_ulong base, + void *vs2, CPURISCVState *env, uint32_t desc, + vext_get_index_addr get_index_addr, + vext_amo_noatomic_fn *noatomic_op, + clear_fn *clear_elem, + uint32_t esz, uint32_t msz, uintptr_t ra) +{ + uint32_t i; + target_long addr; + uint32_t wd = vext_wd(desc); + uint32_t vm = vext_vm(desc); + uint32_t mlen = vext_mlen(desc); + uint32_t vlmax = vext_maxsz(desc) / esz; + + for (i = 0; i < env->vl; i++) { + if (!vm && !vext_elem_mask(v0, mlen, i)) { + continue; + } + probe_pages(env, get_index_addr(base, i, vs2), msz, ra, MMU_DATA_LOAD); + probe_pages(env, get_index_addr(base, i, vs2), msz, ra, MMU_DATA_STORE); + } + for (i = 0; i < env->vl; i++) { + if (!vm && !vext_elem_mask(v0, mlen, i)) { + continue; + } + addr = get_index_addr(base, i, vs2); + noatomic_op(vs3, addr, wd, i, env, ra); + } + clear_elem(vs3, env->vl, env->vl * esz, vlmax * esz); +} + +#define GEN_VEXT_AMO(NAME, MTYPE, ETYPE, INDEX_FN, CLEAR_FN) \ +void HELPER(NAME)(void *vs3, void *v0, target_ulong base, \ + void *vs2, CPURISCVState *env, uint32_t desc) \ +{ \ + vext_amo_noatomic(vs3, v0, base, vs2, env, desc, \ + INDEX_FN, vext_##NAME##_noatomic_op, \ + CLEAR_FN, sizeof(ETYPE), sizeof(MTYPE), \ + GETPC()); \ +} + +#ifdef TARGET_RISCV64 +GEN_VEXT_AMO(vamoswapw_v_d, int32_t, int64_t, idx_d, clearq) +GEN_VEXT_AMO(vamoswapd_v_d, int64_t, int64_t, idx_d, clearq) +GEN_VEXT_AMO(vamoaddw_v_d, int32_t, int64_t, idx_d, clearq) +GEN_VEXT_AMO(vamoaddd_v_d, int64_t, int64_t, idx_d, clearq) +GEN_VEXT_AMO(vamoxorw_v_d, int32_t, int64_t, idx_d, clearq) +GEN_VEXT_AMO(vamoxord_v_d, int64_t, int64_t, idx_d, clearq) 
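
Each vector AMO element in the *_noatomic_op helpers is a plain load/compute/store: the old memory value is read, old OP vs3[i] is stored back, and when the instruction's wd bit is set the old value is also written into the destination element. A standalone sketch of one vamoadd.w lane (illustrative values only):

    /* Standalone sketch of one non-atomic vector AMO element. */
    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        int32_t mem = 100;         /* the addressed memory word */
        int32_t vs3 = 7;           /* source (and optional destination) element */
        int wd = 1;                /* write-back requested */

        int32_t old = mem;         /* cpu_ldl_data() */
        mem = old + vs3;           /* DO_ADD, then cpu_stl_data() */
        if (wd) {
            vs3 = old;             /* element receives the original memory value */
        }
        printf("mem=%d vd=%d\n", mem, vs3);   /* mem=107 vd=100 */
        return 0;
    }
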
+GEN_VEXT_AMO(vamoandw_v_d, int32_t, int64_t, idx_d, clearq) +GEN_VEXT_AMO(vamoandd_v_d, int64_t, int64_t, idx_d, clearq) +GEN_VEXT_AMO(vamoorw_v_d, int32_t, int64_t, idx_d, clearq) +GEN_VEXT_AMO(vamoord_v_d, int64_t, int64_t, idx_d, clearq) +GEN_VEXT_AMO(vamominw_v_d, int32_t, int64_t, idx_d, clearq) +GEN_VEXT_AMO(vamomind_v_d, int64_t, int64_t, idx_d, clearq) +GEN_VEXT_AMO(vamomaxw_v_d, int32_t, int64_t, idx_d, clearq) +GEN_VEXT_AMO(vamomaxd_v_d, int64_t, int64_t, idx_d, clearq) +GEN_VEXT_AMO(vamominuw_v_d, uint32_t, uint64_t, idx_d, clearq) +GEN_VEXT_AMO(vamominud_v_d, uint64_t, uint64_t, idx_d, clearq) +GEN_VEXT_AMO(vamomaxuw_v_d, uint32_t, uint64_t, idx_d, clearq) +GEN_VEXT_AMO(vamomaxud_v_d, uint64_t, uint64_t, idx_d, clearq) +#endif +GEN_VEXT_AMO(vamoswapw_v_w, int32_t, int32_t, idx_w, clearl) +GEN_VEXT_AMO(vamoaddw_v_w, int32_t, int32_t, idx_w, clearl) +GEN_VEXT_AMO(vamoxorw_v_w, int32_t, int32_t, idx_w, clearl) +GEN_VEXT_AMO(vamoandw_v_w, int32_t, int32_t, idx_w, clearl) +GEN_VEXT_AMO(vamoorw_v_w, int32_t, int32_t, idx_w, clearl) +GEN_VEXT_AMO(vamominw_v_w, int32_t, int32_t, idx_w, clearl) +GEN_VEXT_AMO(vamomaxw_v_w, int32_t, int32_t, idx_w, clearl) +GEN_VEXT_AMO(vamominuw_v_w, uint32_t, uint32_t, idx_w, clearl) +GEN_VEXT_AMO(vamomaxuw_v_w, uint32_t, uint32_t, idx_w, clearl) + +/* + *** Vector Integer Arithmetic Instructions + */ + +/* expand macro args before macro */ +#define RVVCALL(macro, ...) macro(__VA_ARGS__) + +/* (TD, T1, T2, TX1, TX2) */ +#define OP_SSS_B int8_t, int8_t, int8_t, int8_t, int8_t +#define OP_SSS_H int16_t, int16_t, int16_t, int16_t, int16_t +#define OP_SSS_W int32_t, int32_t, int32_t, int32_t, int32_t +#define OP_SSS_D int64_t, int64_t, int64_t, int64_t, int64_t +#define OP_UUU_B uint8_t, uint8_t, uint8_t, uint8_t, uint8_t +#define OP_UUU_H uint16_t, uint16_t, uint16_t, uint16_t, uint16_t +#define OP_UUU_W uint32_t, uint32_t, uint32_t, uint32_t, uint32_t +#define OP_UUU_D uint64_t, uint64_t, uint64_t, uint64_t, uint64_t +#define OP_SUS_B int8_t, uint8_t, int8_t, uint8_t, int8_t +#define OP_SUS_H int16_t, uint16_t, int16_t, uint16_t, int16_t +#define OP_SUS_W int32_t, uint32_t, int32_t, uint32_t, int32_t +#define OP_SUS_D int64_t, uint64_t, int64_t, uint64_t, int64_t +#define WOP_UUU_B uint16_t, uint8_t, uint8_t, uint16_t, uint16_t +#define WOP_UUU_H uint32_t, uint16_t, uint16_t, uint32_t, uint32_t +#define WOP_UUU_W uint64_t, uint32_t, uint32_t, uint64_t, uint64_t +#define WOP_SSS_B int16_t, int8_t, int8_t, int16_t, int16_t +#define WOP_SSS_H int32_t, int16_t, int16_t, int32_t, int32_t +#define WOP_SSS_W int64_t, int32_t, int32_t, int64_t, int64_t +#define WOP_SUS_B int16_t, uint8_t, int8_t, uint16_t, int16_t +#define WOP_SUS_H int32_t, uint16_t, int16_t, uint32_t, int32_t +#define WOP_SUS_W int64_t, uint32_t, int32_t, uint64_t, int64_t +#define WOP_SSU_B int16_t, int8_t, uint8_t, int16_t, uint16_t +#define WOP_SSU_H int32_t, int16_t, uint16_t, int32_t, uint32_t +#define WOP_SSU_W int64_t, int32_t, uint32_t, int64_t, uint64_t +#define NOP_SSS_B int8_t, int8_t, int16_t, int8_t, int16_t +#define NOP_SSS_H int16_t, int16_t, int32_t, int16_t, int32_t +#define NOP_SSS_W int32_t, int32_t, int64_t, int32_t, int64_t +#define NOP_UUU_B uint8_t, uint8_t, uint16_t, uint8_t, uint16_t +#define NOP_UUU_H uint16_t, uint16_t, uint32_t, uint16_t, uint32_t +#define NOP_UUU_W uint32_t, uint32_t, uint64_t, uint32_t, uint64_t + +/* operation of two vector elements */ +typedef void opivv2_fn(void *vd, void *vs1, void *vs2, int i); + +#define OPIVV2(NAME, TD, T1, T2, TX1, TX2, 
HD, HS1, HS2, OP) \ +static void do_##NAME(void *vd, void *vs1, void *vs2, int i) \ +{ \ + TX1 s1 = *((T1 *)vs1 + HS1(i)); \ + TX2 s2 = *((T2 *)vs2 + HS2(i)); \ + *((TD *)vd + HD(i)) = OP(s2, s1); \ +} +#define DO_SUB(N, M) (N - M) +#define DO_RSUB(N, M) (M - N) + +RVVCALL(OPIVV2, vadd_vv_b, OP_SSS_B, H1, H1, H1, DO_ADD) +RVVCALL(OPIVV2, vadd_vv_h, OP_SSS_H, H2, H2, H2, DO_ADD) +RVVCALL(OPIVV2, vadd_vv_w, OP_SSS_W, H4, H4, H4, DO_ADD) +RVVCALL(OPIVV2, vadd_vv_d, OP_SSS_D, H8, H8, H8, DO_ADD) +RVVCALL(OPIVV2, vsub_vv_b, OP_SSS_B, H1, H1, H1, DO_SUB) +RVVCALL(OPIVV2, vsub_vv_h, OP_SSS_H, H2, H2, H2, DO_SUB) +RVVCALL(OPIVV2, vsub_vv_w, OP_SSS_W, H4, H4, H4, DO_SUB) +RVVCALL(OPIVV2, vsub_vv_d, OP_SSS_D, H8, H8, H8, DO_SUB) + +static void do_vext_vv(void *vd, void *v0, void *vs1, void *vs2, + CPURISCVState *env, uint32_t desc, + uint32_t esz, uint32_t dsz, + opivv2_fn *fn, clear_fn *clearfn) +{ + uint32_t vlmax = vext_maxsz(desc) / esz; + uint32_t mlen = vext_mlen(desc); + uint32_t vm = vext_vm(desc); + uint32_t vl = env->vl; + uint32_t i; + + for (i = 0; i < vl; i++) { + if (!vm && !vext_elem_mask(v0, mlen, i)) { + continue; + } + fn(vd, vs1, vs2, i); + } + clearfn(vd, vl, vl * dsz, vlmax * dsz); +} + +/* generate the helpers for OPIVV */ +#define GEN_VEXT_VV(NAME, ESZ, DSZ, CLEAR_FN) \ +void HELPER(NAME)(void *vd, void *v0, void *vs1, \ + void *vs2, CPURISCVState *env, \ + uint32_t desc) \ +{ \ + do_vext_vv(vd, v0, vs1, vs2, env, desc, ESZ, DSZ, \ + do_##NAME, CLEAR_FN); \ +} + +GEN_VEXT_VV(vadd_vv_b, 1, 1, clearb) +GEN_VEXT_VV(vadd_vv_h, 2, 2, clearh) +GEN_VEXT_VV(vadd_vv_w, 4, 4, clearl) +GEN_VEXT_VV(vadd_vv_d, 8, 8, clearq) +GEN_VEXT_VV(vsub_vv_b, 1, 1, clearb) +GEN_VEXT_VV(vsub_vv_h, 2, 2, clearh) +GEN_VEXT_VV(vsub_vv_w, 4, 4, clearl) +GEN_VEXT_VV(vsub_vv_d, 8, 8, clearq) + +typedef void opivx2_fn(void *vd, target_long s1, void *vs2, int i); + +/* + * (T1)s1 gives the real operator type. + * (TX1)(T1)s1 expands the operator type of widen or narrow operations. 
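
RVVCALL() exists only to force the type-tuple macros (OP_SSS_B and friends) to expand before OPIVV2 pastes its arguments together. A standalone sketch of what one instantiation, RVVCALL(OPIVV2, vadd_vv_b, OP_SSS_B, H1, H1, H1, DO_ADD), boils down to after expansion (do_vadd_vv_b_sketch is an illustrative name; the little-endian case with H1(i) == i is assumed):

    /* Standalone sketch: hand-expanded form of one OPIVV2 instantiation,
     * applied per lane the way do_vext_vv() does. */
    #include <stdint.h>
    #include <stdio.h>

    static void do_vadd_vv_b_sketch(void *vd, void *vs1, void *vs2, int i)
    {
        int8_t s1 = *((int8_t *)vs1 + i);
        int8_t s2 = *((int8_t *)vs2 + i);
        *((int8_t *)vd + i) = s2 + s1;         /* DO_ADD(s2, s1) */
    }

    int main(void)
    {
        int8_t a[4] = { 1, 2, 3, 4 }, b[4] = { 10, 20, 30, 40 }, d[4];

        for (int i = 0; i < 4; i++) {
            do_vadd_vv_b_sketch(d, a, b, i);
        }
        printf("%d %d %d %d\n", d[0], d[1], d[2], d[3]);   /* 11 22 33 44 */
        return 0;
    }
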
+ */ +#define OPIVX2(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ +static void do_##NAME(void *vd, target_long s1, void *vs2, int i) \ +{ \ + TX2 s2 = *((T2 *)vs2 + HS2(i)); \ + *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1); \ +} + +RVVCALL(OPIVX2, vadd_vx_b, OP_SSS_B, H1, H1, DO_ADD) +RVVCALL(OPIVX2, vadd_vx_h, OP_SSS_H, H2, H2, DO_ADD) +RVVCALL(OPIVX2, vadd_vx_w, OP_SSS_W, H4, H4, DO_ADD) +RVVCALL(OPIVX2, vadd_vx_d, OP_SSS_D, H8, H8, DO_ADD) +RVVCALL(OPIVX2, vsub_vx_b, OP_SSS_B, H1, H1, DO_SUB) +RVVCALL(OPIVX2, vsub_vx_h, OP_SSS_H, H2, H2, DO_SUB) +RVVCALL(OPIVX2, vsub_vx_w, OP_SSS_W, H4, H4, DO_SUB) +RVVCALL(OPIVX2, vsub_vx_d, OP_SSS_D, H8, H8, DO_SUB) +RVVCALL(OPIVX2, vrsub_vx_b, OP_SSS_B, H1, H1, DO_RSUB) +RVVCALL(OPIVX2, vrsub_vx_h, OP_SSS_H, H2, H2, DO_RSUB) +RVVCALL(OPIVX2, vrsub_vx_w, OP_SSS_W, H4, H4, DO_RSUB) +RVVCALL(OPIVX2, vrsub_vx_d, OP_SSS_D, H8, H8, DO_RSUB) + +static void do_vext_vx(void *vd, void *v0, target_long s1, void *vs2, + CPURISCVState *env, uint32_t desc, + uint32_t esz, uint32_t dsz, + opivx2_fn fn, clear_fn *clearfn) +{ + uint32_t vlmax = vext_maxsz(desc) / esz; + uint32_t mlen = vext_mlen(desc); + uint32_t vm = vext_vm(desc); + uint32_t vl = env->vl; + uint32_t i; + + for (i = 0; i < vl; i++) { + if (!vm && !vext_elem_mask(v0, mlen, i)) { + continue; + } + fn(vd, s1, vs2, i); + } + clearfn(vd, vl, vl * dsz, vlmax * dsz); +} + +/* generate the helpers for OPIVX */ +#define GEN_VEXT_VX(NAME, ESZ, DSZ, CLEAR_FN) \ +void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ + void *vs2, CPURISCVState *env, \ + uint32_t desc) \ +{ \ + do_vext_vx(vd, v0, s1, vs2, env, desc, ESZ, DSZ, \ + do_##NAME, CLEAR_FN); \ +} + +GEN_VEXT_VX(vadd_vx_b, 1, 1, clearb) +GEN_VEXT_VX(vadd_vx_h, 2, 2, clearh) +GEN_VEXT_VX(vadd_vx_w, 4, 4, clearl) +GEN_VEXT_VX(vadd_vx_d, 8, 8, clearq) +GEN_VEXT_VX(vsub_vx_b, 1, 1, clearb) +GEN_VEXT_VX(vsub_vx_h, 2, 2, clearh) +GEN_VEXT_VX(vsub_vx_w, 4, 4, clearl) +GEN_VEXT_VX(vsub_vx_d, 8, 8, clearq) +GEN_VEXT_VX(vrsub_vx_b, 1, 1, clearb) +GEN_VEXT_VX(vrsub_vx_h, 2, 2, clearh) +GEN_VEXT_VX(vrsub_vx_w, 4, 4, clearl) +GEN_VEXT_VX(vrsub_vx_d, 8, 8, clearq) + +void HELPER(vec_rsubs8)(void *d, void *a, uint64_t b, uint32_t desc) +{ + intptr_t oprsz = simd_oprsz(desc); + intptr_t i; + + for (i = 0; i < oprsz; i += sizeof(uint8_t)) { + *(uint8_t *)((char*)d + i) = (uint8_t)b - *(uint8_t *)((char*)a + i); + } +} + +void HELPER(vec_rsubs16)(void *d, void *a, uint64_t b, uint32_t desc) +{ + intptr_t oprsz = simd_oprsz(desc); + intptr_t i; + + for (i = 0; i < oprsz; i += sizeof(uint16_t)) { + *(uint16_t *)((char*)d + i) = (uint16_t)b - *(uint16_t *)((char*)a + i); + } +} + +void HELPER(vec_rsubs32)(void *d, void *a, uint64_t b, uint32_t desc) +{ + intptr_t oprsz = simd_oprsz(desc); + intptr_t i; + + for (i = 0; i < oprsz; i += sizeof(uint32_t)) { + *(uint32_t *)((char*)d + i) = (uint32_t)b - *(uint32_t *)((char*)a + i); + } +} + +void HELPER(vec_rsubs64)(void *d, void *a, uint64_t b, uint32_t desc) +{ + intptr_t oprsz = simd_oprsz(desc); + intptr_t i; + + for (i = 0; i < oprsz; i += sizeof(uint64_t)) { + *(uint64_t *)((char*)d + i) = b - *(uint64_t *)((char*)a + i); + } +} + +/* Vector Widening Integer Add/Subtract */ +#define WOP_UUU_B uint16_t, uint8_t, uint8_t, uint16_t, uint16_t +#define WOP_UUU_H uint32_t, uint16_t, uint16_t, uint32_t, uint32_t +#define WOP_UUU_W uint64_t, uint32_t, uint32_t, uint64_t, uint64_t +#define WOP_SSS_B int16_t, int8_t, int8_t, int16_t, int16_t +#define WOP_SSS_H int32_t, int16_t, int16_t, int32_t, int32_t +#define WOP_SSS_W int64_t, 
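
In OPIVX2 the scalar rs1 operand arrives as a target_long and the (TX1)(T1)s1 cast narrows it to the element type, so byte-wide ops only see the low 8 bits; DO_RSUB flips the operand order so that vrsub.vx computes scalar minus vector. A standalone sketch of both points (values illustrative):

    /* Standalone sketch of the scalar-operand narrowing and vrsub ordering. */
    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        int64_t s1 = 0x1234;            /* scalar register value (target_long) */
        int8_t  s2 = 10;                /* one byte element of vs2 */

        int8_t add  = (int8_t)(s2 + (int8_t)s1);   /* vadd.vx:  s2 + (T1)s1 */
        int8_t rsub = (int8_t)((int8_t)s1 - s2);   /* vrsub.vx: (T1)s1 - s2 */

        printf("low byte of s1 = %d, vadd -> %d, vrsub -> %d\n",
               (int8_t)s1, add, rsub);             /* 52, 62, 42 */
        return 0;
    }
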
int32_t, int32_t, int64_t, int64_t +#define WOP_WUUU_B uint16_t, uint8_t, uint16_t, uint16_t, uint16_t +#define WOP_WUUU_H uint32_t, uint16_t, uint32_t, uint32_t, uint32_t +#define WOP_WUUU_W uint64_t, uint32_t, uint64_t, uint64_t, uint64_t +#define WOP_WSSS_B int16_t, int8_t, int16_t, int16_t, int16_t +#define WOP_WSSS_H int32_t, int16_t, int32_t, int32_t, int32_t +#define WOP_WSSS_W int64_t, int32_t, int64_t, int64_t, int64_t +RVVCALL(OPIVV2, vwaddu_vv_b, WOP_UUU_B, H2, H1, H1, DO_ADD) +RVVCALL(OPIVV2, vwaddu_vv_h, WOP_UUU_H, H4, H2, H2, DO_ADD) +RVVCALL(OPIVV2, vwaddu_vv_w, WOP_UUU_W, H8, H4, H4, DO_ADD) +RVVCALL(OPIVV2, vwsubu_vv_b, WOP_UUU_B, H2, H1, H1, DO_SUB) +RVVCALL(OPIVV2, vwsubu_vv_h, WOP_UUU_H, H4, H2, H2, DO_SUB) +RVVCALL(OPIVV2, vwsubu_vv_w, WOP_UUU_W, H8, H4, H4, DO_SUB) +RVVCALL(OPIVV2, vwadd_vv_b, WOP_SSS_B, H2, H1, H1, DO_ADD) +RVVCALL(OPIVV2, vwadd_vv_h, WOP_SSS_H, H4, H2, H2, DO_ADD) +RVVCALL(OPIVV2, vwadd_vv_w, WOP_SSS_W, H8, H4, H4, DO_ADD) +RVVCALL(OPIVV2, vwsub_vv_b, WOP_SSS_B, H2, H1, H1, DO_SUB) +RVVCALL(OPIVV2, vwsub_vv_h, WOP_SSS_H, H4, H2, H2, DO_SUB) +RVVCALL(OPIVV2, vwsub_vv_w, WOP_SSS_W, H8, H4, H4, DO_SUB) +RVVCALL(OPIVV2, vwaddu_wv_b, WOP_WUUU_B, H2, H1, H1, DO_ADD) +RVVCALL(OPIVV2, vwaddu_wv_h, WOP_WUUU_H, H4, H2, H2, DO_ADD) +RVVCALL(OPIVV2, vwaddu_wv_w, WOP_WUUU_W, H8, H4, H4, DO_ADD) +RVVCALL(OPIVV2, vwsubu_wv_b, WOP_WUUU_B, H2, H1, H1, DO_SUB) +RVVCALL(OPIVV2, vwsubu_wv_h, WOP_WUUU_H, H4, H2, H2, DO_SUB) +RVVCALL(OPIVV2, vwsubu_wv_w, WOP_WUUU_W, H8, H4, H4, DO_SUB) +RVVCALL(OPIVV2, vwadd_wv_b, WOP_WSSS_B, H2, H1, H1, DO_ADD) +RVVCALL(OPIVV2, vwadd_wv_h, WOP_WSSS_H, H4, H2, H2, DO_ADD) +RVVCALL(OPIVV2, vwadd_wv_w, WOP_WSSS_W, H8, H4, H4, DO_ADD) +RVVCALL(OPIVV2, vwsub_wv_b, WOP_WSSS_B, H2, H1, H1, DO_SUB) +RVVCALL(OPIVV2, vwsub_wv_h, WOP_WSSS_H, H4, H2, H2, DO_SUB) +RVVCALL(OPIVV2, vwsub_wv_w, WOP_WSSS_W, H8, H4, H4, DO_SUB) +GEN_VEXT_VV(vwaddu_vv_b, 1, 2, clearh) +GEN_VEXT_VV(vwaddu_vv_h, 2, 4, clearl) +GEN_VEXT_VV(vwaddu_vv_w, 4, 8, clearq) +GEN_VEXT_VV(vwsubu_vv_b, 1, 2, clearh) +GEN_VEXT_VV(vwsubu_vv_h, 2, 4, clearl) +GEN_VEXT_VV(vwsubu_vv_w, 4, 8, clearq) +GEN_VEXT_VV(vwadd_vv_b, 1, 2, clearh) +GEN_VEXT_VV(vwadd_vv_h, 2, 4, clearl) +GEN_VEXT_VV(vwadd_vv_w, 4, 8, clearq) +GEN_VEXT_VV(vwsub_vv_b, 1, 2, clearh) +GEN_VEXT_VV(vwsub_vv_h, 2, 4, clearl) +GEN_VEXT_VV(vwsub_vv_w, 4, 8, clearq) +GEN_VEXT_VV(vwaddu_wv_b, 1, 2, clearh) +GEN_VEXT_VV(vwaddu_wv_h, 2, 4, clearl) +GEN_VEXT_VV(vwaddu_wv_w, 4, 8, clearq) +GEN_VEXT_VV(vwsubu_wv_b, 1, 2, clearh) +GEN_VEXT_VV(vwsubu_wv_h, 2, 4, clearl) +GEN_VEXT_VV(vwsubu_wv_w, 4, 8, clearq) +GEN_VEXT_VV(vwadd_wv_b, 1, 2, clearh) +GEN_VEXT_VV(vwadd_wv_h, 2, 4, clearl) +GEN_VEXT_VV(vwadd_wv_w, 4, 8, clearq) +GEN_VEXT_VV(vwsub_wv_b, 1, 2, clearh) +GEN_VEXT_VV(vwsub_wv_h, 2, 4, clearl) +GEN_VEXT_VV(vwsub_wv_w, 4, 8, clearq) + +RVVCALL(OPIVX2, vwaddu_vx_b, WOP_UUU_B, H2, H1, DO_ADD) +RVVCALL(OPIVX2, vwaddu_vx_h, WOP_UUU_H, H4, H2, DO_ADD) +RVVCALL(OPIVX2, vwaddu_vx_w, WOP_UUU_W, H8, H4, DO_ADD) +RVVCALL(OPIVX2, vwsubu_vx_b, WOP_UUU_B, H2, H1, DO_SUB) +RVVCALL(OPIVX2, vwsubu_vx_h, WOP_UUU_H, H4, H2, DO_SUB) +RVVCALL(OPIVX2, vwsubu_vx_w, WOP_UUU_W, H8, H4, DO_SUB) +RVVCALL(OPIVX2, vwadd_vx_b, WOP_SSS_B, H2, H1, DO_ADD) +RVVCALL(OPIVX2, vwadd_vx_h, WOP_SSS_H, H4, H2, DO_ADD) +RVVCALL(OPIVX2, vwadd_vx_w, WOP_SSS_W, H8, H4, DO_ADD) +RVVCALL(OPIVX2, vwsub_vx_b, WOP_SSS_B, H2, H1, DO_SUB) +RVVCALL(OPIVX2, vwsub_vx_h, WOP_SSS_H, H4, H2, DO_SUB) +RVVCALL(OPIVX2, vwsub_vx_w, WOP_SSS_W, H8, H4, DO_SUB) +RVVCALL(OPIVX2, vwaddu_wx_b, 
WOP_WUUU_B, H2, H1, DO_ADD) +RVVCALL(OPIVX2, vwaddu_wx_h, WOP_WUUU_H, H4, H2, DO_ADD) +RVVCALL(OPIVX2, vwaddu_wx_w, WOP_WUUU_W, H8, H4, DO_ADD) +RVVCALL(OPIVX2, vwsubu_wx_b, WOP_WUUU_B, H2, H1, DO_SUB) +RVVCALL(OPIVX2, vwsubu_wx_h, WOP_WUUU_H, H4, H2, DO_SUB) +RVVCALL(OPIVX2, vwsubu_wx_w, WOP_WUUU_W, H8, H4, DO_SUB) +RVVCALL(OPIVX2, vwadd_wx_b, WOP_WSSS_B, H2, H1, DO_ADD) +RVVCALL(OPIVX2, vwadd_wx_h, WOP_WSSS_H, H4, H2, DO_ADD) +RVVCALL(OPIVX2, vwadd_wx_w, WOP_WSSS_W, H8, H4, DO_ADD) +RVVCALL(OPIVX2, vwsub_wx_b, WOP_WSSS_B, H2, H1, DO_SUB) +RVVCALL(OPIVX2, vwsub_wx_h, WOP_WSSS_H, H4, H2, DO_SUB) +RVVCALL(OPIVX2, vwsub_wx_w, WOP_WSSS_W, H8, H4, DO_SUB) +GEN_VEXT_VX(vwaddu_vx_b, 1, 2, clearh) +GEN_VEXT_VX(vwaddu_vx_h, 2, 4, clearl) +GEN_VEXT_VX(vwaddu_vx_w, 4, 8, clearq) +GEN_VEXT_VX(vwsubu_vx_b, 1, 2, clearh) +GEN_VEXT_VX(vwsubu_vx_h, 2, 4, clearl) +GEN_VEXT_VX(vwsubu_vx_w, 4, 8, clearq) +GEN_VEXT_VX(vwadd_vx_b, 1, 2, clearh) +GEN_VEXT_VX(vwadd_vx_h, 2, 4, clearl) +GEN_VEXT_VX(vwadd_vx_w, 4, 8, clearq) +GEN_VEXT_VX(vwsub_vx_b, 1, 2, clearh) +GEN_VEXT_VX(vwsub_vx_h, 2, 4, clearl) +GEN_VEXT_VX(vwsub_vx_w, 4, 8, clearq) +GEN_VEXT_VX(vwaddu_wx_b, 1, 2, clearh) +GEN_VEXT_VX(vwaddu_wx_h, 2, 4, clearl) +GEN_VEXT_VX(vwaddu_wx_w, 4, 8, clearq) +GEN_VEXT_VX(vwsubu_wx_b, 1, 2, clearh) +GEN_VEXT_VX(vwsubu_wx_h, 2, 4, clearl) +GEN_VEXT_VX(vwsubu_wx_w, 4, 8, clearq) +GEN_VEXT_VX(vwadd_wx_b, 1, 2, clearh) +GEN_VEXT_VX(vwadd_wx_h, 2, 4, clearl) +GEN_VEXT_VX(vwadd_wx_w, 4, 8, clearq) +GEN_VEXT_VX(vwsub_wx_b, 1, 2, clearh) +GEN_VEXT_VX(vwsub_wx_h, 2, 4, clearl) +GEN_VEXT_VX(vwsub_wx_w, 4, 8, clearq) + +/* Vector Integer Add-with-Carry / Subtract-with-Borrow Instructions */ +#define DO_VADC(N, M, C) (N + M + C) +#define DO_VSBC(N, M, C) (N - M - C) + +#define GEN_VEXT_VADC_VVM(NAME, ETYPE, H, DO_OP, CLEAR_FN) \ +void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ + CPURISCVState *env, uint32_t desc) \ +{ \ + uint32_t mlen = vext_mlen(desc); \ + uint32_t vl = env->vl; \ + uint32_t esz = sizeof(ETYPE); \ + uint32_t vlmax = vext_maxsz(desc) / esz; \ + uint32_t i; \ + \ + for (i = 0; i < vl; i++) { \ + ETYPE s1 = *((ETYPE *)vs1 + H(i)); \ + ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ + uint8_t carry = vext_elem_mask(v0, mlen, i); \ + \ + *((ETYPE *)vd + H(i)) = DO_OP(s2, s1, carry); \ + } \ + CLEAR_FN(vd, vl, vl * esz, vlmax * esz); \ +} + +GEN_VEXT_VADC_VVM(vadc_vvm_b, uint8_t, H1, DO_VADC, clearb) +GEN_VEXT_VADC_VVM(vadc_vvm_h, uint16_t, H2, DO_VADC, clearh) +GEN_VEXT_VADC_VVM(vadc_vvm_w, uint32_t, H4, DO_VADC, clearl) +GEN_VEXT_VADC_VVM(vadc_vvm_d, uint64_t, H8, DO_VADC, clearq) + +GEN_VEXT_VADC_VVM(vsbc_vvm_b, uint8_t, H1, DO_VSBC, clearb) +GEN_VEXT_VADC_VVM(vsbc_vvm_h, uint16_t, H2, DO_VSBC, clearh) +GEN_VEXT_VADC_VVM(vsbc_vvm_w, uint32_t, H4, DO_VSBC, clearl) +GEN_VEXT_VADC_VVM(vsbc_vvm_d, uint64_t, H8, DO_VSBC, clearq) + +#define GEN_VEXT_VADC_VXM(NAME, ETYPE, H, DO_OP, CLEAR_FN) \ +void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ + CPURISCVState *env, uint32_t desc) \ +{ \ + uint32_t mlen = vext_mlen(desc); \ + uint32_t vl = env->vl; \ + uint32_t esz = sizeof(ETYPE); \ + uint32_t vlmax = vext_maxsz(desc) / esz; \ + uint32_t i; \ + \ + for (i = 0; i < vl; i++) { \ + ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ + uint8_t carry = vext_elem_mask(v0, mlen, i); \ + \ + *((ETYPE *)vd + H(i)) = DO_OP(s2, (ETYPE)(target_long)s1, carry);\ + } \ + CLEAR_FN(vd, vl, vl * esz, vlmax * esz); \ +} + +GEN_VEXT_VADC_VXM(vadc_vxm_b, uint8_t, H1, DO_VADC, clearb) +GEN_VEXT_VADC_VXM(vadc_vxm_h, uint16_t, H2, 
DO_VADC, clearh) +GEN_VEXT_VADC_VXM(vadc_vxm_w, uint32_t, H4, DO_VADC, clearl) +GEN_VEXT_VADC_VXM(vadc_vxm_d, uint64_t, H8, DO_VADC, clearq) + +GEN_VEXT_VADC_VXM(vsbc_vxm_b, uint8_t, H1, DO_VSBC, clearb) +GEN_VEXT_VADC_VXM(vsbc_vxm_h, uint16_t, H2, DO_VSBC, clearh) +GEN_VEXT_VADC_VXM(vsbc_vxm_w, uint32_t, H4, DO_VSBC, clearl) +GEN_VEXT_VADC_VXM(vsbc_vxm_d, uint64_t, H8, DO_VSBC, clearq) + +#ifdef _MSC_VER + #define DO_MADC(N, M, C) (C ? ((N) + (M) + 1) <= (N) : \ + ((N) + (M)) < (N)) +#else + #define DO_MADC(N, M, C) (C ? (__typeof(N))(N + M + 1) <= N : \ + (__typeof(N))(N + M) < N) +#endif +#define DO_MSBC(N, M, C) (C ? N <= M : N < M) + +#define GEN_VEXT_VMADC_VVM(NAME, ETYPE, H, DO_OP) \ +void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ + CPURISCVState *env, uint32_t desc) \ +{ \ + uint32_t mlen = vext_mlen(desc); \ + uint32_t vl = env->vl; \ + uint32_t vlmax = vext_maxsz(desc) / sizeof(ETYPE); \ + uint32_t i; \ + \ + for (i = 0; i < vl; i++) { \ + ETYPE s1 = *((ETYPE *)vs1 + H(i)); \ + ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ + uint8_t carry = vext_elem_mask(v0, mlen, i); \ + \ + vext_set_elem_mask(vd, mlen, i, DO_OP(s2, s1, carry));\ + } \ + for (; i < vlmax; i++) { \ + vext_set_elem_mask(vd, mlen, i, 0); \ + } \ +} + +GEN_VEXT_VMADC_VVM(vmadc_vvm_b, uint8_t, H1, DO_MADC) +GEN_VEXT_VMADC_VVM(vmadc_vvm_h, uint16_t, H2, DO_MADC) +GEN_VEXT_VMADC_VVM(vmadc_vvm_w, uint32_t, H4, DO_MADC) +GEN_VEXT_VMADC_VVM(vmadc_vvm_d, uint64_t, H8, DO_MADC) + +GEN_VEXT_VMADC_VVM(vmsbc_vvm_b, uint8_t, H1, DO_MSBC) +GEN_VEXT_VMADC_VVM(vmsbc_vvm_h, uint16_t, H2, DO_MSBC) +GEN_VEXT_VMADC_VVM(vmsbc_vvm_w, uint32_t, H4, DO_MSBC) +GEN_VEXT_VMADC_VVM(vmsbc_vvm_d, uint64_t, H8, DO_MSBC) + +#define GEN_VEXT_VMADC_VXM(NAME, ETYPE, H, DO_OP) \ +void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ + void *vs2, CPURISCVState *env, uint32_t desc) \ +{ \ + uint32_t mlen = vext_mlen(desc); \ + uint32_t vl = env->vl; \ + uint32_t vlmax = vext_maxsz(desc) / sizeof(ETYPE); \ + uint32_t i; \ + \ + for (i = 0; i < vl; i++) { \ + ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ + uint8_t carry = vext_elem_mask(v0, mlen, i); \ + \ + vext_set_elem_mask(vd, mlen, i, \ + DO_OP(s2, (ETYPE)(target_long)s1, carry)); \ + } \ + for (; i < vlmax; i++) { \ + vext_set_elem_mask(vd, mlen, i, 0); \ + } \ +} + +GEN_VEXT_VMADC_VXM(vmadc_vxm_b, uint8_t, H1, DO_MADC) +GEN_VEXT_VMADC_VXM(vmadc_vxm_h, uint16_t, H2, DO_MADC) +GEN_VEXT_VMADC_VXM(vmadc_vxm_w, uint32_t, H4, DO_MADC) +GEN_VEXT_VMADC_VXM(vmadc_vxm_d, uint64_t, H8, DO_MADC) + +GEN_VEXT_VMADC_VXM(vmsbc_vxm_b, uint8_t, H1, DO_MSBC) +GEN_VEXT_VMADC_VXM(vmsbc_vxm_h, uint16_t, H2, DO_MSBC) +GEN_VEXT_VMADC_VXM(vmsbc_vxm_w, uint32_t, H4, DO_MSBC) +GEN_VEXT_VMADC_VXM(vmsbc_vxm_d, uint64_t, H8, DO_MSBC) + +/* Vector Bitwise Logical Instructions */ +RVVCALL(OPIVV2, vand_vv_b, OP_SSS_B, H1, H1, H1, DO_AND) +RVVCALL(OPIVV2, vand_vv_h, OP_SSS_H, H2, H2, H2, DO_AND) +RVVCALL(OPIVV2, vand_vv_w, OP_SSS_W, H4, H4, H4, DO_AND) +RVVCALL(OPIVV2, vand_vv_d, OP_SSS_D, H8, H8, H8, DO_AND) +RVVCALL(OPIVV2, vor_vv_b, OP_SSS_B, H1, H1, H1, DO_OR) +RVVCALL(OPIVV2, vor_vv_h, OP_SSS_H, H2, H2, H2, DO_OR) +RVVCALL(OPIVV2, vor_vv_w, OP_SSS_W, H4, H4, H4, DO_OR) +RVVCALL(OPIVV2, vor_vv_d, OP_SSS_D, H8, H8, H8, DO_OR) +RVVCALL(OPIVV2, vxor_vv_b, OP_SSS_B, H1, H1, H1, DO_XOR) +RVVCALL(OPIVV2, vxor_vv_h, OP_SSS_H, H2, H2, H2, DO_XOR) +RVVCALL(OPIVV2, vxor_vv_w, OP_SSS_W, H4, H4, H4, DO_XOR) +RVVCALL(OPIVV2, vxor_vv_d, OP_SSS_D, H8, H8, H8, DO_XOR) +GEN_VEXT_VV(vand_vv_b, 1, 1, clearb) +GEN_VEXT_VV(vand_vv_h, 2, 2, 
clearh) +GEN_VEXT_VV(vand_vv_w, 4, 4, clearl) +GEN_VEXT_VV(vand_vv_d, 8, 8, clearq) +GEN_VEXT_VV(vor_vv_b, 1, 1, clearb) +GEN_VEXT_VV(vor_vv_h, 2, 2, clearh) +GEN_VEXT_VV(vor_vv_w, 4, 4, clearl) +GEN_VEXT_VV(vor_vv_d, 8, 8, clearq) +GEN_VEXT_VV(vxor_vv_b, 1, 1, clearb) +GEN_VEXT_VV(vxor_vv_h, 2, 2, clearh) +GEN_VEXT_VV(vxor_vv_w, 4, 4, clearl) +GEN_VEXT_VV(vxor_vv_d, 8, 8, clearq) + +RVVCALL(OPIVX2, vand_vx_b, OP_SSS_B, H1, H1, DO_AND) +RVVCALL(OPIVX2, vand_vx_h, OP_SSS_H, H2, H2, DO_AND) +RVVCALL(OPIVX2, vand_vx_w, OP_SSS_W, H4, H4, DO_AND) +RVVCALL(OPIVX2, vand_vx_d, OP_SSS_D, H8, H8, DO_AND) +RVVCALL(OPIVX2, vor_vx_b, OP_SSS_B, H1, H1, DO_OR) +RVVCALL(OPIVX2, vor_vx_h, OP_SSS_H, H2, H2, DO_OR) +RVVCALL(OPIVX2, vor_vx_w, OP_SSS_W, H4, H4, DO_OR) +RVVCALL(OPIVX2, vor_vx_d, OP_SSS_D, H8, H8, DO_OR) +RVVCALL(OPIVX2, vxor_vx_b, OP_SSS_B, H1, H1, DO_XOR) +RVVCALL(OPIVX2, vxor_vx_h, OP_SSS_H, H2, H2, DO_XOR) +RVVCALL(OPIVX2, vxor_vx_w, OP_SSS_W, H4, H4, DO_XOR) +RVVCALL(OPIVX2, vxor_vx_d, OP_SSS_D, H8, H8, DO_XOR) +GEN_VEXT_VX(vand_vx_b, 1, 1, clearb) +GEN_VEXT_VX(vand_vx_h, 2, 2, clearh) +GEN_VEXT_VX(vand_vx_w, 4, 4, clearl) +GEN_VEXT_VX(vand_vx_d, 8, 8, clearq) +GEN_VEXT_VX(vor_vx_b, 1, 1, clearb) +GEN_VEXT_VX(vor_vx_h, 2, 2, clearh) +GEN_VEXT_VX(vor_vx_w, 4, 4, clearl) +GEN_VEXT_VX(vor_vx_d, 8, 8, clearq) +GEN_VEXT_VX(vxor_vx_b, 1, 1, clearb) +GEN_VEXT_VX(vxor_vx_h, 2, 2, clearh) +GEN_VEXT_VX(vxor_vx_w, 4, 4, clearl) +GEN_VEXT_VX(vxor_vx_d, 8, 8, clearq) + +/* Vector Single-Width Bit Shift Instructions */ +#define DO_SLL(N, M) (N << (M)) +#define DO_SRL(N, M) (N >> (M)) + +/* generate the helpers for shift instructions with two vector operators */ +#define GEN_VEXT_SHIFT_VV(NAME, TS1, TS2, HS1, HS2, OP, MASK, CLEAR_FN) \ +void HELPER(NAME)(void *vd, void *v0, void *vs1, \ + void *vs2, CPURISCVState *env, uint32_t desc) \ +{ \ + uint32_t mlen = vext_mlen(desc); \ + uint32_t vm = vext_vm(desc); \ + uint32_t vl = env->vl; \ + uint32_t esz = sizeof(TS1); \ + uint32_t vlmax = vext_maxsz(desc) / esz; \ + uint32_t i; \ + \ + for (i = 0; i < vl; i++) { \ + if (!vm && !vext_elem_mask(v0, mlen, i)) { \ + continue; \ + } \ + TS1 s1 = *((TS1 *)vs1 + HS1(i)); \ + TS2 s2 = *((TS2 *)vs2 + HS2(i)); \ + *((TS1 *)vd + HS1(i)) = OP(s2, s1 & MASK); \ + } \ + CLEAR_FN(vd, vl, vl * esz, vlmax * esz); \ +} + +GEN_VEXT_SHIFT_VV(vsll_vv_b, uint8_t, uint8_t, H1, H1, DO_SLL, 0x7, clearb) +GEN_VEXT_SHIFT_VV(vsll_vv_h, uint16_t, uint16_t, H2, H2, DO_SLL, 0xf, clearh) +GEN_VEXT_SHIFT_VV(vsll_vv_w, uint32_t, uint32_t, H4, H4, DO_SLL, 0x1f, clearl) +GEN_VEXT_SHIFT_VV(vsll_vv_d, uint64_t, uint64_t, H8, H8, DO_SLL, 0x3f, clearq) + +GEN_VEXT_SHIFT_VV(vsrl_vv_b, uint8_t, uint8_t, H1, H1, DO_SRL, 0x7, clearb) +GEN_VEXT_SHIFT_VV(vsrl_vv_h, uint16_t, uint16_t, H2, H2, DO_SRL, 0xf, clearh) +GEN_VEXT_SHIFT_VV(vsrl_vv_w, uint32_t, uint32_t, H4, H4, DO_SRL, 0x1f, clearl) +GEN_VEXT_SHIFT_VV(vsrl_vv_d, uint64_t, uint64_t, H8, H8, DO_SRL, 0x3f, clearq) + +GEN_VEXT_SHIFT_VV(vsra_vv_b, uint8_t, int8_t, H1, H1, DO_SRL, 0x7, clearb) +GEN_VEXT_SHIFT_VV(vsra_vv_h, uint16_t, int16_t, H2, H2, DO_SRL, 0xf, clearh) +GEN_VEXT_SHIFT_VV(vsra_vv_w, uint32_t, int32_t, H4, H4, DO_SRL, 0x1f, clearl) +GEN_VEXT_SHIFT_VV(vsra_vv_d, uint64_t, int64_t, H8, H8, DO_SRL, 0x3f, clearq) + +/* generate the helpers for shift instructions with one vector and one scalar */ +#define GEN_VEXT_SHIFT_VX(NAME, TD, TS2, HD, HS2, OP, MASK, CLEAR_FN) \ +void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ + void *vs2, CPURISCVState *env, uint32_t desc) \ +{ \ + uint32_t 
mlen = vext_mlen(desc); \ + uint32_t vm = vext_vm(desc); \ + uint32_t vl = env->vl; \ + uint32_t esz = sizeof(TD); \ + uint32_t vlmax = vext_maxsz(desc) / esz; \ + uint32_t i; \ + \ + for (i = 0; i < vl; i++) { \ + if (!vm && !vext_elem_mask(v0, mlen, i)) { \ + continue; \ + } \ + TS2 s2 = *((TS2 *)vs2 + HS2(i)); \ + *((TD *)vd + HD(i)) = OP(s2, s1 & MASK); \ + } \ + CLEAR_FN(vd, vl, vl * esz, vlmax * esz); \ +} + +GEN_VEXT_SHIFT_VX(vsll_vx_b, uint8_t, int8_t, H1, H1, DO_SLL, 0x7, clearb) +GEN_VEXT_SHIFT_VX(vsll_vx_h, uint16_t, int16_t, H2, H2, DO_SLL, 0xf, clearh) +GEN_VEXT_SHIFT_VX(vsll_vx_w, uint32_t, int32_t, H4, H4, DO_SLL, 0x1f, clearl) +GEN_VEXT_SHIFT_VX(vsll_vx_d, uint64_t, int64_t, H8, H8, DO_SLL, 0x3f, clearq) + +GEN_VEXT_SHIFT_VX(vsrl_vx_b, uint8_t, uint8_t, H1, H1, DO_SRL, 0x7, clearb) +GEN_VEXT_SHIFT_VX(vsrl_vx_h, uint16_t, uint16_t, H2, H2, DO_SRL, 0xf, clearh) +GEN_VEXT_SHIFT_VX(vsrl_vx_w, uint32_t, uint32_t, H4, H4, DO_SRL, 0x1f, clearl) +GEN_VEXT_SHIFT_VX(vsrl_vx_d, uint64_t, uint64_t, H8, H8, DO_SRL, 0x3f, clearq) + +GEN_VEXT_SHIFT_VX(vsra_vx_b, int8_t, int8_t, H1, H1, DO_SRL, 0x7, clearb) +GEN_VEXT_SHIFT_VX(vsra_vx_h, int16_t, int16_t, H2, H2, DO_SRL, 0xf, clearh) +GEN_VEXT_SHIFT_VX(vsra_vx_w, int32_t, int32_t, H4, H4, DO_SRL, 0x1f, clearl) +GEN_VEXT_SHIFT_VX(vsra_vx_d, int64_t, int64_t, H8, H8, DO_SRL, 0x3f, clearq) + +/* Vector Narrowing Integer Right Shift Instructions */ +GEN_VEXT_SHIFT_VV(vnsrl_vv_b, uint8_t, uint16_t, H1, H2, DO_SRL, 0xf, clearb) +GEN_VEXT_SHIFT_VV(vnsrl_vv_h, uint16_t, uint32_t, H2, H4, DO_SRL, 0x1f, clearh) +GEN_VEXT_SHIFT_VV(vnsrl_vv_w, uint32_t, uint64_t, H4, H8, DO_SRL, 0x3f, clearl) +GEN_VEXT_SHIFT_VV(vnsra_vv_b, uint8_t, int16_t, H1, H2, DO_SRL, 0xf, clearb) +GEN_VEXT_SHIFT_VV(vnsra_vv_h, uint16_t, int32_t, H2, H4, DO_SRL, 0x1f, clearh) +GEN_VEXT_SHIFT_VV(vnsra_vv_w, uint32_t, int64_t, H4, H8, DO_SRL, 0x3f, clearl) +GEN_VEXT_SHIFT_VX(vnsrl_vx_b, uint8_t, uint16_t, H1, H2, DO_SRL, 0xf, clearb) +GEN_VEXT_SHIFT_VX(vnsrl_vx_h, uint16_t, uint32_t, H2, H4, DO_SRL, 0x1f, clearh) +GEN_VEXT_SHIFT_VX(vnsrl_vx_w, uint32_t, uint64_t, H4, H8, DO_SRL, 0x3f, clearl) +GEN_VEXT_SHIFT_VX(vnsra_vx_b, int8_t, int16_t, H1, H2, DO_SRL, 0xf, clearb) +GEN_VEXT_SHIFT_VX(vnsra_vx_h, int16_t, int32_t, H2, H4, DO_SRL, 0x1f, clearh) +GEN_VEXT_SHIFT_VX(vnsra_vx_w, int32_t, int64_t, H4, H8, DO_SRL, 0x3f, clearl) + +/* Vector Integer Comparison Instructions */ +#define DO_MSEQ(N, M) (N == M) +#define DO_MSNE(N, M) (N != M) +#define DO_MSLT(N, M) (N < M) +#define DO_MSLE(N, M) (N <= M) +#define DO_MSGT(N, M) (N > M) + +#define GEN_VEXT_CMP_VV(NAME, ETYPE, H, DO_OP) \ +void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ + CPURISCVState *env, uint32_t desc) \ +{ \ + uint32_t mlen = vext_mlen(desc); \ + uint32_t vm = vext_vm(desc); \ + uint32_t vl = env->vl; \ + uint32_t vlmax = vext_maxsz(desc) / sizeof(ETYPE); \ + uint32_t i; \ + \ + for (i = 0; i < vl; i++) { \ + ETYPE s1 = *((ETYPE *)vs1 + H(i)); \ + ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ + if (!vm && !vext_elem_mask(v0, mlen, i)) { \ + continue; \ + } \ + vext_set_elem_mask(vd, mlen, i, DO_OP(s2, s1)); \ + } \ + for (; i < vlmax; i++) { \ + vext_set_elem_mask(vd, mlen, i, 0); \ + } \ +} + +GEN_VEXT_CMP_VV(vmseq_vv_b, uint8_t, H1, DO_MSEQ) +GEN_VEXT_CMP_VV(vmseq_vv_h, uint16_t, H2, DO_MSEQ) +GEN_VEXT_CMP_VV(vmseq_vv_w, uint32_t, H4, DO_MSEQ) +GEN_VEXT_CMP_VV(vmseq_vv_d, uint64_t, H8, DO_MSEQ) + +GEN_VEXT_CMP_VV(vmsne_vv_b, uint8_t, H1, DO_MSNE) +GEN_VEXT_CMP_VV(vmsne_vv_h, uint16_t, H2, DO_MSNE) 
+GEN_VEXT_CMP_VV(vmsne_vv_w, uint32_t, H4, DO_MSNE) +GEN_VEXT_CMP_VV(vmsne_vv_d, uint64_t, H8, DO_MSNE) + +GEN_VEXT_CMP_VV(vmsltu_vv_b, uint8_t, H1, DO_MSLT) +GEN_VEXT_CMP_VV(vmsltu_vv_h, uint16_t, H2, DO_MSLT) +GEN_VEXT_CMP_VV(vmsltu_vv_w, uint32_t, H4, DO_MSLT) +GEN_VEXT_CMP_VV(vmsltu_vv_d, uint64_t, H8, DO_MSLT) + +GEN_VEXT_CMP_VV(vmslt_vv_b, int8_t, H1, DO_MSLT) +GEN_VEXT_CMP_VV(vmslt_vv_h, int16_t, H2, DO_MSLT) +GEN_VEXT_CMP_VV(vmslt_vv_w, int32_t, H4, DO_MSLT) +GEN_VEXT_CMP_VV(vmslt_vv_d, int64_t, H8, DO_MSLT) + +GEN_VEXT_CMP_VV(vmsleu_vv_b, uint8_t, H1, DO_MSLE) +GEN_VEXT_CMP_VV(vmsleu_vv_h, uint16_t, H2, DO_MSLE) +GEN_VEXT_CMP_VV(vmsleu_vv_w, uint32_t, H4, DO_MSLE) +GEN_VEXT_CMP_VV(vmsleu_vv_d, uint64_t, H8, DO_MSLE) + +GEN_VEXT_CMP_VV(vmsle_vv_b, int8_t, H1, DO_MSLE) +GEN_VEXT_CMP_VV(vmsle_vv_h, int16_t, H2, DO_MSLE) +GEN_VEXT_CMP_VV(vmsle_vv_w, int32_t, H4, DO_MSLE) +GEN_VEXT_CMP_VV(vmsle_vv_d, int64_t, H8, DO_MSLE) + +#define GEN_VEXT_CMP_VX(NAME, ETYPE, H, DO_OP) \ +void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ + CPURISCVState *env, uint32_t desc) \ +{ \ + uint32_t mlen = vext_mlen(desc); \ + uint32_t vm = vext_vm(desc); \ + uint32_t vl = env->vl; \ + uint32_t vlmax = vext_maxsz(desc) / sizeof(ETYPE); \ + uint32_t i; \ + \ + for (i = 0; i < vl; i++) { \ + ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ + if (!vm && !vext_elem_mask(v0, mlen, i)) { \ + continue; \ + } \ + vext_set_elem_mask(vd, mlen, i, \ + DO_OP(s2, (ETYPE)(target_long)s1)); \ + } \ + for (; i < vlmax; i++) { \ + vext_set_elem_mask(vd, mlen, i, 0); \ + } \ +} + +GEN_VEXT_CMP_VX(vmseq_vx_b, uint8_t, H1, DO_MSEQ) +GEN_VEXT_CMP_VX(vmseq_vx_h, uint16_t, H2, DO_MSEQ) +GEN_VEXT_CMP_VX(vmseq_vx_w, uint32_t, H4, DO_MSEQ) +GEN_VEXT_CMP_VX(vmseq_vx_d, uint64_t, H8, DO_MSEQ) + +GEN_VEXT_CMP_VX(vmsne_vx_b, uint8_t, H1, DO_MSNE) +GEN_VEXT_CMP_VX(vmsne_vx_h, uint16_t, H2, DO_MSNE) +GEN_VEXT_CMP_VX(vmsne_vx_w, uint32_t, H4, DO_MSNE) +GEN_VEXT_CMP_VX(vmsne_vx_d, uint64_t, H8, DO_MSNE) + +GEN_VEXT_CMP_VX(vmsltu_vx_b, uint8_t, H1, DO_MSLT) +GEN_VEXT_CMP_VX(vmsltu_vx_h, uint16_t, H2, DO_MSLT) +GEN_VEXT_CMP_VX(vmsltu_vx_w, uint32_t, H4, DO_MSLT) +GEN_VEXT_CMP_VX(vmsltu_vx_d, uint64_t, H8, DO_MSLT) + +GEN_VEXT_CMP_VX(vmslt_vx_b, int8_t, H1, DO_MSLT) +GEN_VEXT_CMP_VX(vmslt_vx_h, int16_t, H2, DO_MSLT) +GEN_VEXT_CMP_VX(vmslt_vx_w, int32_t, H4, DO_MSLT) +GEN_VEXT_CMP_VX(vmslt_vx_d, int64_t, H8, DO_MSLT) + +GEN_VEXT_CMP_VX(vmsleu_vx_b, uint8_t, H1, DO_MSLE) +GEN_VEXT_CMP_VX(vmsleu_vx_h, uint16_t, H2, DO_MSLE) +GEN_VEXT_CMP_VX(vmsleu_vx_w, uint32_t, H4, DO_MSLE) +GEN_VEXT_CMP_VX(vmsleu_vx_d, uint64_t, H8, DO_MSLE) + +GEN_VEXT_CMP_VX(vmsle_vx_b, int8_t, H1, DO_MSLE) +GEN_VEXT_CMP_VX(vmsle_vx_h, int16_t, H2, DO_MSLE) +GEN_VEXT_CMP_VX(vmsle_vx_w, int32_t, H4, DO_MSLE) +GEN_VEXT_CMP_VX(vmsle_vx_d, int64_t, H8, DO_MSLE) + +GEN_VEXT_CMP_VX(vmsgtu_vx_b, uint8_t, H1, DO_MSGT) +GEN_VEXT_CMP_VX(vmsgtu_vx_h, uint16_t, H2, DO_MSGT) +GEN_VEXT_CMP_VX(vmsgtu_vx_w, uint32_t, H4, DO_MSGT) +GEN_VEXT_CMP_VX(vmsgtu_vx_d, uint64_t, H8, DO_MSGT) + +GEN_VEXT_CMP_VX(vmsgt_vx_b, int8_t, H1, DO_MSGT) +GEN_VEXT_CMP_VX(vmsgt_vx_h, int16_t, H2, DO_MSGT) +GEN_VEXT_CMP_VX(vmsgt_vx_w, int32_t, H4, DO_MSGT) +GEN_VEXT_CMP_VX(vmsgt_vx_d, int64_t, H8, DO_MSGT) + +/* Vector Integer Min/Max Instructions */ +RVVCALL(OPIVV2, vminu_vv_b, OP_UUU_B, H1, H1, H1, DO_MIN) +RVVCALL(OPIVV2, vminu_vv_h, OP_UUU_H, H2, H2, H2, DO_MIN) +RVVCALL(OPIVV2, vminu_vv_w, OP_UUU_W, H4, H4, H4, DO_MIN) +RVVCALL(OPIVV2, vminu_vv_d, OP_UUU_D, H8, H8, H8, DO_MIN) +RVVCALL(OPIVV2, vmin_vv_b, 
OP_SSS_B, H1, H1, H1, DO_MIN) +RVVCALL(OPIVV2, vmin_vv_h, OP_SSS_H, H2, H2, H2, DO_MIN) +RVVCALL(OPIVV2, vmin_vv_w, OP_SSS_W, H4, H4, H4, DO_MIN) +RVVCALL(OPIVV2, vmin_vv_d, OP_SSS_D, H8, H8, H8, DO_MIN) +RVVCALL(OPIVV2, vmaxu_vv_b, OP_UUU_B, H1, H1, H1, DO_MAX) +RVVCALL(OPIVV2, vmaxu_vv_h, OP_UUU_H, H2, H2, H2, DO_MAX) +RVVCALL(OPIVV2, vmaxu_vv_w, OP_UUU_W, H4, H4, H4, DO_MAX) +RVVCALL(OPIVV2, vmaxu_vv_d, OP_UUU_D, H8, H8, H8, DO_MAX) +RVVCALL(OPIVV2, vmax_vv_b, OP_SSS_B, H1, H1, H1, DO_MAX) +RVVCALL(OPIVV2, vmax_vv_h, OP_SSS_H, H2, H2, H2, DO_MAX) +RVVCALL(OPIVV2, vmax_vv_w, OP_SSS_W, H4, H4, H4, DO_MAX) +RVVCALL(OPIVV2, vmax_vv_d, OP_SSS_D, H8, H8, H8, DO_MAX) +GEN_VEXT_VV(vminu_vv_b, 1, 1, clearb) +GEN_VEXT_VV(vminu_vv_h, 2, 2, clearh) +GEN_VEXT_VV(vminu_vv_w, 4, 4, clearl) +GEN_VEXT_VV(vminu_vv_d, 8, 8, clearq) +GEN_VEXT_VV(vmin_vv_b, 1, 1, clearb) +GEN_VEXT_VV(vmin_vv_h, 2, 2, clearh) +GEN_VEXT_VV(vmin_vv_w, 4, 4, clearl) +GEN_VEXT_VV(vmin_vv_d, 8, 8, clearq) +GEN_VEXT_VV(vmaxu_vv_b, 1, 1, clearb) +GEN_VEXT_VV(vmaxu_vv_h, 2, 2, clearh) +GEN_VEXT_VV(vmaxu_vv_w, 4, 4, clearl) +GEN_VEXT_VV(vmaxu_vv_d, 8, 8, clearq) +GEN_VEXT_VV(vmax_vv_b, 1, 1, clearb) +GEN_VEXT_VV(vmax_vv_h, 2, 2, clearh) +GEN_VEXT_VV(vmax_vv_w, 4, 4, clearl) +GEN_VEXT_VV(vmax_vv_d, 8, 8, clearq) + +RVVCALL(OPIVX2, vminu_vx_b, OP_UUU_B, H1, H1, DO_MIN) +RVVCALL(OPIVX2, vminu_vx_h, OP_UUU_H, H2, H2, DO_MIN) +RVVCALL(OPIVX2, vminu_vx_w, OP_UUU_W, H4, H4, DO_MIN) +RVVCALL(OPIVX2, vminu_vx_d, OP_UUU_D, H8, H8, DO_MIN) +RVVCALL(OPIVX2, vmin_vx_b, OP_SSS_B, H1, H1, DO_MIN) +RVVCALL(OPIVX2, vmin_vx_h, OP_SSS_H, H2, H2, DO_MIN) +RVVCALL(OPIVX2, vmin_vx_w, OP_SSS_W, H4, H4, DO_MIN) +RVVCALL(OPIVX2, vmin_vx_d, OP_SSS_D, H8, H8, DO_MIN) +RVVCALL(OPIVX2, vmaxu_vx_b, OP_UUU_B, H1, H1, DO_MAX) +RVVCALL(OPIVX2, vmaxu_vx_h, OP_UUU_H, H2, H2, DO_MAX) +RVVCALL(OPIVX2, vmaxu_vx_w, OP_UUU_W, H4, H4, DO_MAX) +RVVCALL(OPIVX2, vmaxu_vx_d, OP_UUU_D, H8, H8, DO_MAX) +RVVCALL(OPIVX2, vmax_vx_b, OP_SSS_B, H1, H1, DO_MAX) +RVVCALL(OPIVX2, vmax_vx_h, OP_SSS_H, H2, H2, DO_MAX) +RVVCALL(OPIVX2, vmax_vx_w, OP_SSS_W, H4, H4, DO_MAX) +RVVCALL(OPIVX2, vmax_vx_d, OP_SSS_D, H8, H8, DO_MAX) +GEN_VEXT_VX(vminu_vx_b, 1, 1, clearb) +GEN_VEXT_VX(vminu_vx_h, 2, 2, clearh) +GEN_VEXT_VX(vminu_vx_w, 4, 4, clearl) +GEN_VEXT_VX(vminu_vx_d, 8, 8, clearq) +GEN_VEXT_VX(vmin_vx_b, 1, 1, clearb) +GEN_VEXT_VX(vmin_vx_h, 2, 2, clearh) +GEN_VEXT_VX(vmin_vx_w, 4, 4, clearl) +GEN_VEXT_VX(vmin_vx_d, 8, 8, clearq) +GEN_VEXT_VX(vmaxu_vx_b, 1, 1, clearb) +GEN_VEXT_VX(vmaxu_vx_h, 2, 2, clearh) +GEN_VEXT_VX(vmaxu_vx_w, 4, 4, clearl) +GEN_VEXT_VX(vmaxu_vx_d, 8, 8, clearq) +GEN_VEXT_VX(vmax_vx_b, 1, 1, clearb) +GEN_VEXT_VX(vmax_vx_h, 2, 2, clearh) +GEN_VEXT_VX(vmax_vx_w, 4, 4, clearl) +GEN_VEXT_VX(vmax_vx_d, 8, 8, clearq) + +/* Vector Single-Width Integer Multiply Instructions */ +#define DO_MUL(N, M) (N * M) +RVVCALL(OPIVV2, vmul_vv_b, OP_SSS_B, H1, H1, H1, DO_MUL) +RVVCALL(OPIVV2, vmul_vv_h, OP_SSS_H, H2, H2, H2, DO_MUL) +RVVCALL(OPIVV2, vmul_vv_w, OP_SSS_W, H4, H4, H4, DO_MUL) +RVVCALL(OPIVV2, vmul_vv_d, OP_SSS_D, H8, H8, H8, DO_MUL) +GEN_VEXT_VV(vmul_vv_b, 1, 1, clearb) +GEN_VEXT_VV(vmul_vv_h, 2, 2, clearh) +GEN_VEXT_VV(vmul_vv_w, 4, 4, clearl) +GEN_VEXT_VV(vmul_vv_d, 8, 8, clearq) + +static int8_t do_mulh_b(int8_t s2, int8_t s1) +{ + return (int16_t)s2 * (int16_t)s1 >> 8; +} + +static int16_t do_mulh_h(int16_t s2, int16_t s1) +{ + return (int32_t)s2 * (int32_t)s1 >> 16; +} + +static int32_t do_mulh_w(int32_t s2, int32_t s1) +{ + return (int64_t)s2 * (int64_t)s1 >> 32; +} + 
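(Editor's note — illustrative sketch, not part of the upstream diff: the do_mulh_* helpers above recover the high half of a signed product by widening both operands to the next element size and shifting right by the element width. The standalone C example below, with mulh_b as a hypothetical stand-in for do_mulh_b, checks the trick on one byte-sized case; it relies on the same arithmetic right shift of a negative product that the helpers do.)

#include <stdint.h>
#include <stdio.h>

/* Same widen-multiply-shift pattern as do_mulh_b above. */
static int8_t mulh_b(int8_t s2, int8_t s1)
{
    return (int16_t)s2 * (int16_t)s1 >> 8;
}

int main(void)
{
    /* -128 * 127 = -16256 = 0xC080 as 16 bits; the high byte 0xC0 is -64. */
    printf("%d\n", mulh_b(-128, 127));   /* prints -64 */
    return 0;
}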
+static int64_t do_mulh_d(int64_t s2, int64_t s1) +{ + uint64_t hi_64, lo_64; + + muls64(&lo_64, &hi_64, s1, s2); + return hi_64; +} + +static uint8_t do_mulhu_b(uint8_t s2, uint8_t s1) +{ + return (uint16_t)s2 * (uint16_t)s1 >> 8; +} + +static uint16_t do_mulhu_h(uint16_t s2, uint16_t s1) +{ + return (uint32_t)s2 * (uint32_t)s1 >> 16; +} + +static uint32_t do_mulhu_w(uint32_t s2, uint32_t s1) +{ + return (uint64_t)s2 * (uint64_t)s1 >> 32; +} + +static uint64_t do_mulhu_d(uint64_t s2, uint64_t s1) +{ + uint64_t hi_64, lo_64; + + mulu64(&lo_64, &hi_64, s2, s1); + return hi_64; +} + +static int8_t do_mulhsu_b(int8_t s2, uint8_t s1) +{ + return (int16_t)s2 * (uint16_t)s1 >> 8; +} + +static int16_t do_mulhsu_h(int16_t s2, uint16_t s1) +{ + return (int32_t)s2 * (uint32_t)s1 >> 16; +} + +static int32_t do_mulhsu_w(int32_t s2, uint32_t s1) +{ + return (int64_t)s2 * (uint64_t)s1 >> 32; +} + +/* + * Let A = signed operand, + * B = unsigned operand + * P = mulu64(A, B), unsigned product + * + * LET X = 2 ** 64 - A, 2's complement of A + * SP = signed product + * THEN + * IF A < 0 + * SP = -X * B + * = -(2 ** 64 - A) * B + * = A * B - 2 ** 64 * B + * = P - 2 ** 64 * B + * ELSE + * SP = P + * THEN + * HI_P -= (A < 0 ? B : 0) + */ + +static int64_t do_mulhsu_d(int64_t s2, uint64_t s1) +{ + uint64_t hi_64, lo_64; + + mulu64(&lo_64, &hi_64, s2, s1); + + hi_64 -= s2 < 0 ? s1 : 0; + return hi_64; +} + +RVVCALL(OPIVV2, vmulh_vv_b, OP_SSS_B, H1, H1, H1, do_mulh_b) +RVVCALL(OPIVV2, vmulh_vv_h, OP_SSS_H, H2, H2, H2, do_mulh_h) +RVVCALL(OPIVV2, vmulh_vv_w, OP_SSS_W, H4, H4, H4, do_mulh_w) +RVVCALL(OPIVV2, vmulh_vv_d, OP_SSS_D, H8, H8, H8, do_mulh_d) +RVVCALL(OPIVV2, vmulhu_vv_b, OP_UUU_B, H1, H1, H1, do_mulhu_b) +RVVCALL(OPIVV2, vmulhu_vv_h, OP_UUU_H, H2, H2, H2, do_mulhu_h) +RVVCALL(OPIVV2, vmulhu_vv_w, OP_UUU_W, H4, H4, H4, do_mulhu_w) +RVVCALL(OPIVV2, vmulhu_vv_d, OP_UUU_D, H8, H8, H8, do_mulhu_d) +RVVCALL(OPIVV2, vmulhsu_vv_b, OP_SUS_B, H1, H1, H1, do_mulhsu_b) +RVVCALL(OPIVV2, vmulhsu_vv_h, OP_SUS_H, H2, H2, H2, do_mulhsu_h) +RVVCALL(OPIVV2, vmulhsu_vv_w, OP_SUS_W, H4, H4, H4, do_mulhsu_w) +RVVCALL(OPIVV2, vmulhsu_vv_d, OP_SUS_D, H8, H8, H8, do_mulhsu_d) +GEN_VEXT_VV(vmulh_vv_b, 1, 1, clearb) +GEN_VEXT_VV(vmulh_vv_h, 2, 2, clearh) +GEN_VEXT_VV(vmulh_vv_w, 4, 4, clearl) +GEN_VEXT_VV(vmulh_vv_d, 8, 8, clearq) +GEN_VEXT_VV(vmulhu_vv_b, 1, 1, clearb) +GEN_VEXT_VV(vmulhu_vv_h, 2, 2, clearh) +GEN_VEXT_VV(vmulhu_vv_w, 4, 4, clearl) +GEN_VEXT_VV(vmulhu_vv_d, 8, 8, clearq) +GEN_VEXT_VV(vmulhsu_vv_b, 1, 1, clearb) +GEN_VEXT_VV(vmulhsu_vv_h, 2, 2, clearh) +GEN_VEXT_VV(vmulhsu_vv_w, 4, 4, clearl) +GEN_VEXT_VV(vmulhsu_vv_d, 8, 8, clearq) + +RVVCALL(OPIVX2, vmul_vx_b, OP_SSS_B, H1, H1, DO_MUL) +RVVCALL(OPIVX2, vmul_vx_h, OP_SSS_H, H2, H2, DO_MUL) +RVVCALL(OPIVX2, vmul_vx_w, OP_SSS_W, H4, H4, DO_MUL) +RVVCALL(OPIVX2, vmul_vx_d, OP_SSS_D, H8, H8, DO_MUL) +RVVCALL(OPIVX2, vmulh_vx_b, OP_SSS_B, H1, H1, do_mulh_b) +RVVCALL(OPIVX2, vmulh_vx_h, OP_SSS_H, H2, H2, do_mulh_h) +RVVCALL(OPIVX2, vmulh_vx_w, OP_SSS_W, H4, H4, do_mulh_w) +RVVCALL(OPIVX2, vmulh_vx_d, OP_SSS_D, H8, H8, do_mulh_d) +RVVCALL(OPIVX2, vmulhu_vx_b, OP_UUU_B, H1, H1, do_mulhu_b) +RVVCALL(OPIVX2, vmulhu_vx_h, OP_UUU_H, H2, H2, do_mulhu_h) +RVVCALL(OPIVX2, vmulhu_vx_w, OP_UUU_W, H4, H4, do_mulhu_w) +RVVCALL(OPIVX2, vmulhu_vx_d, OP_UUU_D, H8, H8, do_mulhu_d) +RVVCALL(OPIVX2, vmulhsu_vx_b, OP_SUS_B, H1, H1, do_mulhsu_b) +RVVCALL(OPIVX2, vmulhsu_vx_h, OP_SUS_H, H2, H2, do_mulhsu_h) +RVVCALL(OPIVX2, vmulhsu_vx_w, OP_SUS_W, H4, H4, do_mulhsu_w) +RVVCALL(OPIVX2, 
vmulhsu_vx_d, OP_SUS_D, H8, H8, do_mulhsu_d) +GEN_VEXT_VX(vmul_vx_b, 1, 1, clearb) +GEN_VEXT_VX(vmul_vx_h, 2, 2, clearh) +GEN_VEXT_VX(vmul_vx_w, 4, 4, clearl) +GEN_VEXT_VX(vmul_vx_d, 8, 8, clearq) +GEN_VEXT_VX(vmulh_vx_b, 1, 1, clearb) +GEN_VEXT_VX(vmulh_vx_h, 2, 2, clearh) +GEN_VEXT_VX(vmulh_vx_w, 4, 4, clearl) +GEN_VEXT_VX(vmulh_vx_d, 8, 8, clearq) +GEN_VEXT_VX(vmulhu_vx_b, 1, 1, clearb) +GEN_VEXT_VX(vmulhu_vx_h, 2, 2, clearh) +GEN_VEXT_VX(vmulhu_vx_w, 4, 4, clearl) +GEN_VEXT_VX(vmulhu_vx_d, 8, 8, clearq) +GEN_VEXT_VX(vmulhsu_vx_b, 1, 1, clearb) +GEN_VEXT_VX(vmulhsu_vx_h, 2, 2, clearh) +GEN_VEXT_VX(vmulhsu_vx_w, 4, 4, clearl) +GEN_VEXT_VX(vmulhsu_vx_d, 8, 8, clearq) + +/* Vector Integer Divide Instructions */ +#ifdef _MSC_VER + #define DO_DIVU(N, M) (unlikely(M == 0) ? (UINT64_MAX) : (N) / (M)) + #define DO_REMU(N, M) (unlikely(M == 0) ? (N) : (N) % (M)) + #define DO_DIV(N, M) (unlikely(M == 0) ? (-1) :\ + unlikely((N == -(N)) && (M == -1)) ? (N) : (N) / (M)) + #define DO_REM(N, M) (unlikely(M == 0) ? (N) :\ + unlikely((N == -(N)) && (M == -1)) ? 0 : (N) % (M)) +#else + #define DO_DIVU(N, M) (unlikely(M == 0) ? (__typeof(N))(-1) : N / M) + #define DO_REMU(N, M) (unlikely(M == 0) ? N : N % M) + #define DO_DIV(N, M) (unlikely(M == 0) ? (__typeof(N))(-1) :\ + unlikely((N == -N) && (M == (__typeof(N))(-1))) ? N : N / M) + #define DO_REM(N, M) (unlikely(M == 0) ? N :\ + unlikely((N == -N) && (M == (__typeof(N))(-1))) ? 0 : N % M) +#endif + +RVVCALL(OPIVV2, vdivu_vv_b, OP_UUU_B, H1, H1, H1, DO_DIVU) +RVVCALL(OPIVV2, vdivu_vv_h, OP_UUU_H, H2, H2, H2, DO_DIVU) +RVVCALL(OPIVV2, vdivu_vv_w, OP_UUU_W, H4, H4, H4, DO_DIVU) +RVVCALL(OPIVV2, vdivu_vv_d, OP_UUU_D, H8, H8, H8, DO_DIVU) +RVVCALL(OPIVV2, vdiv_vv_b, OP_SSS_B, H1, H1, H1, DO_DIV) +RVVCALL(OPIVV2, vdiv_vv_h, OP_SSS_H, H2, H2, H2, DO_DIV) +RVVCALL(OPIVV2, vdiv_vv_w, OP_SSS_W, H4, H4, H4, DO_DIV) +RVVCALL(OPIVV2, vdiv_vv_d, OP_SSS_D, H8, H8, H8, DO_DIV) +RVVCALL(OPIVV2, vremu_vv_b, OP_UUU_B, H1, H1, H1, DO_REMU) +RVVCALL(OPIVV2, vremu_vv_h, OP_UUU_H, H2, H2, H2, DO_REMU) +RVVCALL(OPIVV2, vremu_vv_w, OP_UUU_W, H4, H4, H4, DO_REMU) +RVVCALL(OPIVV2, vremu_vv_d, OP_UUU_D, H8, H8, H8, DO_REMU) +RVVCALL(OPIVV2, vrem_vv_b, OP_SSS_B, H1, H1, H1, DO_REM) +RVVCALL(OPIVV2, vrem_vv_h, OP_SSS_H, H2, H2, H2, DO_REM) +RVVCALL(OPIVV2, vrem_vv_w, OP_SSS_W, H4, H4, H4, DO_REM) +RVVCALL(OPIVV2, vrem_vv_d, OP_SSS_D, H8, H8, H8, DO_REM) +GEN_VEXT_VV(vdivu_vv_b, 1, 1, clearb) +GEN_VEXT_VV(vdivu_vv_h, 2, 2, clearh) +GEN_VEXT_VV(vdivu_vv_w, 4, 4, clearl) +GEN_VEXT_VV(vdivu_vv_d, 8, 8, clearq) +GEN_VEXT_VV(vdiv_vv_b, 1, 1, clearb) +GEN_VEXT_VV(vdiv_vv_h, 2, 2, clearh) +GEN_VEXT_VV(vdiv_vv_w, 4, 4, clearl) +GEN_VEXT_VV(vdiv_vv_d, 8, 8, clearq) +GEN_VEXT_VV(vremu_vv_b, 1, 1, clearb) +GEN_VEXT_VV(vremu_vv_h, 2, 2, clearh) +GEN_VEXT_VV(vremu_vv_w, 4, 4, clearl) +GEN_VEXT_VV(vremu_vv_d, 8, 8, clearq) +GEN_VEXT_VV(vrem_vv_b, 1, 1, clearb) +GEN_VEXT_VV(vrem_vv_h, 2, 2, clearh) +GEN_VEXT_VV(vrem_vv_w, 4, 4, clearl) +GEN_VEXT_VV(vrem_vv_d, 8, 8, clearq) + +RVVCALL(OPIVX2, vdivu_vx_b, OP_UUU_B, H1, H1, DO_DIVU) +RVVCALL(OPIVX2, vdivu_vx_h, OP_UUU_H, H2, H2, DO_DIVU) +RVVCALL(OPIVX2, vdivu_vx_w, OP_UUU_W, H4, H4, DO_DIVU) +RVVCALL(OPIVX2, vdivu_vx_d, OP_UUU_D, H8, H8, DO_DIVU) +RVVCALL(OPIVX2, vdiv_vx_b, OP_SSS_B, H1, H1, DO_DIV) +RVVCALL(OPIVX2, vdiv_vx_h, OP_SSS_H, H2, H2, DO_DIV) +RVVCALL(OPIVX2, vdiv_vx_w, OP_SSS_W, H4, H4, DO_DIV) +RVVCALL(OPIVX2, vdiv_vx_d, OP_SSS_D, H8, H8, DO_DIV) +RVVCALL(OPIVX2, vremu_vx_b, OP_UUU_B, H1, H1, DO_REMU) +RVVCALL(OPIVX2, vremu_vx_h, 
OP_UUU_H, H2, H2, DO_REMU) +RVVCALL(OPIVX2, vremu_vx_w, OP_UUU_W, H4, H4, DO_REMU) +RVVCALL(OPIVX2, vremu_vx_d, OP_UUU_D, H8, H8, DO_REMU) +RVVCALL(OPIVX2, vrem_vx_b, OP_SSS_B, H1, H1, DO_REM) +RVVCALL(OPIVX2, vrem_vx_h, OP_SSS_H, H2, H2, DO_REM) +RVVCALL(OPIVX2, vrem_vx_w, OP_SSS_W, H4, H4, DO_REM) +RVVCALL(OPIVX2, vrem_vx_d, OP_SSS_D, H8, H8, DO_REM) +GEN_VEXT_VX(vdivu_vx_b, 1, 1, clearb) +GEN_VEXT_VX(vdivu_vx_h, 2, 2, clearh) +GEN_VEXT_VX(vdivu_vx_w, 4, 4, clearl) +GEN_VEXT_VX(vdivu_vx_d, 8, 8, clearq) +GEN_VEXT_VX(vdiv_vx_b, 1, 1, clearb) +GEN_VEXT_VX(vdiv_vx_h, 2, 2, clearh) +GEN_VEXT_VX(vdiv_vx_w, 4, 4, clearl) +GEN_VEXT_VX(vdiv_vx_d, 8, 8, clearq) +GEN_VEXT_VX(vremu_vx_b, 1, 1, clearb) +GEN_VEXT_VX(vremu_vx_h, 2, 2, clearh) +GEN_VEXT_VX(vremu_vx_w, 4, 4, clearl) +GEN_VEXT_VX(vremu_vx_d, 8, 8, clearq) +GEN_VEXT_VX(vrem_vx_b, 1, 1, clearb) +GEN_VEXT_VX(vrem_vx_h, 2, 2, clearh) +GEN_VEXT_VX(vrem_vx_w, 4, 4, clearl) +GEN_VEXT_VX(vrem_vx_d, 8, 8, clearq) + +/* Vector Widening Integer Multiply Instructions */ +RVVCALL(OPIVV2, vwmul_vv_b, WOP_SSS_B, H2, H1, H1, DO_MUL) +RVVCALL(OPIVV2, vwmul_vv_h, WOP_SSS_H, H4, H2, H2, DO_MUL) +RVVCALL(OPIVV2, vwmul_vv_w, WOP_SSS_W, H8, H4, H4, DO_MUL) +RVVCALL(OPIVV2, vwmulu_vv_b, WOP_UUU_B, H2, H1, H1, DO_MUL) +RVVCALL(OPIVV2, vwmulu_vv_h, WOP_UUU_H, H4, H2, H2, DO_MUL) +RVVCALL(OPIVV2, vwmulu_vv_w, WOP_UUU_W, H8, H4, H4, DO_MUL) +RVVCALL(OPIVV2, vwmulsu_vv_b, WOP_SUS_B, H2, H1, H1, DO_MUL) +RVVCALL(OPIVV2, vwmulsu_vv_h, WOP_SUS_H, H4, H2, H2, DO_MUL) +RVVCALL(OPIVV2, vwmulsu_vv_w, WOP_SUS_W, H8, H4, H4, DO_MUL) +GEN_VEXT_VV(vwmul_vv_b, 1, 2, clearh) +GEN_VEXT_VV(vwmul_vv_h, 2, 4, clearl) +GEN_VEXT_VV(vwmul_vv_w, 4, 8, clearq) +GEN_VEXT_VV(vwmulu_vv_b, 1, 2, clearh) +GEN_VEXT_VV(vwmulu_vv_h, 2, 4, clearl) +GEN_VEXT_VV(vwmulu_vv_w, 4, 8, clearq) +GEN_VEXT_VV(vwmulsu_vv_b, 1, 2, clearh) +GEN_VEXT_VV(vwmulsu_vv_h, 2, 4, clearl) +GEN_VEXT_VV(vwmulsu_vv_w, 4, 8, clearq) + +RVVCALL(OPIVX2, vwmul_vx_b, WOP_SSS_B, H2, H1, DO_MUL) +RVVCALL(OPIVX2, vwmul_vx_h, WOP_SSS_H, H4, H2, DO_MUL) +RVVCALL(OPIVX2, vwmul_vx_w, WOP_SSS_W, H8, H4, DO_MUL) +RVVCALL(OPIVX2, vwmulu_vx_b, WOP_UUU_B, H2, H1, DO_MUL) +RVVCALL(OPIVX2, vwmulu_vx_h, WOP_UUU_H, H4, H2, DO_MUL) +RVVCALL(OPIVX2, vwmulu_vx_w, WOP_UUU_W, H8, H4, DO_MUL) +RVVCALL(OPIVX2, vwmulsu_vx_b, WOP_SUS_B, H2, H1, DO_MUL) +RVVCALL(OPIVX2, vwmulsu_vx_h, WOP_SUS_H, H4, H2, DO_MUL) +RVVCALL(OPIVX2, vwmulsu_vx_w, WOP_SUS_W, H8, H4, DO_MUL) +GEN_VEXT_VX(vwmul_vx_b, 1, 2, clearh) +GEN_VEXT_VX(vwmul_vx_h, 2, 4, clearl) +GEN_VEXT_VX(vwmul_vx_w, 4, 8, clearq) +GEN_VEXT_VX(vwmulu_vx_b, 1, 2, clearh) +GEN_VEXT_VX(vwmulu_vx_h, 2, 4, clearl) +GEN_VEXT_VX(vwmulu_vx_w, 4, 8, clearq) +GEN_VEXT_VX(vwmulsu_vx_b, 1, 2, clearh) +GEN_VEXT_VX(vwmulsu_vx_h, 2, 4, clearl) +GEN_VEXT_VX(vwmulsu_vx_w, 4, 8, clearq) + +/* Vector Single-Width Integer Multiply-Add Instructions */ +#define OPIVV3(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ +static void do_##NAME(void *vd, void *vs1, void *vs2, int i) \ +{ \ + TX1 s1 = *((T1 *)vs1 + HS1(i)); \ + TX2 s2 = *((T2 *)vs2 + HS2(i)); \ + TD d = *((TD *)vd + HD(i)); \ + *((TD *)vd + HD(i)) = OP(s2, s1, d); \ +} + +#define DO_MACC(N, M, D) (M * N + D) +#define DO_NMSAC(N, M, D) (-(M * N) + D) +#define DO_MADD(N, M, D) (M * D + N) +#define DO_NMSUB(N, M, D) (-(M * D) + N) +RVVCALL(OPIVV3, vmacc_vv_b, OP_SSS_B, H1, H1, H1, DO_MACC) +RVVCALL(OPIVV3, vmacc_vv_h, OP_SSS_H, H2, H2, H2, DO_MACC) +RVVCALL(OPIVV3, vmacc_vv_w, OP_SSS_W, H4, H4, H4, DO_MACC) +RVVCALL(OPIVV3, vmacc_vv_d, OP_SSS_D, H8, H8, H8, 
DO_MACC) +RVVCALL(OPIVV3, vnmsac_vv_b, OP_SSS_B, H1, H1, H1, DO_NMSAC) +RVVCALL(OPIVV3, vnmsac_vv_h, OP_SSS_H, H2, H2, H2, DO_NMSAC) +RVVCALL(OPIVV3, vnmsac_vv_w, OP_SSS_W, H4, H4, H4, DO_NMSAC) +RVVCALL(OPIVV3, vnmsac_vv_d, OP_SSS_D, H8, H8, H8, DO_NMSAC) +RVVCALL(OPIVV3, vmadd_vv_b, OP_SSS_B, H1, H1, H1, DO_MADD) +RVVCALL(OPIVV3, vmadd_vv_h, OP_SSS_H, H2, H2, H2, DO_MADD) +RVVCALL(OPIVV3, vmadd_vv_w, OP_SSS_W, H4, H4, H4, DO_MADD) +RVVCALL(OPIVV3, vmadd_vv_d, OP_SSS_D, H8, H8, H8, DO_MADD) +RVVCALL(OPIVV3, vnmsub_vv_b, OP_SSS_B, H1, H1, H1, DO_NMSUB) +RVVCALL(OPIVV3, vnmsub_vv_h, OP_SSS_H, H2, H2, H2, DO_NMSUB) +RVVCALL(OPIVV3, vnmsub_vv_w, OP_SSS_W, H4, H4, H4, DO_NMSUB) +RVVCALL(OPIVV3, vnmsub_vv_d, OP_SSS_D, H8, H8, H8, DO_NMSUB) +GEN_VEXT_VV(vmacc_vv_b, 1, 1, clearb) +GEN_VEXT_VV(vmacc_vv_h, 2, 2, clearh) +GEN_VEXT_VV(vmacc_vv_w, 4, 4, clearl) +GEN_VEXT_VV(vmacc_vv_d, 8, 8, clearq) +GEN_VEXT_VV(vnmsac_vv_b, 1, 1, clearb) +GEN_VEXT_VV(vnmsac_vv_h, 2, 2, clearh) +GEN_VEXT_VV(vnmsac_vv_w, 4, 4, clearl) +GEN_VEXT_VV(vnmsac_vv_d, 8, 8, clearq) +GEN_VEXT_VV(vmadd_vv_b, 1, 1, clearb) +GEN_VEXT_VV(vmadd_vv_h, 2, 2, clearh) +GEN_VEXT_VV(vmadd_vv_w, 4, 4, clearl) +GEN_VEXT_VV(vmadd_vv_d, 8, 8, clearq) +GEN_VEXT_VV(vnmsub_vv_b, 1, 1, clearb) +GEN_VEXT_VV(vnmsub_vv_h, 2, 2, clearh) +GEN_VEXT_VV(vnmsub_vv_w, 4, 4, clearl) +GEN_VEXT_VV(vnmsub_vv_d, 8, 8, clearq) + +#define OPIVX3(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ +static void do_##NAME(void *vd, target_long s1, void *vs2, int i) \ +{ \ + TX2 s2 = *((T2 *)vs2 + HS2(i)); \ + TD d = *((TD *)vd + HD(i)); \ + *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, d); \ +} + +RVVCALL(OPIVX3, vmacc_vx_b, OP_SSS_B, H1, H1, DO_MACC) +RVVCALL(OPIVX3, vmacc_vx_h, OP_SSS_H, H2, H2, DO_MACC) +RVVCALL(OPIVX3, vmacc_vx_w, OP_SSS_W, H4, H4, DO_MACC) +RVVCALL(OPIVX3, vmacc_vx_d, OP_SSS_D, H8, H8, DO_MACC) +RVVCALL(OPIVX3, vnmsac_vx_b, OP_SSS_B, H1, H1, DO_NMSAC) +RVVCALL(OPIVX3, vnmsac_vx_h, OP_SSS_H, H2, H2, DO_NMSAC) +RVVCALL(OPIVX3, vnmsac_vx_w, OP_SSS_W, H4, H4, DO_NMSAC) +RVVCALL(OPIVX3, vnmsac_vx_d, OP_SSS_D, H8, H8, DO_NMSAC) +RVVCALL(OPIVX3, vmadd_vx_b, OP_SSS_B, H1, H1, DO_MADD) +RVVCALL(OPIVX3, vmadd_vx_h, OP_SSS_H, H2, H2, DO_MADD) +RVVCALL(OPIVX3, vmadd_vx_w, OP_SSS_W, H4, H4, DO_MADD) +RVVCALL(OPIVX3, vmadd_vx_d, OP_SSS_D, H8, H8, DO_MADD) +RVVCALL(OPIVX3, vnmsub_vx_b, OP_SSS_B, H1, H1, DO_NMSUB) +RVVCALL(OPIVX3, vnmsub_vx_h, OP_SSS_H, H2, H2, DO_NMSUB) +RVVCALL(OPIVX3, vnmsub_vx_w, OP_SSS_W, H4, H4, DO_NMSUB) +RVVCALL(OPIVX3, vnmsub_vx_d, OP_SSS_D, H8, H8, DO_NMSUB) +GEN_VEXT_VX(vmacc_vx_b, 1, 1, clearb) +GEN_VEXT_VX(vmacc_vx_h, 2, 2, clearh) +GEN_VEXT_VX(vmacc_vx_w, 4, 4, clearl) +GEN_VEXT_VX(vmacc_vx_d, 8, 8, clearq) +GEN_VEXT_VX(vnmsac_vx_b, 1, 1, clearb) +GEN_VEXT_VX(vnmsac_vx_h, 2, 2, clearh) +GEN_VEXT_VX(vnmsac_vx_w, 4, 4, clearl) +GEN_VEXT_VX(vnmsac_vx_d, 8, 8, clearq) +GEN_VEXT_VX(vmadd_vx_b, 1, 1, clearb) +GEN_VEXT_VX(vmadd_vx_h, 2, 2, clearh) +GEN_VEXT_VX(vmadd_vx_w, 4, 4, clearl) +GEN_VEXT_VX(vmadd_vx_d, 8, 8, clearq) +GEN_VEXT_VX(vnmsub_vx_b, 1, 1, clearb) +GEN_VEXT_VX(vnmsub_vx_h, 2, 2, clearh) +GEN_VEXT_VX(vnmsub_vx_w, 4, 4, clearl) +GEN_VEXT_VX(vnmsub_vx_d, 8, 8, clearq) + +/* Vector Widening Integer Multiply-Add Instructions */ +RVVCALL(OPIVV3, vwmaccu_vv_b, WOP_UUU_B, H2, H1, H1, DO_MACC) +RVVCALL(OPIVV3, vwmaccu_vv_h, WOP_UUU_H, H4, H2, H2, DO_MACC) +RVVCALL(OPIVV3, vwmaccu_vv_w, WOP_UUU_W, H8, H4, H4, DO_MACC) +RVVCALL(OPIVV3, vwmacc_vv_b, WOP_SSS_B, H2, H1, H1, DO_MACC) +RVVCALL(OPIVV3, vwmacc_vv_h, WOP_SSS_H, H4, H2, H2, DO_MACC) 
+RVVCALL(OPIVV3, vwmacc_vv_w, WOP_SSS_W, H8, H4, H4, DO_MACC) +RVVCALL(OPIVV3, vwmaccsu_vv_b, WOP_SSU_B, H2, H1, H1, DO_MACC) +RVVCALL(OPIVV3, vwmaccsu_vv_h, WOP_SSU_H, H4, H2, H2, DO_MACC) +RVVCALL(OPIVV3, vwmaccsu_vv_w, WOP_SSU_W, H8, H4, H4, DO_MACC) +GEN_VEXT_VV(vwmaccu_vv_b, 1, 2, clearh) +GEN_VEXT_VV(vwmaccu_vv_h, 2, 4, clearl) +GEN_VEXT_VV(vwmaccu_vv_w, 4, 8, clearq) +GEN_VEXT_VV(vwmacc_vv_b, 1, 2, clearh) +GEN_VEXT_VV(vwmacc_vv_h, 2, 4, clearl) +GEN_VEXT_VV(vwmacc_vv_w, 4, 8, clearq) +GEN_VEXT_VV(vwmaccsu_vv_b, 1, 2, clearh) +GEN_VEXT_VV(vwmaccsu_vv_h, 2, 4, clearl) +GEN_VEXT_VV(vwmaccsu_vv_w, 4, 8, clearq) + +RVVCALL(OPIVX3, vwmaccu_vx_b, WOP_UUU_B, H2, H1, DO_MACC) +RVVCALL(OPIVX3, vwmaccu_vx_h, WOP_UUU_H, H4, H2, DO_MACC) +RVVCALL(OPIVX3, vwmaccu_vx_w, WOP_UUU_W, H8, H4, DO_MACC) +RVVCALL(OPIVX3, vwmacc_vx_b, WOP_SSS_B, H2, H1, DO_MACC) +RVVCALL(OPIVX3, vwmacc_vx_h, WOP_SSS_H, H4, H2, DO_MACC) +RVVCALL(OPIVX3, vwmacc_vx_w, WOP_SSS_W, H8, H4, DO_MACC) +RVVCALL(OPIVX3, vwmaccsu_vx_b, WOP_SSU_B, H2, H1, DO_MACC) +RVVCALL(OPIVX3, vwmaccsu_vx_h, WOP_SSU_H, H4, H2, DO_MACC) +RVVCALL(OPIVX3, vwmaccsu_vx_w, WOP_SSU_W, H8, H4, DO_MACC) +RVVCALL(OPIVX3, vwmaccus_vx_b, WOP_SUS_B, H2, H1, DO_MACC) +RVVCALL(OPIVX3, vwmaccus_vx_h, WOP_SUS_H, H4, H2, DO_MACC) +RVVCALL(OPIVX3, vwmaccus_vx_w, WOP_SUS_W, H8, H4, DO_MACC) +GEN_VEXT_VX(vwmaccu_vx_b, 1, 2, clearh) +GEN_VEXT_VX(vwmaccu_vx_h, 2, 4, clearl) +GEN_VEXT_VX(vwmaccu_vx_w, 4, 8, clearq) +GEN_VEXT_VX(vwmacc_vx_b, 1, 2, clearh) +GEN_VEXT_VX(vwmacc_vx_h, 2, 4, clearl) +GEN_VEXT_VX(vwmacc_vx_w, 4, 8, clearq) +GEN_VEXT_VX(vwmaccsu_vx_b, 1, 2, clearh) +GEN_VEXT_VX(vwmaccsu_vx_h, 2, 4, clearl) +GEN_VEXT_VX(vwmaccsu_vx_w, 4, 8, clearq) +GEN_VEXT_VX(vwmaccus_vx_b, 1, 2, clearh) +GEN_VEXT_VX(vwmaccus_vx_h, 2, 4, clearl) +GEN_VEXT_VX(vwmaccus_vx_w, 4, 8, clearq) + +/* Vector Integer Merge and Move Instructions */ +#define GEN_VEXT_VMV_VV(NAME, ETYPE, H, CLEAR_FN) \ +void HELPER(NAME)(void *vd, void *vs1, CPURISCVState *env, \ + uint32_t desc) \ +{ \ + uint32_t vl = env->vl; \ + uint32_t esz = sizeof(ETYPE); \ + uint32_t vlmax = vext_maxsz(desc) / esz; \ + uint32_t i; \ + \ + for (i = 0; i < vl; i++) { \ + ETYPE s1 = *((ETYPE *)vs1 + H(i)); \ + *((ETYPE *)vd + H(i)) = s1; \ + } \ + CLEAR_FN(vd, vl, vl * esz, vlmax * esz); \ +} + +GEN_VEXT_VMV_VV(vmv_v_v_b, int8_t, H1, clearb) +GEN_VEXT_VMV_VV(vmv_v_v_h, int16_t, H2, clearh) +GEN_VEXT_VMV_VV(vmv_v_v_w, int32_t, H4, clearl) +GEN_VEXT_VMV_VV(vmv_v_v_d, int64_t, H8, clearq) + +#define GEN_VEXT_VMV_VX(NAME, ETYPE, H, CLEAR_FN) \ +void HELPER(NAME)(void *vd, uint64_t s1, CPURISCVState *env, \ + uint32_t desc) \ +{ \ + uint32_t vl = env->vl; \ + uint32_t esz = sizeof(ETYPE); \ + uint32_t vlmax = vext_maxsz(desc) / esz; \ + uint32_t i; \ + \ + for (i = 0; i < vl; i++) { \ + *((ETYPE *)vd + H(i)) = (ETYPE)s1; \ + } \ + CLEAR_FN(vd, vl, vl * esz, vlmax * esz); \ +} + +GEN_VEXT_VMV_VX(vmv_v_x_b, int8_t, H1, clearb) +GEN_VEXT_VMV_VX(vmv_v_x_h, int16_t, H2, clearh) +GEN_VEXT_VMV_VX(vmv_v_x_w, int32_t, H4, clearl) +GEN_VEXT_VMV_VX(vmv_v_x_d, int64_t, H8, clearq) + +#define GEN_VEXT_VMERGE_VV(NAME, ETYPE, H, CLEAR_FN) \ +void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ + CPURISCVState *env, uint32_t desc) \ +{ \ + uint32_t mlen = vext_mlen(desc); \ + uint32_t vl = env->vl; \ + uint32_t esz = sizeof(ETYPE); \ + uint32_t vlmax = vext_maxsz(desc) / esz; \ + uint32_t i; \ + \ + for (i = 0; i < vl; i++) { \ + ETYPE *vt = (!vext_elem_mask(v0, mlen, i) ? 
vs2 : vs1); \ + *((ETYPE *)vd + H(i)) = *(vt + H(i)); \ + } \ + CLEAR_FN(vd, vl, vl * esz, vlmax * esz); \ +} + +GEN_VEXT_VMERGE_VV(vmerge_vvm_b, int8_t, H1, clearb) +GEN_VEXT_VMERGE_VV(vmerge_vvm_h, int16_t, H2, clearh) +GEN_VEXT_VMERGE_VV(vmerge_vvm_w, int32_t, H4, clearl) +GEN_VEXT_VMERGE_VV(vmerge_vvm_d, int64_t, H8, clearq) + +#define GEN_VEXT_VMERGE_VX(NAME, ETYPE, H, CLEAR_FN) \ +void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ + void *vs2, CPURISCVState *env, uint32_t desc) \ +{ \ + uint32_t mlen = vext_mlen(desc); \ + uint32_t vl = env->vl; \ + uint32_t esz = sizeof(ETYPE); \ + uint32_t vlmax = vext_maxsz(desc) / esz; \ + uint32_t i; \ + \ + for (i = 0; i < vl; i++) { \ + ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ + ETYPE d = (!vext_elem_mask(v0, mlen, i) ? s2 : \ + (ETYPE)(target_long)s1); \ + *((ETYPE *)vd + H(i)) = d; \ + } \ + CLEAR_FN(vd, vl, vl * esz, vlmax * esz); \ +} + +GEN_VEXT_VMERGE_VX(vmerge_vxm_b, int8_t, H1, clearb) +GEN_VEXT_VMERGE_VX(vmerge_vxm_h, int16_t, H2, clearh) +GEN_VEXT_VMERGE_VX(vmerge_vxm_w, int32_t, H4, clearl) +GEN_VEXT_VMERGE_VX(vmerge_vxm_d, int64_t, H8, clearq) + +/* + *** Vector Fixed-Point Arithmetic Instructions + */ + +/* Vector Single-Width Saturating Add and Subtract */ + +/* + * As fixed point instructions probably have round mode and saturation, + * define common macros for fixed point here. + */ +typedef void opivv2_rm_fn(void *vd, void *vs1, void *vs2, int i, + CPURISCVState *env, int vxrm); + +#define OPIVV2_RM(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ +static inline void \ +do_##NAME(void *vd, void *vs1, void *vs2, int i, \ + CPURISCVState *env, int vxrm) \ +{ \ + TX1 s1 = *((T1 *)vs1 + HS1(i)); \ + TX2 s2 = *((T2 *)vs2 + HS2(i)); \ + *((TD *)vd + HD(i)) = OP(env, vxrm, s2, s1); \ +} + +static inline void +vext_vv_rm_1(void *vd, void *v0, void *vs1, void *vs2, + CPURISCVState *env, + uint32_t vl, uint32_t vm, uint32_t mlen, int vxrm, + opivv2_rm_fn *fn) +{ + for (uint32_t i = 0; i < vl; i++) { + if (!vm && !vext_elem_mask(v0, mlen, i)) { + continue; + } + fn(vd, vs1, vs2, i, env, vxrm); + } +} + +static inline void +vext_vv_rm_2(void *vd, void *v0, void *vs1, void *vs2, + CPURISCVState *env, + uint32_t desc, uint32_t esz, uint32_t dsz, + opivv2_rm_fn *fn, clear_fn *clearfn) +{ + uint32_t vlmax = vext_maxsz(desc) / esz; + uint32_t mlen = vext_mlen(desc); + uint32_t vm = vext_vm(desc); + uint32_t vl = env->vl; + + switch (env->vxrm) { + case 0: /* rnu */ + vext_vv_rm_1(vd, v0, vs1, vs2, + env, vl, vm, mlen, 0, fn); + break; + case 1: /* rne */ + vext_vv_rm_1(vd, v0, vs1, vs2, + env, vl, vm, mlen, 1, fn); + break; + case 2: /* rdn */ + vext_vv_rm_1(vd, v0, vs1, vs2, + env, vl, vm, mlen, 2, fn); + break; + default: /* rod */ + vext_vv_rm_1(vd, v0, vs1, vs2, + env, vl, vm, mlen, 3, fn); + break; + } + + clearfn(vd, vl, vl * dsz, vlmax * dsz); +} + +/* generate helpers for fixed point instructions with OPIVV format */ +#define GEN_VEXT_VV_RM(NAME, ESZ, DSZ, CLEAR_FN) \ +void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ + CPURISCVState *env, uint32_t desc) \ +{ \ + vext_vv_rm_2(vd, v0, vs1, vs2, env, desc, ESZ, DSZ, \ + do_##NAME, CLEAR_FN); \ +} + +static inline uint8_t saddu8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b) +{ + uint8_t res = a + b; + if (res < a) { + res = UINT8_MAX; + env->vxsat = 0x1; + } + return res; +} + +static inline uint16_t saddu16(CPURISCVState *env, int vxrm, uint16_t a, + uint16_t b) +{ + uint16_t res = a + b; + if (res < a) { + res = UINT16_MAX; + env->vxsat = 0x1; + } + return res; +} + 
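(Editor's note — illustrative sketch, not part of the upstream diff: saddu8/saddu16 above detect unsigned overflow with the wraparound test "result < operand", which for unsigned addition holds exactly when the sum did not fit in the element type, and then saturate to the type maximum while setting the sticky vxsat flag. The standalone C example below uses sat_addu8 as a hypothetical stand-in, replacing the env->vxsat plumbing with a plain int flag.)

#include <stdint.h>
#include <stdio.h>

static uint8_t sat_addu8(uint8_t a, uint8_t b, int *sat)
{
    uint8_t res = (uint8_t)(a + b);
    if (res < a) {        /* sum wrapped past UINT8_MAX */
        *sat = 1;         /* mirrors env->vxsat = 0x1 in the helpers above */
        return UINT8_MAX;
    }
    return res;
}

int main(void)
{
    int sat = 0;
    printf("%d %d\n", sat_addu8(200, 100, &sat), sat);  /* 255 1 */
    printf("%d %d\n", sat_addu8(100, 100, &sat), sat);  /* 200 1: the flag is sticky */
    return 0;
}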
+static inline uint32_t saddu32(CPURISCVState *env, int vxrm, uint32_t a, + uint32_t b) +{ + uint32_t res = a + b; + if (res < a) { + res = UINT32_MAX; + env->vxsat = 0x1; + } + return res; +} + +static inline uint64_t saddu64(CPURISCVState *env, int vxrm, uint64_t a, + uint64_t b) +{ + uint64_t res = a + b; + if (res < a) { + res = UINT64_MAX; + env->vxsat = 0x1; + } + return res; +} + +RVVCALL(OPIVV2_RM, vsaddu_vv_b, OP_UUU_B, H1, H1, H1, saddu8) +RVVCALL(OPIVV2_RM, vsaddu_vv_h, OP_UUU_H, H2, H2, H2, saddu16) +RVVCALL(OPIVV2_RM, vsaddu_vv_w, OP_UUU_W, H4, H4, H4, saddu32) +RVVCALL(OPIVV2_RM, vsaddu_vv_d, OP_UUU_D, H8, H8, H8, saddu64) +GEN_VEXT_VV_RM(vsaddu_vv_b, 1, 1, clearb) +GEN_VEXT_VV_RM(vsaddu_vv_h, 2, 2, clearh) +GEN_VEXT_VV_RM(vsaddu_vv_w, 4, 4, clearl) +GEN_VEXT_VV_RM(vsaddu_vv_d, 8, 8, clearq) + +typedef void opivx2_rm_fn(void *vd, target_long s1, void *vs2, int i, + CPURISCVState *env, int vxrm); + +#define OPIVX2_RM(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ +static inline void \ +do_##NAME(void *vd, target_long s1, void *vs2, int i, \ + CPURISCVState *env, int vxrm) \ +{ \ + TX2 s2 = *((T2 *)vs2 + HS2(i)); \ + *((TD *)vd + HD(i)) = OP(env, vxrm, s2, (TX1)(T1)s1); \ +} + +static inline void +vext_vx_rm_1(void *vd, void *v0, target_long s1, void *vs2, + CPURISCVState *env, + uint32_t vl, uint32_t vm, uint32_t mlen, int vxrm, + opivx2_rm_fn *fn) +{ + for (uint32_t i = 0; i < vl; i++) { + if (!vm && !vext_elem_mask(v0, mlen, i)) { + continue; + } + fn(vd, s1, vs2, i, env, vxrm); + } +} + +static inline void +vext_vx_rm_2(void *vd, void *v0, target_long s1, void *vs2, + CPURISCVState *env, + uint32_t desc, uint32_t esz, uint32_t dsz, + opivx2_rm_fn *fn, clear_fn *clearfn) +{ + uint32_t vlmax = vext_maxsz(desc) / esz; + uint32_t mlen = vext_mlen(desc); + uint32_t vm = vext_vm(desc); + uint32_t vl = env->vl; + + switch (env->vxrm) { + case 0: /* rnu */ + vext_vx_rm_1(vd, v0, s1, vs2, + env, vl, vm, mlen, 0, fn); + break; + case 1: /* rne */ + vext_vx_rm_1(vd, v0, s1, vs2, + env, vl, vm, mlen, 1, fn); + break; + case 2: /* rdn */ + vext_vx_rm_1(vd, v0, s1, vs2, + env, vl, vm, mlen, 2, fn); + break; + default: /* rod */ + vext_vx_rm_1(vd, v0, s1, vs2, + env, vl, vm, mlen, 3, fn); + break; + } + + clearfn(vd, vl, vl * dsz, vlmax * dsz); +} + +/* generate helpers for fixed point instructions with OPIVX format */ +#define GEN_VEXT_VX_RM(NAME, ESZ, DSZ, CLEAR_FN) \ +void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ + void *vs2, CPURISCVState *env, uint32_t desc) \ +{ \ + vext_vx_rm_2(vd, v0, s1, vs2, env, desc, ESZ, DSZ, \ + do_##NAME, CLEAR_FN); \ +} + +RVVCALL(OPIVX2_RM, vsaddu_vx_b, OP_UUU_B, H1, H1, saddu8) +RVVCALL(OPIVX2_RM, vsaddu_vx_h, OP_UUU_H, H2, H2, saddu16) +RVVCALL(OPIVX2_RM, vsaddu_vx_w, OP_UUU_W, H4, H4, saddu32) +RVVCALL(OPIVX2_RM, vsaddu_vx_d, OP_UUU_D, H8, H8, saddu64) +GEN_VEXT_VX_RM(vsaddu_vx_b, 1, 1, clearb) +GEN_VEXT_VX_RM(vsaddu_vx_h, 2, 2, clearh) +GEN_VEXT_VX_RM(vsaddu_vx_w, 4, 4, clearl) +GEN_VEXT_VX_RM(vsaddu_vx_d, 8, 8, clearq) + +static inline int8_t sadd8(CPURISCVState *env, int vxrm, int8_t a, int8_t b) +{ + int8_t res = a + b; + if ((res ^ a) & (res ^ b) & INT8_MIN) { + res = a > 0 ? INT8_MAX : INT8_MIN; + env->vxsat = 0x1; + } + return res; +} + +static inline int16_t sadd16(CPURISCVState *env, int vxrm, int16_t a, int16_t b) +{ + int16_t res = a + b; + if ((res ^ a) & (res ^ b) & INT16_MIN) { + res = a > 0 ? 
INT16_MAX : INT16_MIN; + env->vxsat = 0x1; + } + return res; +} + +static inline int32_t sadd32(CPURISCVState *env, int vxrm, int32_t a, int32_t b) +{ + int32_t res = a + b; + if ((res ^ a) & (res ^ b) & INT32_MIN) { + res = a > 0 ? INT32_MAX : INT32_MIN; + env->vxsat = 0x1; + } + return res; +} + +static inline int64_t sadd64(CPURISCVState *env, int vxrm, int64_t a, int64_t b) +{ + int64_t res = a + b; + if ((res ^ a) & (res ^ b) & INT64_MIN) { + res = a > 0 ? INT64_MAX : INT64_MIN; + env->vxsat = 0x1; + } + return res; +} + +RVVCALL(OPIVV2_RM, vsadd_vv_b, OP_SSS_B, H1, H1, H1, sadd8) +RVVCALL(OPIVV2_RM, vsadd_vv_h, OP_SSS_H, H2, H2, H2, sadd16) +RVVCALL(OPIVV2_RM, vsadd_vv_w, OP_SSS_W, H4, H4, H4, sadd32) +RVVCALL(OPIVV2_RM, vsadd_vv_d, OP_SSS_D, H8, H8, H8, sadd64) +GEN_VEXT_VV_RM(vsadd_vv_b, 1, 1, clearb) +GEN_VEXT_VV_RM(vsadd_vv_h, 2, 2, clearh) +GEN_VEXT_VV_RM(vsadd_vv_w, 4, 4, clearl) +GEN_VEXT_VV_RM(vsadd_vv_d, 8, 8, clearq) + +RVVCALL(OPIVX2_RM, vsadd_vx_b, OP_SSS_B, H1, H1, sadd8) +RVVCALL(OPIVX2_RM, vsadd_vx_h, OP_SSS_H, H2, H2, sadd16) +RVVCALL(OPIVX2_RM, vsadd_vx_w, OP_SSS_W, H4, H4, sadd32) +RVVCALL(OPIVX2_RM, vsadd_vx_d, OP_SSS_D, H8, H8, sadd64) +GEN_VEXT_VX_RM(vsadd_vx_b, 1, 1, clearb) +GEN_VEXT_VX_RM(vsadd_vx_h, 2, 2, clearh) +GEN_VEXT_VX_RM(vsadd_vx_w, 4, 4, clearl) +GEN_VEXT_VX_RM(vsadd_vx_d, 8, 8, clearq) + +static inline uint8_t ssubu8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b) +{ + uint8_t res = a - b; + if (res > a) { + res = 0; + env->vxsat = 0x1; + } + return res; +} + +static inline uint16_t ssubu16(CPURISCVState *env, int vxrm, uint16_t a, + uint16_t b) +{ + uint16_t res = a - b; + if (res > a) { + res = 0; + env->vxsat = 0x1; + } + return res; +} + +static inline uint32_t ssubu32(CPURISCVState *env, int vxrm, uint32_t a, + uint32_t b) +{ + uint32_t res = a - b; + if (res > a) { + res = 0; + env->vxsat = 0x1; + } + return res; +} + +static inline uint64_t ssubu64(CPURISCVState *env, int vxrm, uint64_t a, + uint64_t b) +{ + uint64_t res = a - b; + if (res > a) { + res = 0; + env->vxsat = 0x1; + } + return res; +} + +RVVCALL(OPIVV2_RM, vssubu_vv_b, OP_UUU_B, H1, H1, H1, ssubu8) +RVVCALL(OPIVV2_RM, vssubu_vv_h, OP_UUU_H, H2, H2, H2, ssubu16) +RVVCALL(OPIVV2_RM, vssubu_vv_w, OP_UUU_W, H4, H4, H4, ssubu32) +RVVCALL(OPIVV2_RM, vssubu_vv_d, OP_UUU_D, H8, H8, H8, ssubu64) +GEN_VEXT_VV_RM(vssubu_vv_b, 1, 1, clearb) +GEN_VEXT_VV_RM(vssubu_vv_h, 2, 2, clearh) +GEN_VEXT_VV_RM(vssubu_vv_w, 4, 4, clearl) +GEN_VEXT_VV_RM(vssubu_vv_d, 8, 8, clearq) + +RVVCALL(OPIVX2_RM, vssubu_vx_b, OP_UUU_B, H1, H1, ssubu8) +RVVCALL(OPIVX2_RM, vssubu_vx_h, OP_UUU_H, H2, H2, ssubu16) +RVVCALL(OPIVX2_RM, vssubu_vx_w, OP_UUU_W, H4, H4, ssubu32) +RVVCALL(OPIVX2_RM, vssubu_vx_d, OP_UUU_D, H8, H8, ssubu64) +GEN_VEXT_VX_RM(vssubu_vx_b, 1, 1, clearb) +GEN_VEXT_VX_RM(vssubu_vx_h, 2, 2, clearh) +GEN_VEXT_VX_RM(vssubu_vx_w, 4, 4, clearl) +GEN_VEXT_VX_RM(vssubu_vx_d, 8, 8, clearq) + +static inline int8_t ssub8(CPURISCVState *env, int vxrm, int8_t a, int8_t b) +{ + int8_t res = a - b; + if ((res ^ a) & (a ^ b) & INT8_MIN) { + res = a > 0 ? INT8_MAX : INT8_MIN; + env->vxsat = 0x1; + } + return res; +} + +static inline int16_t ssub16(CPURISCVState *env, int vxrm, int16_t a, int16_t b) +{ + int16_t res = a - b; + if ((res ^ a) & (a ^ b) & INT16_MIN) { + res = a > 0 ? INT16_MAX : INT16_MIN; + env->vxsat = 0x1; + } + return res; +} + +static inline int32_t ssub32(CPURISCVState *env, int vxrm, int32_t a, int32_t b) +{ + int32_t res = a - b; + if ((res ^ a) & (a ^ b) & INT32_MIN) { + res = a > 0 ? 
INT32_MAX : INT32_MIN; + env->vxsat = 0x1; + } + return res; +} + +static inline int64_t ssub64(CPURISCVState *env, int vxrm, int64_t a, int64_t b) +{ + int64_t res = a - b; + if ((res ^ a) & (a ^ b) & INT64_MIN) { + res = a > 0 ? INT64_MAX : INT64_MIN; + env->vxsat = 0x1; + } + return res; +} + +RVVCALL(OPIVV2_RM, vssub_vv_b, OP_SSS_B, H1, H1, H1, ssub8) +RVVCALL(OPIVV2_RM, vssub_vv_h, OP_SSS_H, H2, H2, H2, ssub16) +RVVCALL(OPIVV2_RM, vssub_vv_w, OP_SSS_W, H4, H4, H4, ssub32) +RVVCALL(OPIVV2_RM, vssub_vv_d, OP_SSS_D, H8, H8, H8, ssub64) +GEN_VEXT_VV_RM(vssub_vv_b, 1, 1, clearb) +GEN_VEXT_VV_RM(vssub_vv_h, 2, 2, clearh) +GEN_VEXT_VV_RM(vssub_vv_w, 4, 4, clearl) +GEN_VEXT_VV_RM(vssub_vv_d, 8, 8, clearq) + +RVVCALL(OPIVX2_RM, vssub_vx_b, OP_SSS_B, H1, H1, ssub8) +RVVCALL(OPIVX2_RM, vssub_vx_h, OP_SSS_H, H2, H2, ssub16) +RVVCALL(OPIVX2_RM, vssub_vx_w, OP_SSS_W, H4, H4, ssub32) +RVVCALL(OPIVX2_RM, vssub_vx_d, OP_SSS_D, H8, H8, ssub64) +GEN_VEXT_VX_RM(vssub_vx_b, 1, 1, clearb) +GEN_VEXT_VX_RM(vssub_vx_h, 2, 2, clearh) +GEN_VEXT_VX_RM(vssub_vx_w, 4, 4, clearl) +GEN_VEXT_VX_RM(vssub_vx_d, 8, 8, clearq) + +/* Vector Single-Width Averaging Add and Subtract */ +static inline uint8_t get_round(int vxrm, uint64_t v, uint8_t shift) +{ + uint8_t d = extract64(v, shift, 1); + uint8_t d1; + uint64_t D1, D2; + + if (shift == 0 || shift > 64) { + return 0; + } + + d1 = extract64(v, shift - 1, 1); + D1 = extract64(v, 0, shift); + if (vxrm == 0) { /* round-to-nearest-up (add +0.5 LSB) */ + return d1; + } else if (vxrm == 1) { /* round-to-nearest-even */ + if (shift > 1) { + D2 = extract64(v, 0, shift - 1); + return d1 & ((D2 != 0) | d); + } else { + return d1 & d; + } + } else if (vxrm == 3) { /* round-to-odd (OR bits into LSB, aka "jam") */ + return !d & (D1 != 0); + } + return 0; /* round-down (truncate) */ +} + +static inline int32_t aadd32(CPURISCVState *env, int vxrm, int32_t a, int32_t b) +{ + int64_t res = (int64_t)a + b; + uint8_t round = get_round(vxrm, res, 1); + + return (res >> 1) + round; +} + +static inline int64_t aadd64(CPURISCVState *env, int vxrm, int64_t a, int64_t b) +{ + int64_t res = a + b; + uint8_t round = get_round(vxrm, res, 1); + int64_t over = (res ^ a) & (res ^ b) & INT64_MIN; + + /* With signed overflow, bit 64 is inverse of bit 63. 
*/ + return ((res >> 1) ^ over) + round; +} + +RVVCALL(OPIVV2_RM, vaadd_vv_b, OP_SSS_B, H1, H1, H1, aadd32) +RVVCALL(OPIVV2_RM, vaadd_vv_h, OP_SSS_H, H2, H2, H2, aadd32) +RVVCALL(OPIVV2_RM, vaadd_vv_w, OP_SSS_W, H4, H4, H4, aadd32) +RVVCALL(OPIVV2_RM, vaadd_vv_d, OP_SSS_D, H8, H8, H8, aadd64) +GEN_VEXT_VV_RM(vaadd_vv_b, 1, 1, clearb) +GEN_VEXT_VV_RM(vaadd_vv_h, 2, 2, clearh) +GEN_VEXT_VV_RM(vaadd_vv_w, 4, 4, clearl) +GEN_VEXT_VV_RM(vaadd_vv_d, 8, 8, clearq) + +RVVCALL(OPIVX2_RM, vaadd_vx_b, OP_SSS_B, H1, H1, aadd32) +RVVCALL(OPIVX2_RM, vaadd_vx_h, OP_SSS_H, H2, H2, aadd32) +RVVCALL(OPIVX2_RM, vaadd_vx_w, OP_SSS_W, H4, H4, aadd32) +RVVCALL(OPIVX2_RM, vaadd_vx_d, OP_SSS_D, H8, H8, aadd64) +GEN_VEXT_VX_RM(vaadd_vx_b, 1, 1, clearb) +GEN_VEXT_VX_RM(vaadd_vx_h, 2, 2, clearh) +GEN_VEXT_VX_RM(vaadd_vx_w, 4, 4, clearl) +GEN_VEXT_VX_RM(vaadd_vx_d, 8, 8, clearq) + +static inline int32_t asub32(CPURISCVState *env, int vxrm, int32_t a, int32_t b) +{ + int64_t res = (int64_t)a - b; + uint8_t round = get_round(vxrm, res, 1); + + return (res >> 1) + round; +} + +static inline int64_t asub64(CPURISCVState *env, int vxrm, int64_t a, int64_t b) +{ + int64_t res = (int64_t)a - b; + uint8_t round = get_round(vxrm, res, 1); + int64_t over = (res ^ a) & (a ^ b) & INT64_MIN; + + /* With signed overflow, bit 64 is inverse of bit 63. */ + return ((res >> 1) ^ over) + round; +} + +RVVCALL(OPIVV2_RM, vasub_vv_b, OP_SSS_B, H1, H1, H1, asub32) +RVVCALL(OPIVV2_RM, vasub_vv_h, OP_SSS_H, H2, H2, H2, asub32) +RVVCALL(OPIVV2_RM, vasub_vv_w, OP_SSS_W, H4, H4, H4, asub32) +RVVCALL(OPIVV2_RM, vasub_vv_d, OP_SSS_D, H8, H8, H8, asub64) +GEN_VEXT_VV_RM(vasub_vv_b, 1, 1, clearb) +GEN_VEXT_VV_RM(vasub_vv_h, 2, 2, clearh) +GEN_VEXT_VV_RM(vasub_vv_w, 4, 4, clearl) +GEN_VEXT_VV_RM(vasub_vv_d, 8, 8, clearq) + +RVVCALL(OPIVX2_RM, vasub_vx_b, OP_SSS_B, H1, H1, asub32) +RVVCALL(OPIVX2_RM, vasub_vx_h, OP_SSS_H, H2, H2, asub32) +RVVCALL(OPIVX2_RM, vasub_vx_w, OP_SSS_W, H4, H4, asub32) +RVVCALL(OPIVX2_RM, vasub_vx_d, OP_SSS_D, H8, H8, asub64) +GEN_VEXT_VX_RM(vasub_vx_b, 1, 1, clearb) +GEN_VEXT_VX_RM(vasub_vx_h, 2, 2, clearh) +GEN_VEXT_VX_RM(vasub_vx_w, 4, 4, clearl) +GEN_VEXT_VX_RM(vasub_vx_d, 8, 8, clearq) + +/* Vector Single-Width Fractional Multiply with Rounding and Saturation */ +static inline int8_t vsmul8(CPURISCVState *env, int vxrm, int8_t a, int8_t b) +{ + uint8_t round; + int16_t res; + + res = (int16_t)a * (int16_t)b; + round = get_round(vxrm, res, 7); + res = (res >> 7) + round; + + if (res > INT8_MAX) { + env->vxsat = 0x1; + return INT8_MAX; + } else if (res < INT8_MIN) { + env->vxsat = 0x1; + return INT8_MIN; + } else { + return res; + } +} + +static int16_t vsmul16(CPURISCVState *env, int vxrm, int16_t a, int16_t b) +{ + uint8_t round; + int32_t res; + + res = (int32_t)a * (int32_t)b; + round = get_round(vxrm, res, 15); + res = (res >> 15) + round; + + if (res > INT16_MAX) { + env->vxsat = 0x1; + return INT16_MAX; + } else if (res < INT16_MIN) { + env->vxsat = 0x1; + return INT16_MIN; + } else { + return res; + } +} + +static int32_t vsmul32(CPURISCVState *env, int vxrm, int32_t a, int32_t b) +{ + uint8_t round; + int64_t res; + + res = (int64_t)a * (int64_t)b; + round = get_round(vxrm, res, 31); + res = (res >> 31) + round; + + if (res > INT32_MAX) { + env->vxsat = 0x1; + return INT32_MAX; + } else if (res < INT32_MIN) { + env->vxsat = 0x1; + return INT32_MIN; + } else { + return res; + } +} + +static int64_t vsmul64(CPURISCVState *env, int vxrm, int64_t a, int64_t b) +{ + uint8_t round; + uint64_t hi_64, lo_64; + int64_t 
res; + + if (a == INT64_MIN && b == INT64_MIN) { + env->vxsat = 1; + return INT64_MAX; + } + + muls64(&lo_64, &hi_64, a, b); + round = get_round(vxrm, lo_64, 63); + /* + * Cannot overflow, as there are always + * 2 sign bits after multiply. + */ + res = (hi_64 << 1) | (lo_64 >> 63); + if (round) { + if (res == INT64_MAX) { + env->vxsat = 1; + } else { + res += 1; + } + } + return res; +} + +RVVCALL(OPIVV2_RM, vsmul_vv_b, OP_SSS_B, H1, H1, H1, vsmul8) +RVVCALL(OPIVV2_RM, vsmul_vv_h, OP_SSS_H, H2, H2, H2, vsmul16) +RVVCALL(OPIVV2_RM, vsmul_vv_w, OP_SSS_W, H4, H4, H4, vsmul32) +RVVCALL(OPIVV2_RM, vsmul_vv_d, OP_SSS_D, H8, H8, H8, vsmul64) +GEN_VEXT_VV_RM(vsmul_vv_b, 1, 1, clearb) +GEN_VEXT_VV_RM(vsmul_vv_h, 2, 2, clearh) +GEN_VEXT_VV_RM(vsmul_vv_w, 4, 4, clearl) +GEN_VEXT_VV_RM(vsmul_vv_d, 8, 8, clearq) + +RVVCALL(OPIVX2_RM, vsmul_vx_b, OP_SSS_B, H1, H1, vsmul8) +RVVCALL(OPIVX2_RM, vsmul_vx_h, OP_SSS_H, H2, H2, vsmul16) +RVVCALL(OPIVX2_RM, vsmul_vx_w, OP_SSS_W, H4, H4, vsmul32) +RVVCALL(OPIVX2_RM, vsmul_vx_d, OP_SSS_D, H8, H8, vsmul64) +GEN_VEXT_VX_RM(vsmul_vx_b, 1, 1, clearb) +GEN_VEXT_VX_RM(vsmul_vx_h, 2, 2, clearh) +GEN_VEXT_VX_RM(vsmul_vx_w, 4, 4, clearl) +GEN_VEXT_VX_RM(vsmul_vx_d, 8, 8, clearq) + +/* Vector Widening Saturating Scaled Multiply-Add */ +static inline uint16_t +vwsmaccu8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b, + uint16_t c) +{ + uint8_t round; + uint16_t res = (uint16_t)a * b; + + round = get_round(vxrm, res, 4); + res = (res >> 4) + round; + return saddu16(env, vxrm, c, res); +} + +static inline uint32_t +vwsmaccu16(CPURISCVState *env, int vxrm, uint16_t a, uint16_t b, + uint32_t c) +{ + uint8_t round; + uint32_t res = (uint32_t)a * b; + + round = get_round(vxrm, res, 8); + res = (res >> 8) + round; + return saddu32(env, vxrm, c, res); +} + +static inline uint64_t +vwsmaccu32(CPURISCVState *env, int vxrm, uint32_t a, uint32_t b, + uint64_t c) +{ + uint8_t round; + uint64_t res = (uint64_t)a * b; + + round = get_round(vxrm, res, 16); + res = (res >> 16) + round; + return saddu64(env, vxrm, c, res); +} + +#define OPIVV3_RM(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ +static inline void \ +do_##NAME(void *vd, void *vs1, void *vs2, int i, \ + CPURISCVState *env, int vxrm) \ +{ \ + TX1 s1 = *((T1 *)vs1 + HS1(i)); \ + TX2 s2 = *((T2 *)vs2 + HS2(i)); \ + TD d = *((TD *)vd + HD(i)); \ + *((TD *)vd + HD(i)) = OP(env, vxrm, s2, s1, d); \ +} + +RVVCALL(OPIVV3_RM, vwsmaccu_vv_b, WOP_UUU_B, H2, H1, H1, vwsmaccu8) +RVVCALL(OPIVV3_RM, vwsmaccu_vv_h, WOP_UUU_H, H4, H2, H2, vwsmaccu16) +RVVCALL(OPIVV3_RM, vwsmaccu_vv_w, WOP_UUU_W, H8, H4, H4, vwsmaccu32) +GEN_VEXT_VV_RM(vwsmaccu_vv_b, 1, 2, clearh) +GEN_VEXT_VV_RM(vwsmaccu_vv_h, 2, 4, clearl) +GEN_VEXT_VV_RM(vwsmaccu_vv_w, 4, 8, clearq) + +#define OPIVX3_RM(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ +static inline void \ +do_##NAME(void *vd, target_long s1, void *vs2, int i, \ + CPURISCVState *env, int vxrm) \ +{ \ + TX2 s2 = *((T2 *)vs2 + HS2(i)); \ + TD d = *((TD *)vd + HD(i)); \ + *((TD *)vd + HD(i)) = OP(env, vxrm, s2, (TX1)(T1)s1, d); \ +} + +RVVCALL(OPIVX3_RM, vwsmaccu_vx_b, WOP_UUU_B, H2, H1, vwsmaccu8) +RVVCALL(OPIVX3_RM, vwsmaccu_vx_h, WOP_UUU_H, H4, H2, vwsmaccu16) +RVVCALL(OPIVX3_RM, vwsmaccu_vx_w, WOP_UUU_W, H8, H4, vwsmaccu32) +GEN_VEXT_VX_RM(vwsmaccu_vx_b, 1, 2, clearh) +GEN_VEXT_VX_RM(vwsmaccu_vx_h, 2, 4, clearl) +GEN_VEXT_VX_RM(vwsmaccu_vx_w, 4, 8, clearq) + +static inline int16_t +vwsmacc8(CPURISCVState *env, int vxrm, int8_t a, int8_t b, int16_t c) +{ + uint8_t round; + int16_t res = (int16_t)a * b; + + 
round = get_round(vxrm, res, 4); + res = (res >> 4) + round; + return sadd16(env, vxrm, c, res); +} + +static inline int32_t +vwsmacc16(CPURISCVState *env, int vxrm, int16_t a, int16_t b, int32_t c) +{ + uint8_t round; + int32_t res = (int32_t)a * b; + + round = get_round(vxrm, res, 8); + res = (res >> 8) + round; + return sadd32(env, vxrm, c, res); + +} + +static inline int64_t +vwsmacc32(CPURISCVState *env, int vxrm, int32_t a, int32_t b, int64_t c) +{ + uint8_t round; + int64_t res = (int64_t)a * b; + + round = get_round(vxrm, res, 16); + res = (res >> 16) + round; + return sadd64(env, vxrm, c, res); +} + +RVVCALL(OPIVV3_RM, vwsmacc_vv_b, WOP_SSS_B, H2, H1, H1, vwsmacc8) +RVVCALL(OPIVV3_RM, vwsmacc_vv_h, WOP_SSS_H, H4, H2, H2, vwsmacc16) +RVVCALL(OPIVV3_RM, vwsmacc_vv_w, WOP_SSS_W, H8, H4, H4, vwsmacc32) +GEN_VEXT_VV_RM(vwsmacc_vv_b, 1, 2, clearh) +GEN_VEXT_VV_RM(vwsmacc_vv_h, 2, 4, clearl) +GEN_VEXT_VV_RM(vwsmacc_vv_w, 4, 8, clearq) +RVVCALL(OPIVX3_RM, vwsmacc_vx_b, WOP_SSS_B, H2, H1, vwsmacc8) +RVVCALL(OPIVX3_RM, vwsmacc_vx_h, WOP_SSS_H, H4, H2, vwsmacc16) +RVVCALL(OPIVX3_RM, vwsmacc_vx_w, WOP_SSS_W, H8, H4, vwsmacc32) +GEN_VEXT_VX_RM(vwsmacc_vx_b, 1, 2, clearh) +GEN_VEXT_VX_RM(vwsmacc_vx_h, 2, 4, clearl) +GEN_VEXT_VX_RM(vwsmacc_vx_w, 4, 8, clearq) + +static inline int16_t +vwsmaccsu8(CPURISCVState *env, int vxrm, uint8_t a, int8_t b, int16_t c) +{ + uint8_t round; + int16_t res = a * (int16_t)b; + + round = get_round(vxrm, res, 4); + res = (res >> 4) + round; + return ssub16(env, vxrm, c, res); +} + +static inline int32_t +vwsmaccsu16(CPURISCVState *env, int vxrm, uint16_t a, int16_t b, uint32_t c) +{ + uint8_t round; + int32_t res = a * (int32_t)b; + + round = get_round(vxrm, res, 8); + res = (res >> 8) + round; + return ssub32(env, vxrm, c, res); +} + +static inline int64_t +vwsmaccsu32(CPURISCVState *env, int vxrm, uint32_t a, int32_t b, int64_t c) +{ + uint8_t round; + int64_t res = a * (int64_t)b; + + round = get_round(vxrm, res, 16); + res = (res >> 16) + round; + return ssub64(env, vxrm, c, res); +} + +RVVCALL(OPIVV3_RM, vwsmaccsu_vv_b, WOP_SSU_B, H2, H1, H1, vwsmaccsu8) +RVVCALL(OPIVV3_RM, vwsmaccsu_vv_h, WOP_SSU_H, H4, H2, H2, vwsmaccsu16) +RVVCALL(OPIVV3_RM, vwsmaccsu_vv_w, WOP_SSU_W, H8, H4, H4, vwsmaccsu32) +GEN_VEXT_VV_RM(vwsmaccsu_vv_b, 1, 2, clearh) +GEN_VEXT_VV_RM(vwsmaccsu_vv_h, 2, 4, clearl) +GEN_VEXT_VV_RM(vwsmaccsu_vv_w, 4, 8, clearq) +RVVCALL(OPIVX3_RM, vwsmaccsu_vx_b, WOP_SSU_B, H2, H1, vwsmaccsu8) +RVVCALL(OPIVX3_RM, vwsmaccsu_vx_h, WOP_SSU_H, H4, H2, vwsmaccsu16) +RVVCALL(OPIVX3_RM, vwsmaccsu_vx_w, WOP_SSU_W, H8, H4, vwsmaccsu32) +GEN_VEXT_VX_RM(vwsmaccsu_vx_b, 1, 2, clearh) +GEN_VEXT_VX_RM(vwsmaccsu_vx_h, 2, 4, clearl) +GEN_VEXT_VX_RM(vwsmaccsu_vx_w, 4, 8, clearq) + +static inline int16_t +vwsmaccus8(CPURISCVState *env, int vxrm, int8_t a, uint8_t b, int16_t c) +{ + uint8_t round; + int16_t res = (int16_t)a * b; + + round = get_round(vxrm, res, 4); + res = (res >> 4) + round; + return ssub16(env, vxrm, c, res); +} + +static inline int32_t +vwsmaccus16(CPURISCVState *env, int vxrm, int16_t a, uint16_t b, int32_t c) +{ + uint8_t round; + int32_t res = (int32_t)a * b; + + round = get_round(vxrm, res, 8); + res = (res >> 8) + round; + return ssub32(env, vxrm, c, res); +} + +static inline int64_t +vwsmaccus32(CPURISCVState *env, int vxrm, int32_t a, uint32_t b, int64_t c) +{ + uint8_t round; + int64_t res = (int64_t)a * b; + + round = get_round(vxrm, res, 16); + res = (res >> 16) + round; + return ssub64(env, vxrm, c, res); +} + +RVVCALL(OPIVX3_RM, vwsmaccus_vx_b, 
WOP_SUS_B, H2, H1, vwsmaccus8) +RVVCALL(OPIVX3_RM, vwsmaccus_vx_h, WOP_SUS_H, H4, H2, vwsmaccus16) +RVVCALL(OPIVX3_RM, vwsmaccus_vx_w, WOP_SUS_W, H8, H4, vwsmaccus32) +GEN_VEXT_VX_RM(vwsmaccus_vx_b, 1, 2, clearh) +GEN_VEXT_VX_RM(vwsmaccus_vx_h, 2, 4, clearl) +GEN_VEXT_VX_RM(vwsmaccus_vx_w, 4, 8, clearq) + +/* Vector Single-Width Scaling Shift Instructions */ +static inline uint8_t +vssrl8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b) +{ + uint8_t round, shift = b & 0x7; + uint8_t res; + + round = get_round(vxrm, a, shift); + res = (a >> shift) + round; + return res; +} +static inline uint16_t +vssrl16(CPURISCVState *env, int vxrm, uint16_t a, uint16_t b) +{ + uint8_t round, shift = b & 0xf; + uint16_t res; + + round = get_round(vxrm, a, shift); + res = (a >> shift) + round; + return res; +} +static inline uint32_t +vssrl32(CPURISCVState *env, int vxrm, uint32_t a, uint32_t b) +{ + uint8_t round, shift = b & 0x1f; + uint32_t res; + + round = get_round(vxrm, a, shift); + res = (a >> shift) + round; + return res; +} +static inline uint64_t +vssrl64(CPURISCVState *env, int vxrm, uint64_t a, uint64_t b) +{ + uint8_t round, shift = b & 0x3f; + uint64_t res; + + round = get_round(vxrm, a, shift); + res = (a >> shift) + round; + return res; +} +RVVCALL(OPIVV2_RM, vssrl_vv_b, OP_UUU_B, H1, H1, H1, vssrl8) +RVVCALL(OPIVV2_RM, vssrl_vv_h, OP_UUU_H, H2, H2, H2, vssrl16) +RVVCALL(OPIVV2_RM, vssrl_vv_w, OP_UUU_W, H4, H4, H4, vssrl32) +RVVCALL(OPIVV2_RM, vssrl_vv_d, OP_UUU_D, H8, H8, H8, vssrl64) +GEN_VEXT_VV_RM(vssrl_vv_b, 1, 1, clearb) +GEN_VEXT_VV_RM(vssrl_vv_h, 2, 2, clearh) +GEN_VEXT_VV_RM(vssrl_vv_w, 4, 4, clearl) +GEN_VEXT_VV_RM(vssrl_vv_d, 8, 8, clearq) + +RVVCALL(OPIVX2_RM, vssrl_vx_b, OP_UUU_B, H1, H1, vssrl8) +RVVCALL(OPIVX2_RM, vssrl_vx_h, OP_UUU_H, H2, H2, vssrl16) +RVVCALL(OPIVX2_RM, vssrl_vx_w, OP_UUU_W, H4, H4, vssrl32) +RVVCALL(OPIVX2_RM, vssrl_vx_d, OP_UUU_D, H8, H8, vssrl64) +GEN_VEXT_VX_RM(vssrl_vx_b, 1, 1, clearb) +GEN_VEXT_VX_RM(vssrl_vx_h, 2, 2, clearh) +GEN_VEXT_VX_RM(vssrl_vx_w, 4, 4, clearl) +GEN_VEXT_VX_RM(vssrl_vx_d, 8, 8, clearq) + +static inline int8_t +vssra8(CPURISCVState *env, int vxrm, int8_t a, int8_t b) +{ + uint8_t round, shift = b & 0x7; + int8_t res; + + round = get_round(vxrm, a, shift); + res = (a >> shift) + round; + return res; +} +static inline int16_t +vssra16(CPURISCVState *env, int vxrm, int16_t a, int16_t b) +{ + uint8_t round, shift = b & 0xf; + int16_t res; + + round = get_round(vxrm, a, shift); + res = (a >> shift) + round; + return res; +} +static inline int32_t +vssra32(CPURISCVState *env, int vxrm, int32_t a, int32_t b) +{ + uint8_t round, shift = b & 0x1f; + int32_t res; + + round = get_round(vxrm, a, shift); + res = (a >> shift) + round; + return res; +} +static inline int64_t +vssra64(CPURISCVState *env, int vxrm, int64_t a, int64_t b) +{ + uint8_t round, shift = b & 0x3f; + int64_t res; + + round = get_round(vxrm, a, shift); + res = (a >> shift) + round; + return res; +} + +RVVCALL(OPIVV2_RM, vssra_vv_b, OP_SSS_B, H1, H1, H1, vssra8) +RVVCALL(OPIVV2_RM, vssra_vv_h, OP_SSS_H, H2, H2, H2, vssra16) +RVVCALL(OPIVV2_RM, vssra_vv_w, OP_SSS_W, H4, H4, H4, vssra32) +RVVCALL(OPIVV2_RM, vssra_vv_d, OP_SSS_D, H8, H8, H8, vssra64) +GEN_VEXT_VV_RM(vssra_vv_b, 1, 1, clearb) +GEN_VEXT_VV_RM(vssra_vv_h, 2, 2, clearh) +GEN_VEXT_VV_RM(vssra_vv_w, 4, 4, clearl) +GEN_VEXT_VV_RM(vssra_vv_d, 8, 8, clearq) + +RVVCALL(OPIVX2_RM, vssra_vx_b, OP_SSS_B, H1, H1, vssra8) +RVVCALL(OPIVX2_RM, vssra_vx_h, OP_SSS_H, H2, H2, vssra16) +RVVCALL(OPIVX2_RM, vssra_vx_w, OP_SSS_W, 
H4, H4, vssra32) +RVVCALL(OPIVX2_RM, vssra_vx_d, OP_SSS_D, H8, H8, vssra64) +GEN_VEXT_VX_RM(vssra_vx_b, 1, 1, clearb) +GEN_VEXT_VX_RM(vssra_vx_h, 2, 2, clearh) +GEN_VEXT_VX_RM(vssra_vx_w, 4, 4, clearl) +GEN_VEXT_VX_RM(vssra_vx_d, 8, 8, clearq) + +/* Vector Narrowing Fixed-Point Clip Instructions */ +static inline int8_t +vnclip8(CPURISCVState *env, int vxrm, int16_t a, int8_t b) +{ + uint8_t round, shift = b & 0xf; + int16_t res; + + round = get_round(vxrm, a, shift); + res = (a >> shift) + round; + if (res > INT8_MAX) { + env->vxsat = 0x1; + return INT8_MAX; + } else if (res < INT8_MIN) { + env->vxsat = 0x1; + return INT8_MIN; + } else { + return res; + } +} + +static inline int16_t +vnclip16(CPURISCVState *env, int vxrm, int32_t a, int16_t b) +{ + uint8_t round, shift = b & 0x1f; + int32_t res; + + round = get_round(vxrm, a, shift); + res = (a >> shift) + round; + if (res > INT16_MAX) { + env->vxsat = 0x1; + return INT16_MAX; + } else if (res < INT16_MIN) { + env->vxsat = 0x1; + return INT16_MIN; + } else { + return res; + } +} + +static inline int32_t +vnclip32(CPURISCVState *env, int vxrm, int64_t a, int32_t b) +{ + uint8_t round, shift = b & 0x3f; + int64_t res; + + round = get_round(vxrm, a, shift); + res = (a >> shift) + round; + if (res > INT32_MAX) { + env->vxsat = 0x1; + return INT32_MAX; + } else if (res < INT32_MIN) { + env->vxsat = 0x1; + return INT32_MIN; + } else { + return res; + } +} + +RVVCALL(OPIVV2_RM, vnclip_vv_b, NOP_SSS_B, H1, H2, H1, vnclip8) +RVVCALL(OPIVV2_RM, vnclip_vv_h, NOP_SSS_H, H2, H4, H2, vnclip16) +RVVCALL(OPIVV2_RM, vnclip_vv_w, NOP_SSS_W, H4, H8, H4, vnclip32) +GEN_VEXT_VV_RM(vnclip_vv_b, 1, 1, clearb) +GEN_VEXT_VV_RM(vnclip_vv_h, 2, 2, clearh) +GEN_VEXT_VV_RM(vnclip_vv_w, 4, 4, clearl) + +RVVCALL(OPIVX2_RM, vnclip_vx_b, NOP_SSS_B, H1, H2, vnclip8) +RVVCALL(OPIVX2_RM, vnclip_vx_h, NOP_SSS_H, H2, H4, vnclip16) +RVVCALL(OPIVX2_RM, vnclip_vx_w, NOP_SSS_W, H4, H8, vnclip32) +GEN_VEXT_VX_RM(vnclip_vx_b, 1, 1, clearb) +GEN_VEXT_VX_RM(vnclip_vx_h, 2, 2, clearh) +GEN_VEXT_VX_RM(vnclip_vx_w, 4, 4, clearl) + +static inline uint8_t +vnclipu8(CPURISCVState *env, int vxrm, uint16_t a, uint8_t b) +{ + uint8_t round, shift = b & 0xf; + uint16_t res; + + round = get_round(vxrm, a, shift); + res = (a >> shift) + round; + if (res > UINT8_MAX) { + env->vxsat = 0x1; + return UINT8_MAX; + } else { + return res; + } +} + +static inline uint16_t +vnclipu16(CPURISCVState *env, int vxrm, uint32_t a, uint16_t b) +{ + uint8_t round, shift = b & 0x1f; + uint32_t res; + + round = get_round(vxrm, a, shift); + res = (a >> shift) + round; + if (res > UINT16_MAX) { + env->vxsat = 0x1; + return UINT16_MAX; + } else { + return res; + } +} + +static inline uint32_t +vnclipu32(CPURISCVState *env, int vxrm, uint64_t a, uint32_t b) +{ + uint8_t round, shift = b & 0x3f; + int64_t res; + + round = get_round(vxrm, a, shift); + res = (a >> shift) + round; + if (res > UINT32_MAX) { + env->vxsat = 0x1; + return UINT32_MAX; + } else { + return res; + } +} + +RVVCALL(OPIVV2_RM, vnclipu_vv_b, NOP_UUU_B, H1, H2, H1, vnclipu8) +RVVCALL(OPIVV2_RM, vnclipu_vv_h, NOP_UUU_H, H2, H4, H2, vnclipu16) +RVVCALL(OPIVV2_RM, vnclipu_vv_w, NOP_UUU_W, H4, H8, H4, vnclipu32) +GEN_VEXT_VV_RM(vnclipu_vv_b, 1, 1, clearb) +GEN_VEXT_VV_RM(vnclipu_vv_h, 2, 2, clearh) +GEN_VEXT_VV_RM(vnclipu_vv_w, 4, 4, clearl) + +RVVCALL(OPIVX2_RM, vnclipu_vx_b, NOP_UUU_B, H1, H2, vnclipu8) +RVVCALL(OPIVX2_RM, vnclipu_vx_h, NOP_UUU_H, H2, H4, vnclipu16) +RVVCALL(OPIVX2_RM, vnclipu_vx_w, NOP_UUU_W, H4, H8, vnclipu32) 
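+/*
+ * Editorial note, not part of the upstream patch: the narrowing clips above
+ * shift the double-width source right by the masked shift amount, add the
+ * vxrm rounding increment from get_round() (assumed to be the fixed-point
+ * rounding helper defined earlier in this file), and saturate into the
+ * single-width destination, setting vxsat on overflow.  For example, with
+ * round-to-nearest-up, vnclipu8 with a = 0x01FF and a shift of 4 yields
+ * (0x01FF >> 4) + 1 = 0x20, which fits in a byte, so vxsat is untouched.
+ */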
+GEN_VEXT_VX_RM(vnclipu_vx_b, 1, 1, clearb) +GEN_VEXT_VX_RM(vnclipu_vx_h, 2, 2, clearh) +GEN_VEXT_VX_RM(vnclipu_vx_w, 4, 4, clearl) + +/* + *** Vector Float Point Arithmetic Instructions + */ +/* Vector Single-Width Floating-Point Add/Subtract Instructions */ +#define OPFVV2(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ +static void do_##NAME(void *vd, void *vs1, void *vs2, int i, \ + CPURISCVState *env) \ +{ \ + TX1 s1 = *((T1 *)vs1 + HS1(i)); \ + TX2 s2 = *((T2 *)vs2 + HS2(i)); \ + *((TD *)vd + HD(i)) = OP(s2, s1, &env->fp_status); \ +} + +#define GEN_VEXT_VV_ENV(NAME, ESZ, DSZ, CLEAR_FN) \ +void HELPER(NAME)(void *vd, void *v0, void *vs1, \ + void *vs2, CPURISCVState *env, \ + uint32_t desc) \ +{ \ + uint32_t vlmax = vext_maxsz(desc) / ESZ; \ + uint32_t mlen = vext_mlen(desc); \ + uint32_t vm = vext_vm(desc); \ + uint32_t vl = env->vl; \ + uint32_t i; \ + \ + for (i = 0; i < vl; i++) { \ + if (!vm && !vext_elem_mask(v0, mlen, i)) { \ + continue; \ + } \ + do_##NAME(vd, vs1, vs2, i, env); \ + } \ + CLEAR_FN(vd, vl, vl * DSZ, vlmax * DSZ); \ +} + +RVVCALL(OPFVV2, vfadd_vv_h, OP_UUU_H, H2, H2, H2, float16_add) +RVVCALL(OPFVV2, vfadd_vv_w, OP_UUU_W, H4, H4, H4, float32_add) +RVVCALL(OPFVV2, vfadd_vv_d, OP_UUU_D, H8, H8, H8, float64_add) +GEN_VEXT_VV_ENV(vfadd_vv_h, 2, 2, clearh) +GEN_VEXT_VV_ENV(vfadd_vv_w, 4, 4, clearl) +GEN_VEXT_VV_ENV(vfadd_vv_d, 8, 8, clearq) + +#define OPFVF2(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ +static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i, \ + CPURISCVState *env) \ +{ \ + TX2 s2 = *((T2 *)vs2 + HS2(i)); \ + *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, &env->fp_status);\ +} + +#define GEN_VEXT_VF(NAME, ESZ, DSZ, CLEAR_FN) \ +void HELPER(NAME)(void *vd, void *v0, uint64_t s1, \ + void *vs2, CPURISCVState *env, \ + uint32_t desc) \ +{ \ + uint32_t vlmax = vext_maxsz(desc) / ESZ; \ + uint32_t mlen = vext_mlen(desc); \ + uint32_t vm = vext_vm(desc); \ + uint32_t vl = env->vl; \ + uint32_t i; \ + \ + for (i = 0; i < vl; i++) { \ + if (!vm && !vext_elem_mask(v0, mlen, i)) { \ + continue; \ + } \ + do_##NAME(vd, s1, vs2, i, env); \ + } \ + CLEAR_FN(vd, vl, vl * DSZ, vlmax * DSZ); \ +} + +RVVCALL(OPFVF2, vfadd_vf_h, OP_UUU_H, H2, H2, float16_add) +RVVCALL(OPFVF2, vfadd_vf_w, OP_UUU_W, H4, H4, float32_add) +RVVCALL(OPFVF2, vfadd_vf_d, OP_UUU_D, H8, H8, float64_add) +GEN_VEXT_VF(vfadd_vf_h, 2, 2, clearh) +GEN_VEXT_VF(vfadd_vf_w, 4, 4, clearl) +GEN_VEXT_VF(vfadd_vf_d, 8, 8, clearq) + +RVVCALL(OPFVV2, vfsub_vv_h, OP_UUU_H, H2, H2, H2, float16_sub) +RVVCALL(OPFVV2, vfsub_vv_w, OP_UUU_W, H4, H4, H4, float32_sub) +RVVCALL(OPFVV2, vfsub_vv_d, OP_UUU_D, H8, H8, H8, float64_sub) +GEN_VEXT_VV_ENV(vfsub_vv_h, 2, 2, clearh) +GEN_VEXT_VV_ENV(vfsub_vv_w, 4, 4, clearl) +GEN_VEXT_VV_ENV(vfsub_vv_d, 8, 8, clearq) +RVVCALL(OPFVF2, vfsub_vf_h, OP_UUU_H, H2, H2, float16_sub) +RVVCALL(OPFVF2, vfsub_vf_w, OP_UUU_W, H4, H4, float32_sub) +RVVCALL(OPFVF2, vfsub_vf_d, OP_UUU_D, H8, H8, float64_sub) +GEN_VEXT_VF(vfsub_vf_h, 2, 2, clearh) +GEN_VEXT_VF(vfsub_vf_w, 4, 4, clearl) +GEN_VEXT_VF(vfsub_vf_d, 8, 8, clearq) + +static uint16_t float16_rsub(uint16_t a, uint16_t b, float_status *s) +{ + return float16_sub(b, a, s); +} + +static uint32_t float32_rsub(uint32_t a, uint32_t b, float_status *s) +{ + return float32_sub(b, a, s); +} + +static uint64_t float64_rsub(uint64_t a, uint64_t b, float_status *s) +{ + return float64_sub(b, a, s); +} + +RVVCALL(OPFVF2, vfrsub_vf_h, OP_UUU_H, H2, H2, float16_rsub) +RVVCALL(OPFVF2, vfrsub_vf_w, OP_UUU_W, H4, H4, float32_rsub) +RVVCALL(OPFVF2, 
vfrsub_vf_d, OP_UUU_D, H8, H8, float64_rsub) +GEN_VEXT_VF(vfrsub_vf_h, 2, 2, clearh) +GEN_VEXT_VF(vfrsub_vf_w, 4, 4, clearl) +GEN_VEXT_VF(vfrsub_vf_d, 8, 8, clearq) + +/* Vector Widening Floating-Point Add/Subtract Instructions */ +static uint32_t vfwadd16(uint16_t a, uint16_t b, float_status *s) +{ + return float32_add(float16_to_float32(a, true, s), + float16_to_float32(b, true, s), s); +} + +static uint64_t vfwadd32(uint32_t a, uint32_t b, float_status *s) +{ + return float64_add(float32_to_float64(a, s), + float32_to_float64(b, s), s); + +} + +RVVCALL(OPFVV2, vfwadd_vv_h, WOP_UUU_H, H4, H2, H2, vfwadd16) +RVVCALL(OPFVV2, vfwadd_vv_w, WOP_UUU_W, H8, H4, H4, vfwadd32) +GEN_VEXT_VV_ENV(vfwadd_vv_h, 2, 4, clearl) +GEN_VEXT_VV_ENV(vfwadd_vv_w, 4, 8, clearq) +RVVCALL(OPFVF2, vfwadd_vf_h, WOP_UUU_H, H4, H2, vfwadd16) +RVVCALL(OPFVF2, vfwadd_vf_w, WOP_UUU_W, H8, H4, vfwadd32) +GEN_VEXT_VF(vfwadd_vf_h, 2, 4, clearl) +GEN_VEXT_VF(vfwadd_vf_w, 4, 8, clearq) + +static uint32_t vfwsub16(uint16_t a, uint16_t b, float_status *s) +{ + return float32_sub(float16_to_float32(a, true, s), + float16_to_float32(b, true, s), s); +} + +static uint64_t vfwsub32(uint32_t a, uint32_t b, float_status *s) +{ + return float64_sub(float32_to_float64(a, s), + float32_to_float64(b, s), s); + +} + +RVVCALL(OPFVV2, vfwsub_vv_h, WOP_UUU_H, H4, H2, H2, vfwsub16) +RVVCALL(OPFVV2, vfwsub_vv_w, WOP_UUU_W, H8, H4, H4, vfwsub32) +GEN_VEXT_VV_ENV(vfwsub_vv_h, 2, 4, clearl) +GEN_VEXT_VV_ENV(vfwsub_vv_w, 4, 8, clearq) +RVVCALL(OPFVF2, vfwsub_vf_h, WOP_UUU_H, H4, H2, vfwsub16) +RVVCALL(OPFVF2, vfwsub_vf_w, WOP_UUU_W, H8, H4, vfwsub32) +GEN_VEXT_VF(vfwsub_vf_h, 2, 4, clearl) +GEN_VEXT_VF(vfwsub_vf_w, 4, 8, clearq) + +static uint32_t vfwaddw16(uint32_t a, uint16_t b, float_status *s) +{ + return float32_add(a, float16_to_float32(b, true, s), s); +} + +static uint64_t vfwaddw32(uint64_t a, uint32_t b, float_status *s) +{ + return float64_add(a, float32_to_float64(b, s), s); +} + +RVVCALL(OPFVV2, vfwadd_wv_h, WOP_WUUU_H, H4, H2, H2, vfwaddw16) +RVVCALL(OPFVV2, vfwadd_wv_w, WOP_WUUU_W, H8, H4, H4, vfwaddw32) +GEN_VEXT_VV_ENV(vfwadd_wv_h, 2, 4, clearl) +GEN_VEXT_VV_ENV(vfwadd_wv_w, 4, 8, clearq) +RVVCALL(OPFVF2, vfwadd_wf_h, WOP_WUUU_H, H4, H2, vfwaddw16) +RVVCALL(OPFVF2, vfwadd_wf_w, WOP_WUUU_W, H8, H4, vfwaddw32) +GEN_VEXT_VF(vfwadd_wf_h, 2, 4, clearl) +GEN_VEXT_VF(vfwadd_wf_w, 4, 8, clearq) + +static uint32_t vfwsubw16(uint32_t a, uint16_t b, float_status *s) +{ + return float32_sub(a, float16_to_float32(b, true, s), s); +} + +static uint64_t vfwsubw32(uint64_t a, uint32_t b, float_status *s) +{ + return float64_sub(a, float32_to_float64(b, s), s); +} + +RVVCALL(OPFVV2, vfwsub_wv_h, WOP_WUUU_H, H4, H2, H2, vfwsubw16) +RVVCALL(OPFVV2, vfwsub_wv_w, WOP_WUUU_W, H8, H4, H4, vfwsubw32) +GEN_VEXT_VV_ENV(vfwsub_wv_h, 2, 4, clearl) +GEN_VEXT_VV_ENV(vfwsub_wv_w, 4, 8, clearq) +RVVCALL(OPFVF2, vfwsub_wf_h, WOP_WUUU_H, H4, H2, vfwsubw16) +RVVCALL(OPFVF2, vfwsub_wf_w, WOP_WUUU_W, H8, H4, vfwsubw32) +GEN_VEXT_VF(vfwsub_wf_h, 2, 4, clearl) +GEN_VEXT_VF(vfwsub_wf_w, 4, 8, clearq) + +/* Vector Single-Width Floating-Point Multiply/Divide Instructions */ +RVVCALL(OPFVV2, vfmul_vv_h, OP_UUU_H, H2, H2, H2, float16_mul) +RVVCALL(OPFVV2, vfmul_vv_w, OP_UUU_W, H4, H4, H4, float32_mul) +RVVCALL(OPFVV2, vfmul_vv_d, OP_UUU_D, H8, H8, H8, float64_mul) +GEN_VEXT_VV_ENV(vfmul_vv_h, 2, 2, clearh) +GEN_VEXT_VV_ENV(vfmul_vv_w, 4, 4, clearl) +GEN_VEXT_VV_ENV(vfmul_vv_d, 8, 8, clearq) +RVVCALL(OPFVF2, vfmul_vf_h, OP_UUU_H, H2, H2, float16_mul) +RVVCALL(OPFVF2, 
vfmul_vf_w, OP_UUU_W, H4, H4, float32_mul) +RVVCALL(OPFVF2, vfmul_vf_d, OP_UUU_D, H8, H8, float64_mul) +GEN_VEXT_VF(vfmul_vf_h, 2, 2, clearh) +GEN_VEXT_VF(vfmul_vf_w, 4, 4, clearl) +GEN_VEXT_VF(vfmul_vf_d, 8, 8, clearq) + +RVVCALL(OPFVV2, vfdiv_vv_h, OP_UUU_H, H2, H2, H2, float16_div) +RVVCALL(OPFVV2, vfdiv_vv_w, OP_UUU_W, H4, H4, H4, float32_div) +RVVCALL(OPFVV2, vfdiv_vv_d, OP_UUU_D, H8, H8, H8, float64_div) +GEN_VEXT_VV_ENV(vfdiv_vv_h, 2, 2, clearh) +GEN_VEXT_VV_ENV(vfdiv_vv_w, 4, 4, clearl) +GEN_VEXT_VV_ENV(vfdiv_vv_d, 8, 8, clearq) +RVVCALL(OPFVF2, vfdiv_vf_h, OP_UUU_H, H2, H2, float16_div) +RVVCALL(OPFVF2, vfdiv_vf_w, OP_UUU_W, H4, H4, float32_div) +RVVCALL(OPFVF2, vfdiv_vf_d, OP_UUU_D, H8, H8, float64_div) +GEN_VEXT_VF(vfdiv_vf_h, 2, 2, clearh) +GEN_VEXT_VF(vfdiv_vf_w, 4, 4, clearl) +GEN_VEXT_VF(vfdiv_vf_d, 8, 8, clearq) + +static uint16_t float16_rdiv(uint16_t a, uint16_t b, float_status *s) +{ + return float16_div(b, a, s); +} + +static uint32_t float32_rdiv(uint32_t a, uint32_t b, float_status *s) +{ + return float32_div(b, a, s); +} + +static uint64_t float64_rdiv(uint64_t a, uint64_t b, float_status *s) +{ + return float64_div(b, a, s); +} + +RVVCALL(OPFVF2, vfrdiv_vf_h, OP_UUU_H, H2, H2, float16_rdiv) +RVVCALL(OPFVF2, vfrdiv_vf_w, OP_UUU_W, H4, H4, float32_rdiv) +RVVCALL(OPFVF2, vfrdiv_vf_d, OP_UUU_D, H8, H8, float64_rdiv) +GEN_VEXT_VF(vfrdiv_vf_h, 2, 2, clearh) +GEN_VEXT_VF(vfrdiv_vf_w, 4, 4, clearl) +GEN_VEXT_VF(vfrdiv_vf_d, 8, 8, clearq) + +/* Vector Widening Floating-Point Multiply */ +static uint32_t vfwmul16(uint16_t a, uint16_t b, float_status *s) +{ + return float32_mul(float16_to_float32(a, true, s), + float16_to_float32(b, true, s), s); +} + +static uint64_t vfwmul32(uint32_t a, uint32_t b, float_status *s) +{ + return float64_mul(float32_to_float64(a, s), + float32_to_float64(b, s), s); + +} +RVVCALL(OPFVV2, vfwmul_vv_h, WOP_UUU_H, H4, H2, H2, vfwmul16) +RVVCALL(OPFVV2, vfwmul_vv_w, WOP_UUU_W, H8, H4, H4, vfwmul32) +GEN_VEXT_VV_ENV(vfwmul_vv_h, 2, 4, clearl) +GEN_VEXT_VV_ENV(vfwmul_vv_w, 4, 8, clearq) +RVVCALL(OPFVF2, vfwmul_vf_h, WOP_UUU_H, H4, H2, vfwmul16) +RVVCALL(OPFVF2, vfwmul_vf_w, WOP_UUU_W, H8, H4, vfwmul32) +GEN_VEXT_VF(vfwmul_vf_h, 2, 4, clearl) +GEN_VEXT_VF(vfwmul_vf_w, 4, 8, clearq) + +/* Vector Single-Width Floating-Point Fused Multiply-Add Instructions */ +#define OPFVV3(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ +static void do_##NAME(void *vd, void *vs1, void *vs2, int i, \ + CPURISCVState *env) \ +{ \ + TX1 s1 = *((T1 *)vs1 + HS1(i)); \ + TX2 s2 = *((T2 *)vs2 + HS2(i)); \ + TD d = *((TD *)vd + HD(i)); \ + *((TD *)vd + HD(i)) = OP(s2, s1, d, &env->fp_status); \ +} + +static uint16_t fmacc16(uint16_t a, uint16_t b, uint16_t d, float_status *s) +{ + return float16_muladd(a, b, d, 0, s); +} + +static uint32_t fmacc32(uint32_t a, uint32_t b, uint32_t d, float_status *s) +{ + return float32_muladd(a, b, d, 0, s); +} + +static uint64_t fmacc64(uint64_t a, uint64_t b, uint64_t d, float_status *s) +{ + return float64_muladd(a, b, d, 0, s); +} + +RVVCALL(OPFVV3, vfmacc_vv_h, OP_UUU_H, H2, H2, H2, fmacc16) +RVVCALL(OPFVV3, vfmacc_vv_w, OP_UUU_W, H4, H4, H4, fmacc32) +RVVCALL(OPFVV3, vfmacc_vv_d, OP_UUU_D, H8, H8, H8, fmacc64) +GEN_VEXT_VV_ENV(vfmacc_vv_h, 2, 2, clearh) +GEN_VEXT_VV_ENV(vfmacc_vv_w, 4, 4, clearl) +GEN_VEXT_VV_ENV(vfmacc_vv_d, 8, 8, clearq) + +#define OPFVF3(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ +static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i, \ + CPURISCVState *env) \ +{ \ + TX2 s2 = *((T2 *)vs2 + HS2(i)); \ + TD d = 
*((TD *)vd + HD(i)); \ + *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, d, &env->fp_status);\ +} + +RVVCALL(OPFVF3, vfmacc_vf_h, OP_UUU_H, H2, H2, fmacc16) +RVVCALL(OPFVF3, vfmacc_vf_w, OP_UUU_W, H4, H4, fmacc32) +RVVCALL(OPFVF3, vfmacc_vf_d, OP_UUU_D, H8, H8, fmacc64) +GEN_VEXT_VF(vfmacc_vf_h, 2, 2, clearh) +GEN_VEXT_VF(vfmacc_vf_w, 4, 4, clearl) +GEN_VEXT_VF(vfmacc_vf_d, 8, 8, clearq) + +static uint16_t fnmacc16(uint16_t a, uint16_t b, uint16_t d, float_status *s) +{ + return float16_muladd(a, b, d, + float_muladd_negate_c | float_muladd_negate_product, s); +} + +static uint32_t fnmacc32(uint32_t a, uint32_t b, uint32_t d, float_status *s) +{ + return float32_muladd(a, b, d, + float_muladd_negate_c | float_muladd_negate_product, s); +} + +static uint64_t fnmacc64(uint64_t a, uint64_t b, uint64_t d, float_status *s) +{ + return float64_muladd(a, b, d, + float_muladd_negate_c | float_muladd_negate_product, s); +} + +RVVCALL(OPFVV3, vfnmacc_vv_h, OP_UUU_H, H2, H2, H2, fnmacc16) +RVVCALL(OPFVV3, vfnmacc_vv_w, OP_UUU_W, H4, H4, H4, fnmacc32) +RVVCALL(OPFVV3, vfnmacc_vv_d, OP_UUU_D, H8, H8, H8, fnmacc64) +GEN_VEXT_VV_ENV(vfnmacc_vv_h, 2, 2, clearh) +GEN_VEXT_VV_ENV(vfnmacc_vv_w, 4, 4, clearl) +GEN_VEXT_VV_ENV(vfnmacc_vv_d, 8, 8, clearq) +RVVCALL(OPFVF3, vfnmacc_vf_h, OP_UUU_H, H2, H2, fnmacc16) +RVVCALL(OPFVF3, vfnmacc_vf_w, OP_UUU_W, H4, H4, fnmacc32) +RVVCALL(OPFVF3, vfnmacc_vf_d, OP_UUU_D, H8, H8, fnmacc64) +GEN_VEXT_VF(vfnmacc_vf_h, 2, 2, clearh) +GEN_VEXT_VF(vfnmacc_vf_w, 4, 4, clearl) +GEN_VEXT_VF(vfnmacc_vf_d, 8, 8, clearq) + +static uint16_t fmsac16(uint16_t a, uint16_t b, uint16_t d, float_status *s) +{ + return float16_muladd(a, b, d, float_muladd_negate_c, s); +} + +static uint32_t fmsac32(uint32_t a, uint32_t b, uint32_t d, float_status *s) +{ + return float32_muladd(a, b, d, float_muladd_negate_c, s); +} + +static uint64_t fmsac64(uint64_t a, uint64_t b, uint64_t d, float_status *s) +{ + return float64_muladd(a, b, d, float_muladd_negate_c, s); +} + +RVVCALL(OPFVV3, vfmsac_vv_h, OP_UUU_H, H2, H2, H2, fmsac16) +RVVCALL(OPFVV3, vfmsac_vv_w, OP_UUU_W, H4, H4, H4, fmsac32) +RVVCALL(OPFVV3, vfmsac_vv_d, OP_UUU_D, H8, H8, H8, fmsac64) +GEN_VEXT_VV_ENV(vfmsac_vv_h, 2, 2, clearh) +GEN_VEXT_VV_ENV(vfmsac_vv_w, 4, 4, clearl) +GEN_VEXT_VV_ENV(vfmsac_vv_d, 8, 8, clearq) +RVVCALL(OPFVF3, vfmsac_vf_h, OP_UUU_H, H2, H2, fmsac16) +RVVCALL(OPFVF3, vfmsac_vf_w, OP_UUU_W, H4, H4, fmsac32) +RVVCALL(OPFVF3, vfmsac_vf_d, OP_UUU_D, H8, H8, fmsac64) +GEN_VEXT_VF(vfmsac_vf_h, 2, 2, clearh) +GEN_VEXT_VF(vfmsac_vf_w, 4, 4, clearl) +GEN_VEXT_VF(vfmsac_vf_d, 8, 8, clearq) + +static uint16_t fnmsac16(uint16_t a, uint16_t b, uint16_t d, float_status *s) +{ + return float16_muladd(a, b, d, float_muladd_negate_product, s); +} + +static uint32_t fnmsac32(uint32_t a, uint32_t b, uint32_t d, float_status *s) +{ + return float32_muladd(a, b, d, float_muladd_negate_product, s); +} + +static uint64_t fnmsac64(uint64_t a, uint64_t b, uint64_t d, float_status *s) +{ + return float64_muladd(a, b, d, float_muladd_negate_product, s); +} + +RVVCALL(OPFVV3, vfnmsac_vv_h, OP_UUU_H, H2, H2, H2, fnmsac16) +RVVCALL(OPFVV3, vfnmsac_vv_w, OP_UUU_W, H4, H4, H4, fnmsac32) +RVVCALL(OPFVV3, vfnmsac_vv_d, OP_UUU_D, H8, H8, H8, fnmsac64) +GEN_VEXT_VV_ENV(vfnmsac_vv_h, 2, 2, clearh) +GEN_VEXT_VV_ENV(vfnmsac_vv_w, 4, 4, clearl) +GEN_VEXT_VV_ENV(vfnmsac_vv_d, 8, 8, clearq) +RVVCALL(OPFVF3, vfnmsac_vf_h, OP_UUU_H, H2, H2, fnmsac16) +RVVCALL(OPFVF3, vfnmsac_vf_w, OP_UUU_W, H4, H4, fnmsac32) +RVVCALL(OPFVF3, vfnmsac_vf_d, OP_UUU_D, H8, H8, 
fnmsac64) +GEN_VEXT_VF(vfnmsac_vf_h, 2, 2, clearh) +GEN_VEXT_VF(vfnmsac_vf_w, 4, 4, clearl) +GEN_VEXT_VF(vfnmsac_vf_d, 8, 8, clearq) + +static uint16_t fmadd16(uint16_t a, uint16_t b, uint16_t d, float_status *s) +{ + return float16_muladd(d, b, a, 0, s); +} + +static uint32_t fmadd32(uint32_t a, uint32_t b, uint32_t d, float_status *s) +{ + return float32_muladd(d, b, a, 0, s); +} + +static uint64_t fmadd64(uint64_t a, uint64_t b, uint64_t d, float_status *s) +{ + return float64_muladd(d, b, a, 0, s); +} + +RVVCALL(OPFVV3, vfmadd_vv_h, OP_UUU_H, H2, H2, H2, fmadd16) +RVVCALL(OPFVV3, vfmadd_vv_w, OP_UUU_W, H4, H4, H4, fmadd32) +RVVCALL(OPFVV3, vfmadd_vv_d, OP_UUU_D, H8, H8, H8, fmadd64) +GEN_VEXT_VV_ENV(vfmadd_vv_h, 2, 2, clearh) +GEN_VEXT_VV_ENV(vfmadd_vv_w, 4, 4, clearl) +GEN_VEXT_VV_ENV(vfmadd_vv_d, 8, 8, clearq) +RVVCALL(OPFVF3, vfmadd_vf_h, OP_UUU_H, H2, H2, fmadd16) +RVVCALL(OPFVF3, vfmadd_vf_w, OP_UUU_W, H4, H4, fmadd32) +RVVCALL(OPFVF3, vfmadd_vf_d, OP_UUU_D, H8, H8, fmadd64) +GEN_VEXT_VF(vfmadd_vf_h, 2, 2, clearh) +GEN_VEXT_VF(vfmadd_vf_w, 4, 4, clearl) +GEN_VEXT_VF(vfmadd_vf_d, 8, 8, clearq) + +static uint16_t fnmadd16(uint16_t a, uint16_t b, uint16_t d, float_status *s) +{ + return float16_muladd(d, b, a, + float_muladd_negate_c | float_muladd_negate_product, s); +} + +static uint32_t fnmadd32(uint32_t a, uint32_t b, uint32_t d, float_status *s) +{ + return float32_muladd(d, b, a, + float_muladd_negate_c | float_muladd_negate_product, s); +} + +static uint64_t fnmadd64(uint64_t a, uint64_t b, uint64_t d, float_status *s) +{ + return float64_muladd(d, b, a, + float_muladd_negate_c | float_muladd_negate_product, s); +} + +RVVCALL(OPFVV3, vfnmadd_vv_h, OP_UUU_H, H2, H2, H2, fnmadd16) +RVVCALL(OPFVV3, vfnmadd_vv_w, OP_UUU_W, H4, H4, H4, fnmadd32) +RVVCALL(OPFVV3, vfnmadd_vv_d, OP_UUU_D, H8, H8, H8, fnmadd64) +GEN_VEXT_VV_ENV(vfnmadd_vv_h, 2, 2, clearh) +GEN_VEXT_VV_ENV(vfnmadd_vv_w, 4, 4, clearl) +GEN_VEXT_VV_ENV(vfnmadd_vv_d, 8, 8, clearq) +RVVCALL(OPFVF3, vfnmadd_vf_h, OP_UUU_H, H2, H2, fnmadd16) +RVVCALL(OPFVF3, vfnmadd_vf_w, OP_UUU_W, H4, H4, fnmadd32) +RVVCALL(OPFVF3, vfnmadd_vf_d, OP_UUU_D, H8, H8, fnmadd64) +GEN_VEXT_VF(vfnmadd_vf_h, 2, 2, clearh) +GEN_VEXT_VF(vfnmadd_vf_w, 4, 4, clearl) +GEN_VEXT_VF(vfnmadd_vf_d, 8, 8, clearq) + +static uint16_t fmsub16(uint16_t a, uint16_t b, uint16_t d, float_status *s) +{ + return float16_muladd(d, b, a, float_muladd_negate_c, s); +} + +static uint32_t fmsub32(uint32_t a, uint32_t b, uint32_t d, float_status *s) +{ + return float32_muladd(d, b, a, float_muladd_negate_c, s); +} + +static uint64_t fmsub64(uint64_t a, uint64_t b, uint64_t d, float_status *s) +{ + return float64_muladd(d, b, a, float_muladd_negate_c, s); +} + +RVVCALL(OPFVV3, vfmsub_vv_h, OP_UUU_H, H2, H2, H2, fmsub16) +RVVCALL(OPFVV3, vfmsub_vv_w, OP_UUU_W, H4, H4, H4, fmsub32) +RVVCALL(OPFVV3, vfmsub_vv_d, OP_UUU_D, H8, H8, H8, fmsub64) +GEN_VEXT_VV_ENV(vfmsub_vv_h, 2, 2, clearh) +GEN_VEXT_VV_ENV(vfmsub_vv_w, 4, 4, clearl) +GEN_VEXT_VV_ENV(vfmsub_vv_d, 8, 8, clearq) +RVVCALL(OPFVF3, vfmsub_vf_h, OP_UUU_H, H2, H2, fmsub16) +RVVCALL(OPFVF3, vfmsub_vf_w, OP_UUU_W, H4, H4, fmsub32) +RVVCALL(OPFVF3, vfmsub_vf_d, OP_UUU_D, H8, H8, fmsub64) +GEN_VEXT_VF(vfmsub_vf_h, 2, 2, clearh) +GEN_VEXT_VF(vfmsub_vf_w, 4, 4, clearl) +GEN_VEXT_VF(vfmsub_vf_d, 8, 8, clearq) + +static uint16_t fnmsub16(uint16_t a, uint16_t b, uint16_t d, float_status *s) +{ + return float16_muladd(d, b, a, float_muladd_negate_product, s); +} + +static uint32_t fnmsub32(uint32_t a, uint32_t b, uint32_t d, 
float_status *s) +{ + return float32_muladd(d, b, a, float_muladd_negate_product, s); +} + +static uint64_t fnmsub64(uint64_t a, uint64_t b, uint64_t d, float_status *s) +{ + return float64_muladd(d, b, a, float_muladd_negate_product, s); +} + +RVVCALL(OPFVV3, vfnmsub_vv_h, OP_UUU_H, H2, H2, H2, fnmsub16) +RVVCALL(OPFVV3, vfnmsub_vv_w, OP_UUU_W, H4, H4, H4, fnmsub32) +RVVCALL(OPFVV3, vfnmsub_vv_d, OP_UUU_D, H8, H8, H8, fnmsub64) +GEN_VEXT_VV_ENV(vfnmsub_vv_h, 2, 2, clearh) +GEN_VEXT_VV_ENV(vfnmsub_vv_w, 4, 4, clearl) +GEN_VEXT_VV_ENV(vfnmsub_vv_d, 8, 8, clearq) +RVVCALL(OPFVF3, vfnmsub_vf_h, OP_UUU_H, H2, H2, fnmsub16) +RVVCALL(OPFVF3, vfnmsub_vf_w, OP_UUU_W, H4, H4, fnmsub32) +RVVCALL(OPFVF3, vfnmsub_vf_d, OP_UUU_D, H8, H8, fnmsub64) +GEN_VEXT_VF(vfnmsub_vf_h, 2, 2, clearh) +GEN_VEXT_VF(vfnmsub_vf_w, 4, 4, clearl) +GEN_VEXT_VF(vfnmsub_vf_d, 8, 8, clearq) + +/* Vector Widening Floating-Point Fused Multiply-Add Instructions */ +static uint32_t fwmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s) +{ + return float32_muladd(float16_to_float32(a, true, s), + float16_to_float32(b, true, s), d, 0, s); +} + +static uint64_t fwmacc32(uint32_t a, uint32_t b, uint64_t d, float_status *s) +{ + return float64_muladd(float32_to_float64(a, s), + float32_to_float64(b, s), d, 0, s); +} + +RVVCALL(OPFVV3, vfwmacc_vv_h, WOP_UUU_H, H4, H2, H2, fwmacc16) +RVVCALL(OPFVV3, vfwmacc_vv_w, WOP_UUU_W, H8, H4, H4, fwmacc32) +GEN_VEXT_VV_ENV(vfwmacc_vv_h, 2, 4, clearl) +GEN_VEXT_VV_ENV(vfwmacc_vv_w, 4, 8, clearq) +RVVCALL(OPFVF3, vfwmacc_vf_h, WOP_UUU_H, H4, H2, fwmacc16) +RVVCALL(OPFVF3, vfwmacc_vf_w, WOP_UUU_W, H8, H4, fwmacc32) +GEN_VEXT_VF(vfwmacc_vf_h, 2, 4, clearl) +GEN_VEXT_VF(vfwmacc_vf_w, 4, 8, clearq) + +static uint32_t fwnmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s) +{ + return float32_muladd(float16_to_float32(a, true, s), + float16_to_float32(b, true, s), d, + float_muladd_negate_c | float_muladd_negate_product, s); +} + +static uint64_t fwnmacc32(uint32_t a, uint32_t b, uint64_t d, float_status *s) +{ + return float64_muladd(float32_to_float64(a, s), + float32_to_float64(b, s), d, + float_muladd_negate_c | float_muladd_negate_product, s); +} + +RVVCALL(OPFVV3, vfwnmacc_vv_h, WOP_UUU_H, H4, H2, H2, fwnmacc16) +RVVCALL(OPFVV3, vfwnmacc_vv_w, WOP_UUU_W, H8, H4, H4, fwnmacc32) +GEN_VEXT_VV_ENV(vfwnmacc_vv_h, 2, 4, clearl) +GEN_VEXT_VV_ENV(vfwnmacc_vv_w, 4, 8, clearq) +RVVCALL(OPFVF3, vfwnmacc_vf_h, WOP_UUU_H, H4, H2, fwnmacc16) +RVVCALL(OPFVF3, vfwnmacc_vf_w, WOP_UUU_W, H8, H4, fwnmacc32) +GEN_VEXT_VF(vfwnmacc_vf_h, 2, 4, clearl) +GEN_VEXT_VF(vfwnmacc_vf_w, 4, 8, clearq) + +static uint32_t fwmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s) +{ + return float32_muladd(float16_to_float32(a, true, s), + float16_to_float32(b, true, s), d, + float_muladd_negate_c, s); +} + +static uint64_t fwmsac32(uint32_t a, uint32_t b, uint64_t d, float_status *s) +{ + return float64_muladd(float32_to_float64(a, s), + float32_to_float64(b, s), d, + float_muladd_negate_c, s); +} + +RVVCALL(OPFVV3, vfwmsac_vv_h, WOP_UUU_H, H4, H2, H2, fwmsac16) +RVVCALL(OPFVV3, vfwmsac_vv_w, WOP_UUU_W, H8, H4, H4, fwmsac32) +GEN_VEXT_VV_ENV(vfwmsac_vv_h, 2, 4, clearl) +GEN_VEXT_VV_ENV(vfwmsac_vv_w, 4, 8, clearq) +RVVCALL(OPFVF3, vfwmsac_vf_h, WOP_UUU_H, H4, H2, fwmsac16) +RVVCALL(OPFVF3, vfwmsac_vf_w, WOP_UUU_W, H8, H4, fwmsac32) +GEN_VEXT_VF(vfwmsac_vf_h, 2, 4, clearl) +GEN_VEXT_VF(vfwmsac_vf_w, 4, 8, clearq) + +static uint32_t fwnmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s) +{ + return 
float32_muladd(float16_to_float32(a, true, s), + float16_to_float32(b, true, s), d, + float_muladd_negate_product, s); +} + +static uint64_t fwnmsac32(uint32_t a, uint32_t b, uint64_t d, float_status *s) +{ + return float64_muladd(float32_to_float64(a, s), + float32_to_float64(b, s), d, + float_muladd_negate_product, s); +} + +RVVCALL(OPFVV3, vfwnmsac_vv_h, WOP_UUU_H, H4, H2, H2, fwnmsac16) +RVVCALL(OPFVV3, vfwnmsac_vv_w, WOP_UUU_W, H8, H4, H4, fwnmsac32) +GEN_VEXT_VV_ENV(vfwnmsac_vv_h, 2, 4, clearl) +GEN_VEXT_VV_ENV(vfwnmsac_vv_w, 4, 8, clearq) +RVVCALL(OPFVF3, vfwnmsac_vf_h, WOP_UUU_H, H4, H2, fwnmsac16) +RVVCALL(OPFVF3, vfwnmsac_vf_w, WOP_UUU_W, H8, H4, fwnmsac32) +GEN_VEXT_VF(vfwnmsac_vf_h, 2, 4, clearl) +GEN_VEXT_VF(vfwnmsac_vf_w, 4, 8, clearq) + +/* Vector Floating-Point Square-Root Instruction */ +/* (TD, T2, TX2) */ +#define OP_UU_H uint16_t, uint16_t, uint16_t +#define OP_UU_W uint32_t, uint32_t, uint32_t +#define OP_UU_D uint64_t, uint64_t, uint64_t + +#define OPFVV1(NAME, TD, T2, TX2, HD, HS2, OP) \ +static void do_##NAME(void *vd, void *vs2, int i, \ + CPURISCVState *env) \ +{ \ + TX2 s2 = *((T2 *)vs2 + HS2(i)); \ + *((TD *)vd + HD(i)) = OP(s2, &env->fp_status); \ +} + +#define GEN_VEXT_V_ENV(NAME, ESZ, DSZ, CLEAR_FN) \ +void HELPER(NAME)(void *vd, void *v0, void *vs2, \ + CPURISCVState *env, uint32_t desc) \ +{ \ + uint32_t vlmax = vext_maxsz(desc) / ESZ; \ + uint32_t mlen = vext_mlen(desc); \ + uint32_t vm = vext_vm(desc); \ + uint32_t vl = env->vl; \ + uint32_t i; \ + \ + if (vl == 0) { \ + return; \ + } \ + for (i = 0; i < vl; i++) { \ + if (!vm && !vext_elem_mask(v0, mlen, i)) { \ + continue; \ + } \ + do_##NAME(vd, vs2, i, env); \ + } \ + CLEAR_FN(vd, vl, vl * DSZ, vlmax * DSZ); \ +} + +RVVCALL(OPFVV1, vfsqrt_v_h, OP_UU_H, H2, H2, float16_sqrt) +RVVCALL(OPFVV1, vfsqrt_v_w, OP_UU_W, H4, H4, float32_sqrt) +RVVCALL(OPFVV1, vfsqrt_v_d, OP_UU_D, H8, H8, float64_sqrt) +GEN_VEXT_V_ENV(vfsqrt_v_h, 2, 2, clearh) +GEN_VEXT_V_ENV(vfsqrt_v_w, 4, 4, clearl) +GEN_VEXT_V_ENV(vfsqrt_v_d, 8, 8, clearq) + +/* Vector Floating-Point MIN/MAX Instructions */ +RVVCALL(OPFVV2, vfmin_vv_h, OP_UUU_H, H2, H2, H2, float16_minnum) +RVVCALL(OPFVV2, vfmin_vv_w, OP_UUU_W, H4, H4, H4, float32_minnum) +RVVCALL(OPFVV2, vfmin_vv_d, OP_UUU_D, H8, H8, H8, float64_minnum) +GEN_VEXT_VV_ENV(vfmin_vv_h, 2, 2, clearh) +GEN_VEXT_VV_ENV(vfmin_vv_w, 4, 4, clearl) +GEN_VEXT_VV_ENV(vfmin_vv_d, 8, 8, clearq) +RVVCALL(OPFVF2, vfmin_vf_h, OP_UUU_H, H2, H2, float16_minnum) +RVVCALL(OPFVF2, vfmin_vf_w, OP_UUU_W, H4, H4, float32_minnum) +RVVCALL(OPFVF2, vfmin_vf_d, OP_UUU_D, H8, H8, float64_minnum) +GEN_VEXT_VF(vfmin_vf_h, 2, 2, clearh) +GEN_VEXT_VF(vfmin_vf_w, 4, 4, clearl) +GEN_VEXT_VF(vfmin_vf_d, 8, 8, clearq) + +RVVCALL(OPFVV2, vfmax_vv_h, OP_UUU_H, H2, H2, H2, float16_maxnum) +RVVCALL(OPFVV2, vfmax_vv_w, OP_UUU_W, H4, H4, H4, float32_maxnum) +RVVCALL(OPFVV2, vfmax_vv_d, OP_UUU_D, H8, H8, H8, float64_maxnum) +GEN_VEXT_VV_ENV(vfmax_vv_h, 2, 2, clearh) +GEN_VEXT_VV_ENV(vfmax_vv_w, 4, 4, clearl) +GEN_VEXT_VV_ENV(vfmax_vv_d, 8, 8, clearq) +RVVCALL(OPFVF2, vfmax_vf_h, OP_UUU_H, H2, H2, float16_maxnum) +RVVCALL(OPFVF2, vfmax_vf_w, OP_UUU_W, H4, H4, float32_maxnum) +RVVCALL(OPFVF2, vfmax_vf_d, OP_UUU_D, H8, H8, float64_maxnum) +GEN_VEXT_VF(vfmax_vf_h, 2, 2, clearh) +GEN_VEXT_VF(vfmax_vf_w, 4, 4, clearl) +GEN_VEXT_VF(vfmax_vf_d, 8, 8, clearq) + +/* Vector Floating-Point Sign-Injection Instructions */ +static uint16_t fsgnj16(uint16_t a, uint16_t b, float_status *s) +{ + return deposit64(b, 0, 15, a); +} + +static uint32_t 
fsgnj32(uint32_t a, uint32_t b, float_status *s) +{ + return deposit64(b, 0, 31, a); +} + +static uint64_t fsgnj64(uint64_t a, uint64_t b, float_status *s) +{ + return deposit64(b, 0, 63, a); +} + +RVVCALL(OPFVV2, vfsgnj_vv_h, OP_UUU_H, H2, H2, H2, fsgnj16) +RVVCALL(OPFVV2, vfsgnj_vv_w, OP_UUU_W, H4, H4, H4, fsgnj32) +RVVCALL(OPFVV2, vfsgnj_vv_d, OP_UUU_D, H8, H8, H8, fsgnj64) +GEN_VEXT_VV_ENV(vfsgnj_vv_h, 2, 2, clearh) +GEN_VEXT_VV_ENV(vfsgnj_vv_w, 4, 4, clearl) +GEN_VEXT_VV_ENV(vfsgnj_vv_d, 8, 8, clearq) +RVVCALL(OPFVF2, vfsgnj_vf_h, OP_UUU_H, H2, H2, fsgnj16) +RVVCALL(OPFVF2, vfsgnj_vf_w, OP_UUU_W, H4, H4, fsgnj32) +RVVCALL(OPFVF2, vfsgnj_vf_d, OP_UUU_D, H8, H8, fsgnj64) +GEN_VEXT_VF(vfsgnj_vf_h, 2, 2, clearh) +GEN_VEXT_VF(vfsgnj_vf_w, 4, 4, clearl) +GEN_VEXT_VF(vfsgnj_vf_d, 8, 8, clearq) + +static uint16_t fsgnjn16(uint16_t a, uint16_t b, float_status *s) +{ + return deposit64(~b, 0, 15, a); +} + +static uint32_t fsgnjn32(uint32_t a, uint32_t b, float_status *s) +{ + return deposit64(~b, 0, 31, a); +} + +static uint64_t fsgnjn64(uint64_t a, uint64_t b, float_status *s) +{ + return deposit64(~b, 0, 63, a); +} + +RVVCALL(OPFVV2, vfsgnjn_vv_h, OP_UUU_H, H2, H2, H2, fsgnjn16) +RVVCALL(OPFVV2, vfsgnjn_vv_w, OP_UUU_W, H4, H4, H4, fsgnjn32) +RVVCALL(OPFVV2, vfsgnjn_vv_d, OP_UUU_D, H8, H8, H8, fsgnjn64) +GEN_VEXT_VV_ENV(vfsgnjn_vv_h, 2, 2, clearh) +GEN_VEXT_VV_ENV(vfsgnjn_vv_w, 4, 4, clearl) +GEN_VEXT_VV_ENV(vfsgnjn_vv_d, 8, 8, clearq) +RVVCALL(OPFVF2, vfsgnjn_vf_h, OP_UUU_H, H2, H2, fsgnjn16) +RVVCALL(OPFVF2, vfsgnjn_vf_w, OP_UUU_W, H4, H4, fsgnjn32) +RVVCALL(OPFVF2, vfsgnjn_vf_d, OP_UUU_D, H8, H8, fsgnjn64) +GEN_VEXT_VF(vfsgnjn_vf_h, 2, 2, clearh) +GEN_VEXT_VF(vfsgnjn_vf_w, 4, 4, clearl) +GEN_VEXT_VF(vfsgnjn_vf_d, 8, 8, clearq) + +static uint16_t fsgnjx16(uint16_t a, uint16_t b, float_status *s) +{ + return deposit64(b ^ a, 0, 15, a); +} + +static uint32_t fsgnjx32(uint32_t a, uint32_t b, float_status *s) +{ + return deposit64(b ^ a, 0, 31, a); +} + +static uint64_t fsgnjx64(uint64_t a, uint64_t b, float_status *s) +{ + return deposit64(b ^ a, 0, 63, a); +} + +RVVCALL(OPFVV2, vfsgnjx_vv_h, OP_UUU_H, H2, H2, H2, fsgnjx16) +RVVCALL(OPFVV2, vfsgnjx_vv_w, OP_UUU_W, H4, H4, H4, fsgnjx32) +RVVCALL(OPFVV2, vfsgnjx_vv_d, OP_UUU_D, H8, H8, H8, fsgnjx64) +GEN_VEXT_VV_ENV(vfsgnjx_vv_h, 2, 2, clearh) +GEN_VEXT_VV_ENV(vfsgnjx_vv_w, 4, 4, clearl) +GEN_VEXT_VV_ENV(vfsgnjx_vv_d, 8, 8, clearq) +RVVCALL(OPFVF2, vfsgnjx_vf_h, OP_UUU_H, H2, H2, fsgnjx16) +RVVCALL(OPFVF2, vfsgnjx_vf_w, OP_UUU_W, H4, H4, fsgnjx32) +RVVCALL(OPFVF2, vfsgnjx_vf_d, OP_UUU_D, H8, H8, fsgnjx64) +GEN_VEXT_VF(vfsgnjx_vf_h, 2, 2, clearh) +GEN_VEXT_VF(vfsgnjx_vf_w, 4, 4, clearl) +GEN_VEXT_VF(vfsgnjx_vf_d, 8, 8, clearq) + +/* Vector Floating-Point Compare Instructions */ +#define GEN_VEXT_CMP_VV_ENV(NAME, ETYPE, H, DO_OP) \ +void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ + CPURISCVState *env, uint32_t desc) \ +{ \ + uint32_t mlen = vext_mlen(desc); \ + uint32_t vm = vext_vm(desc); \ + uint32_t vl = env->vl; \ + uint32_t vlmax = vext_maxsz(desc) / sizeof(ETYPE); \ + uint32_t i; \ + \ + for (i = 0; i < vl; i++) { \ + ETYPE s1 = *((ETYPE *)vs1 + H(i)); \ + ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ + if (!vm && !vext_elem_mask(v0, mlen, i)) { \ + continue; \ + } \ + vext_set_elem_mask(vd, mlen, i, \ + DO_OP(s2, s1, &env->fp_status)); \ + } \ + for (; i < vlmax; i++) { \ + vext_set_elem_mask(vd, mlen, i, 0); \ + } \ +} + +static bool float16_eq_quiet(uint16_t a, uint16_t b, float_status *s) +{ + FloatRelation compare = 
float16_compare_quiet(a, b, s); + return compare == float_relation_equal; +} + +GEN_VEXT_CMP_VV_ENV(vmfeq_vv_h, uint16_t, H2, float16_eq_quiet) +GEN_VEXT_CMP_VV_ENV(vmfeq_vv_w, uint32_t, H4, float32_eq_quiet) +GEN_VEXT_CMP_VV_ENV(vmfeq_vv_d, uint64_t, H8, float64_eq_quiet) + +#define GEN_VEXT_CMP_VF(NAME, ETYPE, H, DO_OP) \ +void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \ + CPURISCVState *env, uint32_t desc) \ +{ \ + uint32_t mlen = vext_mlen(desc); \ + uint32_t vm = vext_vm(desc); \ + uint32_t vl = env->vl; \ + uint32_t vlmax = vext_maxsz(desc) / sizeof(ETYPE); \ + uint32_t i; \ + \ + for (i = 0; i < vl; i++) { \ + ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ + if (!vm && !vext_elem_mask(v0, mlen, i)) { \ + continue; \ + } \ + vext_set_elem_mask(vd, mlen, i, \ + DO_OP(s2, (ETYPE)s1, &env->fp_status)); \ + } \ + for (; i < vlmax; i++) { \ + vext_set_elem_mask(vd, mlen, i, 0); \ + } \ +} + +GEN_VEXT_CMP_VF(vmfeq_vf_h, uint16_t, H2, float16_eq_quiet) +GEN_VEXT_CMP_VF(vmfeq_vf_w, uint32_t, H4, float32_eq_quiet) +GEN_VEXT_CMP_VF(vmfeq_vf_d, uint64_t, H8, float64_eq_quiet) + +static bool vmfne16(uint16_t a, uint16_t b, float_status *s) +{ + FloatRelation compare = float16_compare_quiet(a, b, s); + return compare != float_relation_equal; +} + +static bool vmfne32(uint32_t a, uint32_t b, float_status *s) +{ + FloatRelation compare = float32_compare_quiet(a, b, s); + return compare != float_relation_equal; +} + +static bool vmfne64(uint64_t a, uint64_t b, float_status *s) +{ + FloatRelation compare = float64_compare_quiet(a, b, s); + return compare != float_relation_equal; +} + +GEN_VEXT_CMP_VV_ENV(vmfne_vv_h, uint16_t, H2, vmfne16) +GEN_VEXT_CMP_VV_ENV(vmfne_vv_w, uint32_t, H4, vmfne32) +GEN_VEXT_CMP_VV_ENV(vmfne_vv_d, uint64_t, H8, vmfne64) +GEN_VEXT_CMP_VF(vmfne_vf_h, uint16_t, H2, vmfne16) +GEN_VEXT_CMP_VF(vmfne_vf_w, uint32_t, H4, vmfne32) +GEN_VEXT_CMP_VF(vmfne_vf_d, uint64_t, H8, vmfne64) + +static bool float16_lt(uint16_t a, uint16_t b, float_status *s) +{ + FloatRelation compare = float16_compare(a, b, s); + return compare == float_relation_less; +} + +GEN_VEXT_CMP_VV_ENV(vmflt_vv_h, uint16_t, H2, float16_lt) +GEN_VEXT_CMP_VV_ENV(vmflt_vv_w, uint32_t, H4, float32_lt) +GEN_VEXT_CMP_VV_ENV(vmflt_vv_d, uint64_t, H8, float64_lt) +GEN_VEXT_CMP_VF(vmflt_vf_h, uint16_t, H2, float16_lt) +GEN_VEXT_CMP_VF(vmflt_vf_w, uint32_t, H4, float32_lt) +GEN_VEXT_CMP_VF(vmflt_vf_d, uint64_t, H8, float64_lt) + +static bool float16_le(uint16_t a, uint16_t b, float_status *s) +{ + FloatRelation compare = float16_compare(a, b, s); + return compare == float_relation_less || + compare == float_relation_equal; +} + +GEN_VEXT_CMP_VV_ENV(vmfle_vv_h, uint16_t, H2, float16_le) +GEN_VEXT_CMP_VV_ENV(vmfle_vv_w, uint32_t, H4, float32_le) +GEN_VEXT_CMP_VV_ENV(vmfle_vv_d, uint64_t, H8, float64_le) +GEN_VEXT_CMP_VF(vmfle_vf_h, uint16_t, H2, float16_le) +GEN_VEXT_CMP_VF(vmfle_vf_w, uint32_t, H4, float32_le) +GEN_VEXT_CMP_VF(vmfle_vf_d, uint64_t, H8, float64_le) + +static bool vmfgt16(uint16_t a, uint16_t b, float_status *s) +{ + FloatRelation compare = float16_compare(a, b, s); + return compare == float_relation_greater; +} + +static bool vmfgt32(uint32_t a, uint32_t b, float_status *s) +{ + FloatRelation compare = float32_compare(a, b, s); + return compare == float_relation_greater; +} + +static bool vmfgt64(uint64_t a, uint64_t b, float_status *s) +{ + FloatRelation compare = float64_compare(a, b, s); + return compare == float_relation_greater; +} + +GEN_VEXT_CMP_VF(vmfgt_vf_h, uint16_t, H2, vmfgt16) 
+GEN_VEXT_CMP_VF(vmfgt_vf_w, uint32_t, H4, vmfgt32) +GEN_VEXT_CMP_VF(vmfgt_vf_d, uint64_t, H8, vmfgt64) + +static bool vmfge16(uint16_t a, uint16_t b, float_status *s) +{ + FloatRelation compare = float16_compare(a, b, s); + return compare == float_relation_greater || + compare == float_relation_equal; +} + +static bool vmfge32(uint32_t a, uint32_t b, float_status *s) +{ + FloatRelation compare = float32_compare(a, b, s); + return compare == float_relation_greater || + compare == float_relation_equal; +} + +static bool vmfge64(uint64_t a, uint64_t b, float_status *s) +{ + FloatRelation compare = float64_compare(a, b, s); + return compare == float_relation_greater || + compare == float_relation_equal; +} + +GEN_VEXT_CMP_VF(vmfge_vf_h, uint16_t, H2, vmfge16) +GEN_VEXT_CMP_VF(vmfge_vf_w, uint32_t, H4, vmfge32) +GEN_VEXT_CMP_VF(vmfge_vf_d, uint64_t, H8, vmfge64) + +static bool float16_unordered_quiet(uint16_t a, uint16_t b, float_status *s) +{ + FloatRelation compare = float16_compare_quiet(a, b, s); + return compare == float_relation_unordered; +} + +GEN_VEXT_CMP_VV_ENV(vmford_vv_h, uint16_t, H2, !float16_unordered_quiet) +GEN_VEXT_CMP_VV_ENV(vmford_vv_w, uint32_t, H4, !float32_unordered_quiet) +GEN_VEXT_CMP_VV_ENV(vmford_vv_d, uint64_t, H8, !float64_unordered_quiet) +GEN_VEXT_CMP_VF(vmford_vf_h, uint16_t, H2, !float16_unordered_quiet) +GEN_VEXT_CMP_VF(vmford_vf_w, uint32_t, H4, !float32_unordered_quiet) +GEN_VEXT_CMP_VF(vmford_vf_d, uint64_t, H8, !float64_unordered_quiet) + +/* Vector Floating-Point Classify Instruction */ +#define OPIVV1(NAME, TD, T2, TX2, HD, HS2, OP) \ +static void do_##NAME(void *vd, void *vs2, int i) \ +{ \ + TX2 s2 = *((T2 *)vs2 + HS2(i)); \ + *((TD *)vd + HD(i)) = OP(s2); \ +} + +#define GEN_VEXT_V(NAME, ESZ, DSZ, CLEAR_FN) \ +void HELPER(NAME)(void *vd, void *v0, void *vs2, \ + CPURISCVState *env, uint32_t desc) \ +{ \ + uint32_t vlmax = vext_maxsz(desc) / ESZ; \ + uint32_t mlen = vext_mlen(desc); \ + uint32_t vm = vext_vm(desc); \ + uint32_t vl = env->vl; \ + uint32_t i; \ + \ + for (i = 0; i < vl; i++) { \ + if (!vm && !vext_elem_mask(v0, mlen, i)) { \ + continue; \ + } \ + do_##NAME(vd, vs2, i); \ + } \ + CLEAR_FN(vd, vl, vl * DSZ, vlmax * DSZ); \ +} + +target_ulong fclass_h(uint64_t frs1) +{ + float16 f = frs1; + bool sign = float16_is_neg(f); + + if (float16_is_infinity(f)) { + return sign ? 1 << 0 : 1 << 7; + } else if (float16_is_zero(f)) { + return sign ? 1 << 3 : 1 << 4; + } else if (float16_is_zero_or_denormal(f)) { + return sign ? 1 << 2 : 1 << 5; + } else if (float16_is_any_nan(f)) { + float_status s = { }; /* for snan_bit_is_one */ + return float16_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8; + } else { + return sign ? 1 << 1 : 1 << 6; + } +} + +target_ulong fclass_s(uint64_t frs1) +{ + float32 f = frs1; + bool sign = float32_is_neg(f); + + if (float32_is_infinity(f)) { + return sign ? 1 << 0 : 1 << 7; + } else if (float32_is_zero(f)) { + return sign ? 1 << 3 : 1 << 4; + } else if (float32_is_zero_or_denormal(f)) { + return sign ? 1 << 2 : 1 << 5; + } else if (float32_is_any_nan(f)) { + float_status s = { }; /* for snan_bit_is_one */ + return float32_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8; + } else { + return sign ? 1 << 1 : 1 << 6; + } +} + +target_ulong fclass_d(uint64_t frs1) +{ + float64 f = frs1; + bool sign = float64_is_neg(f); + + if (float64_is_infinity(f)) { + return sign ? 1 << 0 : 1 << 7; + } else if (float64_is_zero(f)) { + return sign ? 1 << 3 : 1 << 4; + } else if (float64_is_zero_or_denormal(f)) { + return sign ? 
1 << 2 : 1 << 5; + } else if (float64_is_any_nan(f)) { + float_status s = { }; /* for snan_bit_is_one */ + return float64_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8; + } else { + return sign ? 1 << 1 : 1 << 6; + } +} + +RVVCALL(OPIVV1, vfclass_v_h, OP_UU_H, H2, H2, fclass_h) +RVVCALL(OPIVV1, vfclass_v_w, OP_UU_W, H4, H4, fclass_s) +RVVCALL(OPIVV1, vfclass_v_d, OP_UU_D, H8, H8, fclass_d) +GEN_VEXT_V(vfclass_v_h, 2, 2, clearh) +GEN_VEXT_V(vfclass_v_w, 4, 4, clearl) +GEN_VEXT_V(vfclass_v_d, 8, 8, clearq) + +/* Vector Floating-Point Merge Instruction */ +#define GEN_VFMERGE_VF(NAME, ETYPE, H, CLEAR_FN) \ +void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \ + CPURISCVState *env, uint32_t desc) \ +{ \ + uint32_t mlen = vext_mlen(desc); \ + uint32_t vm = vext_vm(desc); \ + uint32_t vl = env->vl; \ + uint32_t esz = sizeof(ETYPE); \ + uint32_t vlmax = vext_maxsz(desc) / esz; \ + uint32_t i; \ + \ + for (i = 0; i < vl; i++) { \ + ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ + *((ETYPE *)vd + H(i)) \ + = (!vm && !vext_elem_mask(v0, mlen, i) ? s2 : s1); \ + } \ + CLEAR_FN(vd, vl, vl * esz, vlmax * esz); \ +} + +GEN_VFMERGE_VF(vfmerge_vfm_h, int16_t, H2, clearh) +GEN_VFMERGE_VF(vfmerge_vfm_w, int32_t, H4, clearl) +GEN_VFMERGE_VF(vfmerge_vfm_d, int64_t, H8, clearq) + +/* Single-Width Floating-Point/Integer Type-Convert Instructions */ +/* vfcvt.xu.f.v vd, vs2, vm # Convert float to unsigned integer. */ +RVVCALL(OPFVV1, vfcvt_xu_f_v_h, OP_UU_H, H2, H2, float16_to_uint16) +RVVCALL(OPFVV1, vfcvt_xu_f_v_w, OP_UU_W, H4, H4, float32_to_uint32) +RVVCALL(OPFVV1, vfcvt_xu_f_v_d, OP_UU_D, H8, H8, float64_to_uint64) +GEN_VEXT_V_ENV(vfcvt_xu_f_v_h, 2, 2, clearh) +GEN_VEXT_V_ENV(vfcvt_xu_f_v_w, 4, 4, clearl) +GEN_VEXT_V_ENV(vfcvt_xu_f_v_d, 8, 8, clearq) + +/* vfcvt.x.f.v vd, vs2, vm # Convert float to signed integer. */ +RVVCALL(OPFVV1, vfcvt_x_f_v_h, OP_UU_H, H2, H2, float16_to_int16) +RVVCALL(OPFVV1, vfcvt_x_f_v_w, OP_UU_W, H4, H4, float32_to_int32) +RVVCALL(OPFVV1, vfcvt_x_f_v_d, OP_UU_D, H8, H8, float64_to_int64) +GEN_VEXT_V_ENV(vfcvt_x_f_v_h, 2, 2, clearh) +GEN_VEXT_V_ENV(vfcvt_x_f_v_w, 4, 4, clearl) +GEN_VEXT_V_ENV(vfcvt_x_f_v_d, 8, 8, clearq) + +/* vfcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to float. */ +RVVCALL(OPFVV1, vfcvt_f_xu_v_h, OP_UU_H, H2, H2, uint16_to_float16) +RVVCALL(OPFVV1, vfcvt_f_xu_v_w, OP_UU_W, H4, H4, uint32_to_float32) +RVVCALL(OPFVV1, vfcvt_f_xu_v_d, OP_UU_D, H8, H8, uint64_to_float64) +GEN_VEXT_V_ENV(vfcvt_f_xu_v_h, 2, 2, clearh) +GEN_VEXT_V_ENV(vfcvt_f_xu_v_w, 4, 4, clearl) +GEN_VEXT_V_ENV(vfcvt_f_xu_v_d, 8, 8, clearq) + +/* vfcvt.f.x.v vd, vs2, vm # Convert integer to float. */ +RVVCALL(OPFVV1, vfcvt_f_x_v_h, OP_UU_H, H2, H2, int16_to_float16) +RVVCALL(OPFVV1, vfcvt_f_x_v_w, OP_UU_W, H4, H4, int32_to_float32) +RVVCALL(OPFVV1, vfcvt_f_x_v_d, OP_UU_D, H8, H8, int64_to_float64) +GEN_VEXT_V_ENV(vfcvt_f_x_v_h, 2, 2, clearh) +GEN_VEXT_V_ENV(vfcvt_f_x_v_w, 4, 4, clearl) +GEN_VEXT_V_ENV(vfcvt_f_x_v_d, 8, 8, clearq) + +/* Widening Floating-Point/Integer Type-Convert Instructions */ +/* (TD, T2, TX2) */ +#define WOP_UU_H uint32_t, uint16_t, uint16_t +#define WOP_UU_W uint64_t, uint32_t, uint32_t +/* vfwcvt.xu.f.v vd, vs2, vm # Convert float to double-width unsigned integer.*/ +RVVCALL(OPFVV1, vfwcvt_xu_f_v_h, WOP_UU_H, H4, H2, float16_to_uint32) +RVVCALL(OPFVV1, vfwcvt_xu_f_v_w, WOP_UU_W, H8, H4, float32_to_uint64) +GEN_VEXT_V_ENV(vfwcvt_xu_f_v_h, 2, 4, clearl) +GEN_VEXT_V_ENV(vfwcvt_xu_f_v_w, 4, 8, clearq) + +/* vfwcvt.x.f.v vd, vs2, vm # Convert float to double-width signed integer. 
*/ +RVVCALL(OPFVV1, vfwcvt_x_f_v_h, WOP_UU_H, H4, H2, float16_to_int32) +RVVCALL(OPFVV1, vfwcvt_x_f_v_w, WOP_UU_W, H8, H4, float32_to_int64) +GEN_VEXT_V_ENV(vfwcvt_x_f_v_h, 2, 4, clearl) +GEN_VEXT_V_ENV(vfwcvt_x_f_v_w, 4, 8, clearq) + +/* vfwcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to double-width float */ +RVVCALL(OPFVV1, vfwcvt_f_xu_v_h, WOP_UU_H, H4, H2, uint16_to_float32) +RVVCALL(OPFVV1, vfwcvt_f_xu_v_w, WOP_UU_W, H8, H4, uint32_to_float64) +GEN_VEXT_V_ENV(vfwcvt_f_xu_v_h, 2, 4, clearl) +GEN_VEXT_V_ENV(vfwcvt_f_xu_v_w, 4, 8, clearq) + +/* vfwcvt.f.x.v vd, vs2, vm # Convert integer to double-width float. */ +RVVCALL(OPFVV1, vfwcvt_f_x_v_h, WOP_UU_H, H4, H2, int16_to_float32) +RVVCALL(OPFVV1, vfwcvt_f_x_v_w, WOP_UU_W, H8, H4, int32_to_float64) +GEN_VEXT_V_ENV(vfwcvt_f_x_v_h, 2, 4, clearl) +GEN_VEXT_V_ENV(vfwcvt_f_x_v_w, 4, 8, clearq) + +/* + * vfwcvt.f.f.v vd, vs2, vm # + * Convert single-width float to double-width float. + */ +static uint32_t vfwcvtffv16(uint16_t a, float_status *s) +{ + return float16_to_float32(a, true, s); +} + +RVVCALL(OPFVV1, vfwcvt_f_f_v_h, WOP_UU_H, H4, H2, vfwcvtffv16) +RVVCALL(OPFVV1, vfwcvt_f_f_v_w, WOP_UU_W, H8, H4, float32_to_float64) +GEN_VEXT_V_ENV(vfwcvt_f_f_v_h, 2, 4, clearl) +GEN_VEXT_V_ENV(vfwcvt_f_f_v_w, 4, 8, clearq) + +/* Narrowing Floating-Point/Integer Type-Convert Instructions */ +/* (TD, T2, TX2) */ +#define NOP_UU_H uint16_t, uint32_t, uint32_t +#define NOP_UU_W uint32_t, uint64_t, uint64_t +/* vfncvt.xu.f.v vd, vs2, vm # Convert float to unsigned integer. */ +RVVCALL(OPFVV1, vfncvt_xu_f_v_h, NOP_UU_H, H2, H4, float32_to_uint16) +RVVCALL(OPFVV1, vfncvt_xu_f_v_w, NOP_UU_W, H4, H8, float64_to_uint32) +GEN_VEXT_V_ENV(vfncvt_xu_f_v_h, 2, 2, clearh) +GEN_VEXT_V_ENV(vfncvt_xu_f_v_w, 4, 4, clearl) + +/* vfncvt.x.f.v vd, vs2, vm # Convert double-width float to signed integer. */ +RVVCALL(OPFVV1, vfncvt_x_f_v_h, NOP_UU_H, H2, H4, float32_to_int16) +RVVCALL(OPFVV1, vfncvt_x_f_v_w, NOP_UU_W, H4, H8, float64_to_int32) +GEN_VEXT_V_ENV(vfncvt_x_f_v_h, 2, 2, clearh) +GEN_VEXT_V_ENV(vfncvt_x_f_v_w, 4, 4, clearl) + +/* vfncvt.f.xu.v vd, vs2, vm # Convert double-width unsigned integer to float */ +RVVCALL(OPFVV1, vfncvt_f_xu_v_h, NOP_UU_H, H2, H4, uint32_to_float16) +RVVCALL(OPFVV1, vfncvt_f_xu_v_w, NOP_UU_W, H4, H8, uint64_to_float32) +GEN_VEXT_V_ENV(vfncvt_f_xu_v_h, 2, 2, clearh) +GEN_VEXT_V_ENV(vfncvt_f_xu_v_w, 4, 4, clearl) + +/* vfncvt.f.x.v vd, vs2, vm # Convert double-width integer to float. */ +RVVCALL(OPFVV1, vfncvt_f_x_v_h, NOP_UU_H, H2, H4, int32_to_float16) +RVVCALL(OPFVV1, vfncvt_f_x_v_w, NOP_UU_W, H4, H8, int64_to_float32) +GEN_VEXT_V_ENV(vfncvt_f_x_v_h, 2, 2, clearh) +GEN_VEXT_V_ENV(vfncvt_f_x_v_w, 4, 4, clearl) + +/* vfncvt.f.f.v vd, vs2, vm # Convert double float to single-width float. 
*/ +static uint16_t vfncvtffv16(uint32_t a, float_status *s) +{ + return float32_to_float16(a, true, s); +} + +RVVCALL(OPFVV1, vfncvt_f_f_v_h, NOP_UU_H, H2, H4, vfncvtffv16) +RVVCALL(OPFVV1, vfncvt_f_f_v_w, NOP_UU_W, H4, H8, float64_to_float32) +GEN_VEXT_V_ENV(vfncvt_f_f_v_h, 2, 2, clearh) +GEN_VEXT_V_ENV(vfncvt_f_f_v_w, 4, 4, clearl) + +/* + *** Vector Reduction Operations + */ +/* Vector Single-Width Integer Reduction Instructions */ +#define GEN_VEXT_RED(NAME, TD, TS2, HD, HS2, OP, CLEAR_FN)\ +void HELPER(NAME)(void *vd, void *v0, void *vs1, \ + void *vs2, CPURISCVState *env, uint32_t desc) \ +{ \ + uint32_t mlen = vext_mlen(desc); \ + uint32_t vm = vext_vm(desc); \ + uint32_t vl = env->vl; \ + uint32_t i; \ + uint32_t tot = env_archcpu(env)->cfg.vlen / 8; \ + TD s1 = *((TD *)vs1 + HD(0)); \ + \ + for (i = 0; i < vl; i++) { \ + TS2 s2 = *((TS2 *)vs2 + HS2(i)); \ + if (!vm && !vext_elem_mask(v0, mlen, i)) { \ + continue; \ + } \ + s1 = OP(s1, (TD)s2); \ + } \ + *((TD *)vd + HD(0)) = s1; \ + CLEAR_FN(vd, 1, sizeof(TD), tot); \ +} + +/* vd[0] = sum(vs1[0], vs2[*]) */ +GEN_VEXT_RED(vredsum_vs_b, int8_t, int8_t, H1, H1, DO_ADD, clearb) +GEN_VEXT_RED(vredsum_vs_h, int16_t, int16_t, H2, H2, DO_ADD, clearh) +GEN_VEXT_RED(vredsum_vs_w, int32_t, int32_t, H4, H4, DO_ADD, clearl) +GEN_VEXT_RED(vredsum_vs_d, int64_t, int64_t, H8, H8, DO_ADD, clearq) + +/* vd[0] = maxu(vs1[0], vs2[*]) */ +GEN_VEXT_RED(vredmaxu_vs_b, uint8_t, uint8_t, H1, H1, DO_MAX, clearb) +GEN_VEXT_RED(vredmaxu_vs_h, uint16_t, uint16_t, H2, H2, DO_MAX, clearh) +GEN_VEXT_RED(vredmaxu_vs_w, uint32_t, uint32_t, H4, H4, DO_MAX, clearl) +GEN_VEXT_RED(vredmaxu_vs_d, uint64_t, uint64_t, H8, H8, DO_MAX, clearq) + +/* vd[0] = max(vs1[0], vs2[*]) */ +GEN_VEXT_RED(vredmax_vs_b, int8_t, int8_t, H1, H1, DO_MAX, clearb) +GEN_VEXT_RED(vredmax_vs_h, int16_t, int16_t, H2, H2, DO_MAX, clearh) +GEN_VEXT_RED(vredmax_vs_w, int32_t, int32_t, H4, H4, DO_MAX, clearl) +GEN_VEXT_RED(vredmax_vs_d, int64_t, int64_t, H8, H8, DO_MAX, clearq) + +/* vd[0] = minu(vs1[0], vs2[*]) */ +GEN_VEXT_RED(vredminu_vs_b, uint8_t, uint8_t, H1, H1, DO_MIN, clearb) +GEN_VEXT_RED(vredminu_vs_h, uint16_t, uint16_t, H2, H2, DO_MIN, clearh) +GEN_VEXT_RED(vredminu_vs_w, uint32_t, uint32_t, H4, H4, DO_MIN, clearl) +GEN_VEXT_RED(vredminu_vs_d, uint64_t, uint64_t, H8, H8, DO_MIN, clearq) + +/* vd[0] = min(vs1[0], vs2[*]) */ +GEN_VEXT_RED(vredmin_vs_b, int8_t, int8_t, H1, H1, DO_MIN, clearb) +GEN_VEXT_RED(vredmin_vs_h, int16_t, int16_t, H2, H2, DO_MIN, clearh) +GEN_VEXT_RED(vredmin_vs_w, int32_t, int32_t, H4, H4, DO_MIN, clearl) +GEN_VEXT_RED(vredmin_vs_d, int64_t, int64_t, H8, H8, DO_MIN, clearq) + +/* vd[0] = and(vs1[0], vs2[*]) */ +GEN_VEXT_RED(vredand_vs_b, int8_t, int8_t, H1, H1, DO_AND, clearb) +GEN_VEXT_RED(vredand_vs_h, int16_t, int16_t, H2, H2, DO_AND, clearh) +GEN_VEXT_RED(vredand_vs_w, int32_t, int32_t, H4, H4, DO_AND, clearl) +GEN_VEXT_RED(vredand_vs_d, int64_t, int64_t, H8, H8, DO_AND, clearq) + +/* vd[0] = or(vs1[0], vs2[*]) */ +GEN_VEXT_RED(vredor_vs_b, int8_t, int8_t, H1, H1, DO_OR, clearb) +GEN_VEXT_RED(vredor_vs_h, int16_t, int16_t, H2, H2, DO_OR, clearh) +GEN_VEXT_RED(vredor_vs_w, int32_t, int32_t, H4, H4, DO_OR, clearl) +GEN_VEXT_RED(vredor_vs_d, int64_t, int64_t, H8, H8, DO_OR, clearq) + +/* vd[0] = xor(vs1[0], vs2[*]) */ +GEN_VEXT_RED(vredxor_vs_b, int8_t, int8_t, H1, H1, DO_XOR, clearb) +GEN_VEXT_RED(vredxor_vs_h, int16_t, int16_t, H2, H2, DO_XOR, clearh) +GEN_VEXT_RED(vredxor_vs_w, int32_t, int32_t, H4, H4, DO_XOR, clearl) +GEN_VEXT_RED(vredxor_vs_d, int64_t, 
int64_t, H8, H8, DO_XOR, clearq) + +/* Vector Widening Integer Reduction Instructions */ +/* signed sum reduction into double-width accumulator */ +GEN_VEXT_RED(vwredsum_vs_b, int16_t, int8_t, H2, H1, DO_ADD, clearh) +GEN_VEXT_RED(vwredsum_vs_h, int32_t, int16_t, H4, H2, DO_ADD, clearl) +GEN_VEXT_RED(vwredsum_vs_w, int64_t, int32_t, H8, H4, DO_ADD, clearq) + +/* Unsigned sum reduction into double-width accumulator */ +GEN_VEXT_RED(vwredsumu_vs_b, uint16_t, uint8_t, H2, H1, DO_ADD, clearh) +GEN_VEXT_RED(vwredsumu_vs_h, uint32_t, uint16_t, H4, H2, DO_ADD, clearl) +GEN_VEXT_RED(vwredsumu_vs_w, uint64_t, uint32_t, H8, H4, DO_ADD, clearq) + +/* Vector Single-Width Floating-Point Reduction Instructions */ +#define GEN_VEXT_FRED(NAME, TD, TS2, HD, HS2, OP, CLEAR_FN)\ +void HELPER(NAME)(void *vd, void *v0, void *vs1, \ + void *vs2, CPURISCVState *env, \ + uint32_t desc) \ +{ \ + uint32_t mlen = vext_mlen(desc); \ + uint32_t vm = vext_vm(desc); \ + uint32_t vl = env->vl; \ + uint32_t i; \ + uint32_t tot = env_archcpu(env)->cfg.vlen / 8; \ + TD s1 = *((TD *)vs1 + HD(0)); \ + \ + for (i = 0; i < vl; i++) { \ + TS2 s2 = *((TS2 *)vs2 + HS2(i)); \ + if (!vm && !vext_elem_mask(v0, mlen, i)) { \ + continue; \ + } \ + s1 = OP(s1, (TD)s2, &env->fp_status); \ + } \ + *((TD *)vd + HD(0)) = s1; \ + CLEAR_FN(vd, 1, sizeof(TD), tot); \ +} + +/* Unordered sum */ +GEN_VEXT_FRED(vfredsum_vs_h, uint16_t, uint16_t, H2, H2, float16_add, clearh) +GEN_VEXT_FRED(vfredsum_vs_w, uint32_t, uint32_t, H4, H4, float32_add, clearl) +GEN_VEXT_FRED(vfredsum_vs_d, uint64_t, uint64_t, H8, H8, float64_add, clearq) + +/* Maximum value */ +GEN_VEXT_FRED(vfredmax_vs_h, uint16_t, uint16_t, H2, H2, float16_maxnum, clearh) +GEN_VEXT_FRED(vfredmax_vs_w, uint32_t, uint32_t, H4, H4, float32_maxnum, clearl) +GEN_VEXT_FRED(vfredmax_vs_d, uint64_t, uint64_t, H8, H8, float64_maxnum, clearq) + +/* Minimum value */ +GEN_VEXT_FRED(vfredmin_vs_h, uint16_t, uint16_t, H2, H2, float16_minnum, clearh) +GEN_VEXT_FRED(vfredmin_vs_w, uint32_t, uint32_t, H4, H4, float32_minnum, clearl) +GEN_VEXT_FRED(vfredmin_vs_d, uint64_t, uint64_t, H8, H8, float64_minnum, clearq) + +/* Vector Widening Floating-Point Reduction Instructions */ +/* Unordered reduce 2*SEW = 2*SEW + sum(promote(SEW)) */ +void HELPER(vfwredsum_vs_h)(void *vd, void *v0, void *vs1, + void *vs2, CPURISCVState *env, uint32_t desc) +{ + uint32_t mlen = vext_mlen(desc); + uint32_t vm = vext_vm(desc); + uint32_t vl = env->vl; + uint32_t i; + uint32_t tot = env_archcpu(env)->cfg.vlen / 8; + uint32_t s1 = *((uint32_t *)vs1 + H4(0)); + + for (i = 0; i < vl; i++) { + uint16_t s2 = *((uint16_t *)vs2 + H2(i)); + if (!vm && !vext_elem_mask(v0, mlen, i)) { + continue; + } + s1 = float32_add(s1, float16_to_float32(s2, true, &env->fp_status), + &env->fp_status); + } + *((uint32_t *)vd + H4(0)) = s1; + clearl(vd, 1, sizeof(uint32_t), tot); +} + +void HELPER(vfwredsum_vs_w)(void *vd, void *v0, void *vs1, + void *vs2, CPURISCVState *env, uint32_t desc) +{ + uint32_t mlen = vext_mlen(desc); + uint32_t vm = vext_vm(desc); + uint32_t vl = env->vl; + uint32_t i; + uint32_t tot = env_archcpu(env)->cfg.vlen / 8; + uint64_t s1 = *((uint64_t *)vs1); + + for (i = 0; i < vl; i++) { + uint32_t s2 = *((uint32_t *)vs2 + H4(i)); + if (!vm && !vext_elem_mask(v0, mlen, i)) { + continue; + } + s1 = float64_add(s1, float32_to_float64(s2, &env->fp_status), + &env->fp_status); + } + *((uint64_t *)vd) = s1; + clearq(vd, 1, sizeof(uint64_t), tot); +} + +/* + *** Vector Mask Operations + */ +/* Vector Mask-Register Logical Instructions 
*/ +#define GEN_VEXT_MASK_VV(NAME, OP) \ +void HELPER(NAME)(void *vd, void *v0, void *vs1, \ + void *vs2, CPURISCVState *env, \ + uint32_t desc) \ +{ \ + uint32_t mlen = vext_mlen(desc); \ + uint32_t vlmax = env_archcpu(env)->cfg.vlen / mlen; \ + uint32_t vl = env->vl; \ + uint32_t i; \ + int a, b; \ + \ + for (i = 0; i < vl; i++) { \ + a = vext_elem_mask(vs1, mlen, i); \ + b = vext_elem_mask(vs2, mlen, i); \ + vext_set_elem_mask(vd, mlen, i, OP(b, a)); \ + } \ + for (; i < vlmax; i++) { \ + vext_set_elem_mask(vd, mlen, i, 0); \ + } \ +} + +#define DO_NAND(N, M) (!(N & M)) +#define DO_ANDNOT(N, M) (N & !M) +#define DO_NOR(N, M) (!(N | M)) +#define DO_ORNOT(N, M) (N | !M) +#define DO_XNOR(N, M) (!(N ^ M)) + +GEN_VEXT_MASK_VV(vmand_mm, DO_AND) +GEN_VEXT_MASK_VV(vmnand_mm, DO_NAND) +GEN_VEXT_MASK_VV(vmandnot_mm, DO_ANDNOT) +GEN_VEXT_MASK_VV(vmxor_mm, DO_XOR) +GEN_VEXT_MASK_VV(vmor_mm, DO_OR) +GEN_VEXT_MASK_VV(vmnor_mm, DO_NOR) +GEN_VEXT_MASK_VV(vmornot_mm, DO_ORNOT) +GEN_VEXT_MASK_VV(vmxnor_mm, DO_XNOR) + +/* Vector mask population count vmpopc */ +target_ulong HELPER(vmpopc_m)(void *v0, void *vs2, CPURISCVState *env, + uint32_t desc) +{ + target_ulong cnt = 0; + uint32_t mlen = vext_mlen(desc); + uint32_t vm = vext_vm(desc); + uint32_t vl = env->vl; + int i; + + for (i = 0; i < vl; i++) { + if (vm || vext_elem_mask(v0, mlen, i)) { + if (vext_elem_mask(vs2, mlen, i)) { + cnt++; + } + } + } + return cnt; +} + +/* vmfirst find-first-set mask bit*/ +target_ulong HELPER(vmfirst_m)(void *v0, void *vs2, CPURISCVState *env, + uint32_t desc) +{ + uint32_t mlen = vext_mlen(desc); + uint32_t vm = vext_vm(desc); + uint32_t vl = env->vl; + int i; + + for (i = 0; i < vl; i++) { + if (vm || vext_elem_mask(v0, mlen, i)) { + if (vext_elem_mask(vs2, mlen, i)) { + return i; + } + } + } + return -1LL; +} + +enum set_mask_type { + ONLY_FIRST = 1, + INCLUDE_FIRST, + BEFORE_FIRST, +}; + +static void vmsetm(void *vd, void *v0, void *vs2, CPURISCVState *env, + uint32_t desc, enum set_mask_type type) +{ + uint32_t mlen = vext_mlen(desc); + uint32_t vlmax = env_archcpu(env)->cfg.vlen / mlen; + uint32_t vm = vext_vm(desc); + uint32_t vl = env->vl; + int i; + bool first_mask_bit = false; + + for (i = 0; i < vl; i++) { + if (!vm && !vext_elem_mask(v0, mlen, i)) { + continue; + } + /* write a zero to all following active elements */ + if (first_mask_bit) { + vext_set_elem_mask(vd, mlen, i, 0); + continue; + } + if (vext_elem_mask(vs2, mlen, i)) { + first_mask_bit = true; + if (type == BEFORE_FIRST) { + vext_set_elem_mask(vd, mlen, i, 0); + } else { + vext_set_elem_mask(vd, mlen, i, 1); + } + } else { + if (type == ONLY_FIRST) { + vext_set_elem_mask(vd, mlen, i, 0); + } else { + vext_set_elem_mask(vd, mlen, i, 1); + } + } + } + for (; i < vlmax; i++) { + vext_set_elem_mask(vd, mlen, i, 0); + } +} + +void HELPER(vmsbf_m)(void *vd, void *v0, void *vs2, CPURISCVState *env, + uint32_t desc) +{ + vmsetm(vd, v0, vs2, env, desc, BEFORE_FIRST); +} + +void HELPER(vmsif_m)(void *vd, void *v0, void *vs2, CPURISCVState *env, + uint32_t desc) +{ + vmsetm(vd, v0, vs2, env, desc, INCLUDE_FIRST); +} + +void HELPER(vmsof_m)(void *vd, void *v0, void *vs2, CPURISCVState *env, + uint32_t desc) +{ + vmsetm(vd, v0, vs2, env, desc, ONLY_FIRST); +} + +/* Vector Iota Instruction */ +#define GEN_VEXT_VIOTA_M(NAME, ETYPE, H, CLEAR_FN) \ +void HELPER(NAME)(void *vd, void *v0, void *vs2, CPURISCVState *env, \ + uint32_t desc) \ +{ \ + uint32_t mlen = vext_mlen(desc); \ + uint32_t vlmax = env_archcpu(env)->cfg.vlen / mlen; \ + uint32_t vm = 
vext_vm(desc); \ + uint32_t vl = env->vl; \ + uint32_t sum = 0; \ + int i; \ + \ + for (i = 0; i < vl; i++) { \ + if (!vm && !vext_elem_mask(v0, mlen, i)) { \ + continue; \ + } \ + *((ETYPE *)vd + H(i)) = sum; \ + if (vext_elem_mask(vs2, mlen, i)) { \ + sum++; \ + } \ + } \ + CLEAR_FN(vd, vl, vl * sizeof(ETYPE), vlmax * sizeof(ETYPE)); \ +} + +GEN_VEXT_VIOTA_M(viota_m_b, uint8_t, H1, clearb) +GEN_VEXT_VIOTA_M(viota_m_h, uint16_t, H2, clearh) +GEN_VEXT_VIOTA_M(viota_m_w, uint32_t, H4, clearl) +GEN_VEXT_VIOTA_M(viota_m_d, uint64_t, H8, clearq) + +/* Vector Element Index Instruction */ +#define GEN_VEXT_VID_V(NAME, ETYPE, H, CLEAR_FN) \ +void HELPER(NAME)(void *vd, void *v0, CPURISCVState *env, uint32_t desc) \ +{ \ + uint32_t mlen = vext_mlen(desc); \ + uint32_t vlmax = env_archcpu(env)->cfg.vlen / mlen; \ + uint32_t vm = vext_vm(desc); \ + uint32_t vl = env->vl; \ + int i; \ + \ + for (i = 0; i < vl; i++) { \ + if (!vm && !vext_elem_mask(v0, mlen, i)) { \ + continue; \ + } \ + *((ETYPE *)vd + H(i)) = i; \ + } \ + CLEAR_FN(vd, vl, vl * sizeof(ETYPE), vlmax * sizeof(ETYPE)); \ +} + +GEN_VEXT_VID_V(vid_v_b, uint8_t, H1, clearb) +GEN_VEXT_VID_V(vid_v_h, uint16_t, H2, clearh) +GEN_VEXT_VID_V(vid_v_w, uint32_t, H4, clearl) +GEN_VEXT_VID_V(vid_v_d, uint64_t, H8, clearq) + +/* + *** Vector Permutation Instructions + */ + +/* Vector Slide Instructions */ +#define GEN_VEXT_VSLIDEUP_VX(NAME, ETYPE, H, CLEAR_FN) \ +void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ + CPURISCVState *env, uint32_t desc) \ +{ \ + uint32_t mlen = vext_mlen(desc); \ + uint32_t vlmax = env_archcpu(env)->cfg.vlen / mlen; \ + uint32_t vm = vext_vm(desc); \ + uint32_t vl = env->vl; \ + target_ulong offset = s1, i; \ + \ + for (i = offset; i < vl; i++) { \ + if (!vm && !vext_elem_mask(v0, mlen, i)) { \ + continue; \ + } \ + *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i - offset)); \ + } \ + CLEAR_FN(vd, vl, vl * sizeof(ETYPE), vlmax * sizeof(ETYPE)); \ +} + +/* vslideup.vx vd, vs2, rs1, vm # vd[i+rs1] = vs2[i] */ +GEN_VEXT_VSLIDEUP_VX(vslideup_vx_b, uint8_t, H1, clearb) +GEN_VEXT_VSLIDEUP_VX(vslideup_vx_h, uint16_t, H2, clearh) +GEN_VEXT_VSLIDEUP_VX(vslideup_vx_w, uint32_t, H4, clearl) +GEN_VEXT_VSLIDEUP_VX(vslideup_vx_d, uint64_t, H8, clearq) + +#define GEN_VEXT_VSLIDEDOWN_VX(NAME, ETYPE, H, CLEAR_FN) \ +void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ + CPURISCVState *env, uint32_t desc) \ +{ \ + uint32_t mlen = vext_mlen(desc); \ + uint32_t vlmax = env_archcpu(env)->cfg.vlen / mlen; \ + uint32_t vm = vext_vm(desc); \ + uint32_t vl = env->vl; \ + target_ulong offset = s1, i; \ + \ + for (i = 0; i < vl; ++i) { \ + target_ulong j = i + offset; \ + if (!vm && !vext_elem_mask(v0, mlen, i)) { \ + continue; \ + } \ + *((ETYPE *)vd + H(i)) = j >= vlmax ? 
0 : *((ETYPE *)vs2 + H(j)); \ + } \ + CLEAR_FN(vd, vl, vl * sizeof(ETYPE), vlmax * sizeof(ETYPE)); \ +} + +/* vslidedown.vx vd, vs2, rs1, vm # vd[i] = vs2[i+rs1] */ +GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_b, uint8_t, H1, clearb) +GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_h, uint16_t, H2, clearh) +GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_w, uint32_t, H4, clearl) +GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_d, uint64_t, H8, clearq) + +#define GEN_VEXT_VSLIDE1UP_VX(NAME, ETYPE, H, CLEAR_FN) \ +void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ + CPURISCVState *env, uint32_t desc) \ +{ \ + uint32_t mlen = vext_mlen(desc); \ + uint32_t vlmax = env_archcpu(env)->cfg.vlen / mlen; \ + uint32_t vm = vext_vm(desc); \ + uint32_t vl = env->vl; \ + uint32_t i; \ + \ + for (i = 0; i < vl; i++) { \ + if (!vm && !vext_elem_mask(v0, mlen, i)) { \ + continue; \ + } \ + if (i == 0) { \ + *((ETYPE *)vd + H(i)) = s1; \ + } else { \ + *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i - 1)); \ + } \ + } \ + CLEAR_FN(vd, vl, vl * sizeof(ETYPE), vlmax * sizeof(ETYPE)); \ +} + +/* vslide1up.vx vd, vs2, rs1, vm # vd[0]=x[rs1], vd[i+1] = vs2[i] */ +GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_b, uint8_t, H1, clearb) +GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_h, uint16_t, H2, clearh) +GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_w, uint32_t, H4, clearl) +GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_d, uint64_t, H8, clearq) + +#define GEN_VEXT_VSLIDE1DOWN_VX(NAME, ETYPE, H, CLEAR_FN) \ +void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ + CPURISCVState *env, uint32_t desc) \ +{ \ + uint32_t mlen = vext_mlen(desc); \ + uint32_t vlmax = env_archcpu(env)->cfg.vlen / mlen; \ + uint32_t vm = vext_vm(desc); \ + uint32_t vl = env->vl; \ + uint32_t i; \ + \ + for (i = 0; i < vl; i++) { \ + if (!vm && !vext_elem_mask(v0, mlen, i)) { \ + continue; \ + } \ + if (i == vl - 1) { \ + *((ETYPE *)vd + H(i)) = s1; \ + } else { \ + *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i + 1)); \ + } \ + } \ + CLEAR_FN(vd, vl, vl * sizeof(ETYPE), vlmax * sizeof(ETYPE)); \ +} + +/* vslide1down.vx vd, vs2, rs1, vm # vd[i] = vs2[i+1], vd[vl-1]=x[rs1] */ +GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_b, uint8_t, H1, clearb) +GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_h, uint16_t, H2, clearh) +GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_w, uint32_t, H4, clearl) +GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_d, uint64_t, H8, clearq) + +/* Vector Register Gather Instruction */ +#define GEN_VEXT_VRGATHER_VV(NAME, ETYPE, H, CLEAR_FN) \ +void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ + CPURISCVState *env, uint32_t desc) \ +{ \ + uint32_t mlen = vext_mlen(desc); \ + uint32_t vlmax = env_archcpu(env)->cfg.vlen / mlen; \ + uint32_t vm = vext_vm(desc); \ + uint32_t vl = env->vl; \ + uint32_t index, i; \ + \ + for (i = 0; i < vl; i++) { \ + if (!vm && !vext_elem_mask(v0, mlen, i)) { \ + continue; \ + } \ + index = *((ETYPE *)vs1 + H(i)); \ + if (index >= vlmax) { \ + *((ETYPE *)vd + H(i)) = 0; \ + } else { \ + *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(index)); \ + } \ + } \ + CLEAR_FN(vd, vl, vl * sizeof(ETYPE), vlmax * sizeof(ETYPE)); \ +} + +/* vd[i] = (vs1[i] >= VLMAX) ? 
0 : vs2[vs1[i]]; */ +GEN_VEXT_VRGATHER_VV(vrgather_vv_b, uint8_t, H1, clearb) +GEN_VEXT_VRGATHER_VV(vrgather_vv_h, uint16_t, H2, clearh) +GEN_VEXT_VRGATHER_VV(vrgather_vv_w, uint32_t, H4, clearl) +GEN_VEXT_VRGATHER_VV(vrgather_vv_d, uint64_t, H8, clearq) + +#define GEN_VEXT_VRGATHER_VX(NAME, ETYPE, H, CLEAR_FN) \ +void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ + CPURISCVState *env, uint32_t desc) \ +{ \ + uint32_t mlen = vext_mlen(desc); \ + uint32_t vlmax = env_archcpu(env)->cfg.vlen / mlen; \ + uint32_t vm = vext_vm(desc); \ + uint32_t vl = env->vl; \ + uint32_t index = s1, i; \ + \ + for (i = 0; i < vl; i++) { \ + if (!vm && !vext_elem_mask(v0, mlen, i)) { \ + continue; \ + } \ + if (index >= vlmax) { \ + *((ETYPE *)vd + H(i)) = 0; \ + } else { \ + *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(index)); \ + } \ + } \ + CLEAR_FN(vd, vl, vl * sizeof(ETYPE), vlmax * sizeof(ETYPE)); \ +} + +/* vd[i] = (x[rs1] >= VLMAX) ? 0 : vs2[rs1] */ +GEN_VEXT_VRGATHER_VX(vrgather_vx_b, uint8_t, H1, clearb) +GEN_VEXT_VRGATHER_VX(vrgather_vx_h, uint16_t, H2, clearh) +GEN_VEXT_VRGATHER_VX(vrgather_vx_w, uint32_t, H4, clearl) +GEN_VEXT_VRGATHER_VX(vrgather_vx_d, uint64_t, H8, clearq) + +/* Vector Compress Instruction */ +#define GEN_VEXT_VCOMPRESS_VM(NAME, ETYPE, H, CLEAR_FN) \ +void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ + CPURISCVState *env, uint32_t desc) \ +{ \ + uint32_t mlen = vext_mlen(desc); \ + uint32_t vlmax = env_archcpu(env)->cfg.vlen / mlen; \ + uint32_t vl = env->vl; \ + uint32_t num = 0, i; \ + \ + for (i = 0; i < vl; i++) { \ + if (!vext_elem_mask(vs1, mlen, i)) { \ + continue; \ + } \ + *((ETYPE *)vd + H(num)) = *((ETYPE *)vs2 + H(i)); \ + num++; \ + } \ + CLEAR_FN(vd, num, num * sizeof(ETYPE), vlmax * sizeof(ETYPE)); \ +} + +/* Compress into vd elements of vs2 where vs1 is enabled */ +GEN_VEXT_VCOMPRESS_VM(vcompress_vm_b, uint8_t, H1, clearb) +GEN_VEXT_VCOMPRESS_VM(vcompress_vm_h, uint16_t, H2, clearh) +GEN_VEXT_VCOMPRESS_VM(vcompress_vm_w, uint32_t, H4, clearl) +GEN_VEXT_VCOMPRESS_VM(vcompress_vm_d, uint64_t, H8, clearq) diff --git a/qemu/target/s390x/cpu_features_def.inc.h b/qemu/target/s390x/cpu_features_def.inc.h index 31dff0d84e..5942f81f16 100644 --- a/qemu/target/s390x/cpu_features_def.inc.h +++ b/qemu/target/s390x/cpu_features_def.inc.h @@ -107,6 +107,7 @@ DEF_FEAT(DEFLATE_BASE, "deflate-base", STFL, 151, "Deflate-conversion facility ( DEF_FEAT(VECTOR_PACKED_DECIMAL_ENH, "vxpdeh", STFL, 152, "Vector-Packed-Decimal-Enhancement Facility") DEF_FEAT(MSA_EXT_9, "msa9-base", STFL, 155, "Message-security-assist-extension-9 facility (excluding subfunctions)") DEF_FEAT(ETOKEN, "etoken", STFL, 156, "Etoken facility") +DEF_FEAT(UNPACK, "unpack", STFL, 161, "Unpack facility") /* Features exposed via SCLP SCCB Byte 80 - 98 (bit numbers relative to byte-80) */ DEF_FEAT(SIE_GSLS, "gsls", SCLP_CONF_CHAR, 40, "SIE: Guest-storage-limit-suppression facility") @@ -310,7 +311,7 @@ DEF_FEAT(PCC_CMAC_ETDEA_192, "pcc-cmac-etdea-128", PCC, 10, "PCC Compute-Last-Bl DEF_FEAT(PCC_CMAC_TDEA, "pcc-cmac-etdea-192", PCC, 11, "PCC Compute-Last-Block-CMAC-Using-EncryptedTDEA-192") DEF_FEAT(PCC_CMAC_AES_128, "pcc-cmac-aes-128", PCC, 18, "PCC Compute-Last-Block-CMAC-Using-AES-128") DEF_FEAT(PCC_CMAC_AES_192, "pcc-cmac-aes-192", PCC, 19, "PCC Compute-Last-Block-CMAC-Using-AES-192") -DEF_FEAT(PCC_CMAC_AES_256, "pcc-cmac-eaes-256", PCC, 20, "PCC Compute-Last-Block-CMAC-Using-AES-256") +DEF_FEAT(PCC_CMAC_AES_256, "pcc-cmac-aes-256", PCC, 20, "PCC Compute-Last-Block-CMAC-Using-AES-256") 
DEF_FEAT(PCC_CMAC_EAES_128, "pcc-cmac-eaes-128", PCC, 26, "PCC Compute-Last-Block-CMAC-Using-Encrypted-AES-128") DEF_FEAT(PCC_CMAC_EAES_192, "pcc-cmac-eaes-192", PCC, 27, "PCC Compute-Last-Block-CMAC-Using-Encrypted-AES-192") DEF_FEAT(PCC_CMAC_EAES_256, "pcc-cmac-eaes-256", PCC, 28, "PCC Compute-Last-Block-CMAC-Using-Encrypted-AES-256") diff --git a/qemu/target/s390x/fpu_helper.c b/qemu/target/s390x/fpu_helper.c index 0fc39d7138..241260f605 100644 --- a/qemu/target/s390x/fpu_helper.c +++ b/qemu/target/s390x/fpu_helper.c @@ -112,7 +112,7 @@ static void handle_exceptions(CPUS390XState *env, bool XxC, uintptr_t retaddr) } } -int float_comp_to_cc(CPUS390XState *env, int float_compare) +int float_comp_to_cc(CPUS390XState *env, FloatRelation float_compare) { switch (float_compare) { case float_relation_equal: @@ -368,7 +368,7 @@ uint64_t HELPER(lexb)(CPUS390XState *env, uint64_t ah, uint64_t al, /* 32-bit FP compare */ uint32_t HELPER(ceb)(CPUS390XState *env, uint64_t f1, uint64_t f2) { - int cmp = float32_compare_quiet(f1, f2, &env->fpu_status); + FloatRelation cmp = float32_compare_quiet(f1, f2, &env->fpu_status); handle_exceptions(env, false, GETPC()); return float_comp_to_cc(env, cmp); } @@ -376,7 +376,7 @@ uint32_t HELPER(ceb)(CPUS390XState *env, uint64_t f1, uint64_t f2) /* 64-bit FP compare */ uint32_t HELPER(cdb)(CPUS390XState *env, uint64_t f1, uint64_t f2) { - int cmp = float64_compare_quiet(f1, f2, &env->fpu_status); + FloatRelation cmp = float64_compare_quiet(f1, f2, &env->fpu_status); handle_exceptions(env, false, GETPC()); return float_comp_to_cc(env, cmp); } @@ -385,9 +385,9 @@ uint32_t HELPER(cdb)(CPUS390XState *env, uint64_t f1, uint64_t f2) uint32_t HELPER(cxb)(CPUS390XState *env, uint64_t ah, uint64_t al, uint64_t bh, uint64_t bl) { - int cmp = float128_compare_quiet(make_float128(ah, al), - make_float128(bh, bl), - &env->fpu_status); + FloatRelation cmp = float128_compare_quiet(make_float128(ah, al), + make_float128(bh, bl), + &env->fpu_status); handle_exceptions(env, false, GETPC()); return float_comp_to_cc(env, cmp); } @@ -675,7 +675,7 @@ uint64_t HELPER(fixb)(CPUS390XState *env, uint64_t ah, uint64_t al, /* 32-bit FP compare and signal */ uint32_t HELPER(keb)(CPUS390XState *env, uint64_t f1, uint64_t f2) { - int cmp = float32_compare(f1, f2, &env->fpu_status); + FloatRelation cmp = float32_compare(f1, f2, &env->fpu_status); handle_exceptions(env, false, GETPC()); return float_comp_to_cc(env, cmp); } @@ -683,7 +683,7 @@ uint32_t HELPER(keb)(CPUS390XState *env, uint64_t f1, uint64_t f2) /* 64-bit FP compare and signal */ uint32_t HELPER(kdb)(CPUS390XState *env, uint64_t f1, uint64_t f2) { - int cmp = float64_compare(f1, f2, &env->fpu_status); + FloatRelation cmp = float64_compare(f1, f2, &env->fpu_status); handle_exceptions(env, false, GETPC()); return float_comp_to_cc(env, cmp); } @@ -692,9 +692,9 @@ uint32_t HELPER(kdb)(CPUS390XState *env, uint64_t f1, uint64_t f2) uint32_t HELPER(kxb)(CPUS390XState *env, uint64_t ah, uint64_t al, uint64_t bh, uint64_t bl) { - int cmp = float128_compare(make_float128(ah, al), - make_float128(bh, bl), - &env->fpu_status); + FloatRelation cmp = float128_compare(make_float128(ah, al), + make_float128(bh, bl), + &env->fpu_status); handle_exceptions(env, false, GETPC()); return float_comp_to_cc(env, cmp); } diff --git a/qemu/target/s390x/gen-features.c b/qemu/target/s390x/gen-features.c index 6278845b12..8ddeebc544 100644 --- a/qemu/target/s390x/gen-features.c +++ b/qemu/target/s390x/gen-features.c @@ -562,6 +562,7 @@ static uint16_t 
full_GEN15_GA1[] = { S390_FEAT_GROUP_MSA_EXT_9, S390_FEAT_GROUP_MSA_EXT_9_PCKMO, S390_FEAT_ETOKEN, + S390_FEAT_UNPACK, }; /* Default features (in order of release) diff --git a/qemu/target/s390x/helper.h b/qemu/target/s390x/helper.h index abd8dd2a97..ddcce6de88 100644 --- a/qemu/target/s390x/helper.h +++ b/qemu/target/s390x/helper.h @@ -202,10 +202,6 @@ DEF_HELPER_FLAGS_4(gvec_vmlo16, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32) DEF_HELPER_FLAGS_4(gvec_vmlo32, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32) DEF_HELPER_FLAGS_3(gvec_vpopct8, TCG_CALL_NO_RWG, void, ptr, cptr, i32) DEF_HELPER_FLAGS_3(gvec_vpopct16, TCG_CALL_NO_RWG, void, ptr, cptr, i32) -DEF_HELPER_FLAGS_4(gvec_verllv8, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32) -DEF_HELPER_FLAGS_4(gvec_verllv16, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32) -DEF_HELPER_FLAGS_4(gvec_verll8, TCG_CALL_NO_RWG, void, ptr, cptr, i64, i32) -DEF_HELPER_FLAGS_4(gvec_verll16, TCG_CALL_NO_RWG, void, ptr, cptr, i64, i32) DEF_HELPER_FLAGS_4(gvec_verim8, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32) DEF_HELPER_FLAGS_4(gvec_verim16, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32) DEF_HELPER_FLAGS_4(gvec_vsl, TCG_CALL_NO_RWG, void, ptr, cptr, i64, i32) diff --git a/qemu/target/s390x/insn-data.def b/qemu/target/s390x/insn-data.def index 1660c4d1f8..5ff795fa13 100644 --- a/qemu/target/s390x/insn-data.def +++ b/qemu/target/s390x/insn-data.def @@ -798,7 +798,7 @@ /* SQUARE ROOT */ F(0xb314, SQEBR, RRE, Z, 0, e2, new, e1, sqeb, 0, IF_BFP) F(0xb315, SQDBR, RRE, Z, 0, f2, new, f1, sqdb, 0, IF_BFP) - F(0xb316, SQXBR, RRE, Z, x2h, x2l, new, x1, sqxb, 0, IF_BFP) + F(0xb316, SQXBR, RRE, Z, x2h, x2l, new_P, x1, sqxb, 0, IF_BFP) F(0xed14, SQEB, RXE, Z, 0, m2_32u, new, e1, sqeb, 0, IF_BFP) F(0xed15, SQDB, RXE, Z, 0, m2_64, new, f1, sqdb, 0, IF_BFP) @@ -1147,8 +1147,8 @@ /* VECTOR POPULATION COUNT */ F(0xe750, VPOPCT, VRR_a, V, 0, 0, 0, 0, vpopct, 0, IF_VEC) /* VECTOR ELEMENT ROTATE LEFT LOGICAL */ - F(0xe773, VERLLV, VRR_c, V, 0, 0, 0, 0, verllv, 0, IF_VEC) - F(0xe733, VERLL, VRS_a, V, la2, 0, 0, 0, verll, 0, IF_VEC) + F(0xe773, VERLLV, VRR_c, V, 0, 0, 0, 0, vesv, 0, IF_VEC) + F(0xe733, VERLL, VRS_a, V, la2, 0, 0, 0, ves, 0, IF_VEC) /* VECTOR ELEMENT ROTATE AND INSERT UNDER MASK */ F(0xe772, VERIM, VRI_d, V, 0, 0, 0, 0, verim, 0, IF_VEC) /* VECTOR ELEMENT SHIFT LEFT */ diff --git a/qemu/target/s390x/internal.h b/qemu/target/s390x/internal.h index 82cf8726be..cec0957fb4 100644 --- a/qemu/target/s390x/internal.h +++ b/qemu/target/s390x/internal.h @@ -11,6 +11,7 @@ #define S390X_INTERNAL_H #include "cpu.h" +#include "fpu/softfloat.h" #ifndef CONFIG_USER_ONLY QEMU_PACK(typedef struct LowCore { @@ -268,7 +269,7 @@ uint32_t set_cc_nz_f128(float128 v); uint8_t s390_softfloat_exc_to_ieee(unsigned int exc); int s390_swap_bfp_rounding_mode(CPUS390XState *env, int m3); void s390_restore_bfp_rounding_mode(CPUS390XState *env, int old_mode); -int float_comp_to_cc(CPUS390XState *env, int float_compare); +int float_comp_to_cc(CPUS390XState *env, FloatRelation float_compare); uint16_t float32_dcmask(CPUS390XState *env, float32 f1); uint16_t float64_dcmask(CPUS390XState *env, float64 f1); uint16_t float128_dcmask(CPUS390XState *env, float128 f1); diff --git a/qemu/target/s390x/translate.c b/qemu/target/s390x/translate.c index e41a3b73b0..dec5f4139c 100644 --- a/qemu/target/s390x/translate.c +++ b/qemu/target/s390x/translate.c @@ -3936,8 +3936,7 @@ static DisasJumpType op_risbg(DisasContext *s, DisasOps *o) pmask = 0x00000000ffffffffull; break; default: - // g_assert_not_reached(); - break; + 
g_assert_not_reached(); } /* MASK is the set of bits to be inserted from R2. diff --git a/qemu/target/s390x/translate_vx.inc.c b/qemu/target/s390x/translate_vx.inc.c index 568b6a2acb..bdf0aecf34 100644 --- a/qemu/target/s390x/translate_vx.inc.c +++ b/qemu/target/s390x/translate_vx.inc.c @@ -233,8 +233,8 @@ static void get_vec_element_ptr_i64(TCGContext *tcg_ctx, TCGv_ptr ptr, uint8_t r #define gen_gvec_mov(tcg_ctx, v1, v2) \ tcg_gen_gvec_mov(tcg_ctx, 0, vec_full_reg_offset(v1), vec_full_reg_offset(v2), 16, \ 16) -#define gen_gvec_dup64i(tcg_ctx, v1, c) \ - tcg_gen_gvec_dup64i(tcg_ctx, vec_full_reg_offset(v1), 16, 16, c) +#define gen_gvec_dup_imm(tcg_ctx, es, v1, c) \ + tcg_gen_gvec_dup_imm(tcg_ctx, es, vec_full_reg_offset(v1), 16, 16, c); #define gen_gvec_fn_2(tcg_ctx, fn, es, v1, v2) \ tcg_gen_gvec_##fn(tcg_ctx, es, vec_full_reg_offset(v1), vec_full_reg_offset(v2), \ 16, 16) @@ -318,31 +318,6 @@ static void gen_gvec128_4_i64(TCGContext *tcg_ctx, gen_gvec128_4_i64_fn fn, uint tcg_temp_free_i64(tcg_ctx, cl); } -static void gen_gvec_dupi(TCGContext *tcg_ctx, uint8_t es, uint8_t reg, uint64_t c) -{ - switch (es) { - case ES_8: - tcg_gen_gvec_dup8i(tcg_ctx, vec_full_reg_offset(reg), 16, 16, c); - break; - case ES_16: - tcg_gen_gvec_dup16i(tcg_ctx, vec_full_reg_offset(reg), 16, 16, c); - break; - case ES_32: - tcg_gen_gvec_dup32i(tcg_ctx, vec_full_reg_offset(reg), 16, 16, c); - break; - case ES_64: - gen_gvec_dup64i(tcg_ctx, reg, c); - break; - default: - g_assert_not_reached(); - } -} - -static void zero_vec(TCGContext *tcg_ctx, uint8_t reg) -{ - tcg_gen_gvec_dup8i(tcg_ctx, vec_full_reg_offset(reg), 16, 16, 0); -} - static void gen_addi2_i64(TCGContext *tcg_ctx, TCGv_i64 dl, TCGv_i64 dh, TCGv_i64 al, TCGv_i64 ah, uint64_t b) { @@ -400,8 +375,8 @@ static DisasJumpType op_vgbm(DisasContext *s, DisasOps *o) * Masks for both 64 bit elements of the vector are the same. * Trust tcg to produce a good constant loading. 
*/ - gen_gvec_dup64i(tcg_ctx, get_field(s, v1), - generate_byte_mask(i2 & 0xff)); + gen_gvec_dup_imm(tcg_ctx, ES_64, get_field(s, v1), + generate_byte_mask(i2 & 0xff)); } else { TCGv_i64 t = tcg_temp_new_i64(tcg_ctx); @@ -437,7 +412,7 @@ static DisasJumpType op_vgm(DisasContext *s, DisasOps *o) } } - gen_gvec_dupi(tcg_ctx, es, get_field(s, v1), mask); + gen_gvec_dup_imm(tcg_ctx, es, get_field(s, v1), mask); return DISAS_NEXT; } @@ -598,7 +573,7 @@ static DisasJumpType op_vllez(DisasContext *s, DisasOps *o) t = tcg_temp_new_i64(tcg_ctx); tcg_gen_qemu_ld_i64(tcg_ctx, t, o->addr1, get_mem_index(s), MO_TE | es); - zero_vec(tcg_ctx, get_field(s, v1)); + gen_gvec_dup_imm(tcg_ctx, es, get_field(s, v1), 0); write_vec_element_i64(tcg_ctx, t, get_field(s, v1), enr, es); tcg_temp_free_i64(tcg_ctx, t); return DISAS_NEXT; @@ -917,7 +892,7 @@ static DisasJumpType op_vrepi(DisasContext *s, DisasOps *o) return DISAS_NORETURN; } - gen_gvec_dupi(tcg_ctx, es, get_field(s, v1), data); + gen_gvec_dup_imm(tcg_ctx, es, get_field(s, v1), data); return DISAS_NEXT; } @@ -1414,7 +1389,7 @@ static DisasJumpType op_vcksm(DisasContext *s, DisasOps *o) read_vec_element_i32(tcg_ctx, tmp, get_field(s, v2), i, ES_32); tcg_gen_add2_i32(tcg_ctx, tmp, sum, sum, sum, tmp, tmp); } - zero_vec(tcg_ctx, get_field(s, v1)); + gen_gvec_dup_imm(tcg_ctx, ES_32, get_field(s, v1), 0); write_vec_element_i32(tcg_ctx, sum, get_field(s, v1), 1, ES_32); tcg_temp_free_i32(tcg_ctx, tmp); @@ -1910,65 +1885,6 @@ static DisasJumpType op_vpopct(DisasContext *s, DisasOps *o) return DISAS_NEXT; } -static void gen_rll_i32(TCGContext *tcg_ctx, TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) -{ - TCGv_i32 t0 = tcg_temp_new_i32(tcg_ctx); - - tcg_gen_andi_i32(tcg_ctx, t0, b, 31); - tcg_gen_rotl_i32(tcg_ctx, d, a, t0); - tcg_temp_free_i32(tcg_ctx, t0); -} - -static void gen_rll_i64(TCGContext *tcg_ctx, TCGv_i64 d, TCGv_i64 a, TCGv_i64 b) -{ - TCGv_i64 t0 = tcg_temp_new_i64(tcg_ctx); - - tcg_gen_andi_i64(tcg_ctx, t0, b, 63); - tcg_gen_rotl_i64(tcg_ctx, d, a, t0); - tcg_temp_free_i64(tcg_ctx, t0); -} - -static DisasJumpType op_verllv(DisasContext *s, DisasOps *o) -{ - TCGContext *tcg_ctx = s->uc->tcg_ctx; - const uint8_t es = get_field(s, m4); - static const GVecGen3 g[4] = { - { .fno = gen_helper_gvec_verllv8, }, - { .fno = gen_helper_gvec_verllv16, }, - { .fni4 = gen_rll_i32, }, - { .fni8 = gen_rll_i64, }, - }; - - if (es > ES_64) { - gen_program_exception(s, PGM_SPECIFICATION); - return DISAS_NORETURN; - } - - gen_gvec_3(tcg_ctx, get_field(s, v1), get_field(s, v2), - get_field(s, v3), &g[es]); - return DISAS_NEXT; -} - -static DisasJumpType op_verll(DisasContext *s, DisasOps *o) -{ - TCGContext *tcg_ctx = s->uc->tcg_ctx; - const uint8_t es = get_field(s, m4); - static const GVecGen2s g[4] = { - { .fno = gen_helper_gvec_verll8, }, - { .fno = gen_helper_gvec_verll16, }, - { .fni4 = gen_rll_i32, }, - { .fni8 = gen_rll_i64, }, - }; - - if (es > ES_64) { - gen_program_exception(s, PGM_SPECIFICATION); - return DISAS_NORETURN; - } - gen_gvec_2s(tcg_ctx, get_field(s, v1), get_field(s, v3), o->addr1, - &g[es]); - return DISAS_NEXT; -} - static void gen_rim_i32(TCGContext *tcg_ctx, TCGv_i32 d, TCGv_i32 a, TCGv_i32 b, int32_t c) { TCGv_i32 t = tcg_temp_new_i32(tcg_ctx); @@ -2035,6 +1951,9 @@ static DisasJumpType op_vesv(DisasContext *s, DisasOps *o) case 0x70: gen_gvec_fn_3(tcg_ctx, shlv, es, v1, v2, v3); break; + case 0x73: + gen_gvec_fn_3(tcg_ctx, rotlv, es, v1, v2, v3); + break; case 0x7a: gen_gvec_fn_3(tcg_ctx, sarv, es, v1, v2, v3); break; @@ -2067,6 +1986,9 @@ static 
DisasJumpType op_ves(DisasContext *s, DisasOps *o) case 0x30: gen_gvec_fn_2i(tcg_ctx, shli, es, v1, v3, d2); break; + case 0x33: + gen_gvec_fn_2i(tcg_ctx, rotli, es, v1, v3, d2); + break; case 0x3a: gen_gvec_fn_2i(tcg_ctx, sari, es, v1, v3, d2); break; @@ -2084,6 +2006,9 @@ static DisasJumpType op_ves(DisasContext *s, DisasOps *o) case 0x30: gen_gvec_fn_2s(tcg_ctx, shls, es, v1, v3, shift); break; + case 0x33: + gen_gvec_fn_2s(tcg_ctx, rotls, es, v1, v3, shift); + break; case 0x3a: gen_gvec_fn_2s(tcg_ctx, sars, es, v1, v3, shift); break; diff --git a/qemu/target/s390x/vec_fpu_helper.c b/qemu/target/s390x/vec_fpu_helper.c index e87ef56f04..09cb61fbeb 100644 --- a/qemu/target/s390x/vec_fpu_helper.c +++ b/qemu/target/s390x/vec_fpu_helper.c @@ -174,7 +174,7 @@ void HELPER(gvec_wfk64)(const void *v1, const void *v2, CPUS390XState *env, env->cc_op = wfc64(v1, v2, env, true, GETPC()); } -typedef int (*vfc64_fn)(float64 a, float64 b, float_status *status); +typedef bool (*vfc64_fn)(float64 a, float64 b, float_status *status); static int vfc64(S390Vector *v1, const S390Vector *v2, const S390Vector *v3, CPUS390XState *env, bool s, vfc64_fn fn, uintptr_t retaddr) { diff --git a/qemu/target/s390x/vec_int_helper.c b/qemu/target/s390x/vec_int_helper.c index b81441395c..a4e486a8b8 100644 --- a/qemu/target/s390x/vec_int_helper.c +++ b/qemu/target/s390x/vec_int_helper.c @@ -515,37 +515,6 @@ void HELPER(gvec_vpopct##BITS)(void *v1, const void *v2, uint32_t desc) \ DEF_VPOPCT(8) DEF_VPOPCT(16) -#define DEF_VERLLV(BITS) \ -void HELPER(gvec_verllv##BITS)(void *v1, const void *v2, const void *v3, \ - uint32_t desc) \ -{ \ - int i; \ - \ - for (i = 0; i < (128 / BITS); i++) { \ - const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i); \ - const uint##BITS##_t b = s390_vec_read_element##BITS(v3, i); \ - \ - s390_vec_write_element##BITS(v1, i, rol##BITS(a, b)); \ - } \ -} -DEF_VERLLV(8) -DEF_VERLLV(16) - -#define DEF_VERLL(BITS) \ -void HELPER(gvec_verll##BITS)(void *v1, const void *v2, uint64_t count, \ - uint32_t desc) \ -{ \ - int i; \ - \ - for (i = 0; i < (128 / BITS); i++) { \ - const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i); \ - \ - s390_vec_write_element##BITS(v1, i, rol##BITS(a, count)); \ - } \ -} -DEF_VERLL(8) -DEF_VERLL(16) - #define DEF_VERIM(BITS) \ void HELPER(gvec_verim##BITS)(void *v1, const void *v2, const void *v3, \ uint32_t desc) \ diff --git a/qemu/target/sparc/fop_helper.c b/qemu/target/sparc/fop_helper.c index 9eb9b75718..e6dd3fc313 100644 --- a/qemu/target/sparc/fop_helper.c +++ b/qemu/target/sparc/fop_helper.c @@ -264,7 +264,7 @@ void helper_fsqrtq(CPUSPARCState *env) #define GEN_FCMP(name, size, reg1, reg2, FS, E) \ target_ulong glue(helper_, name) (CPUSPARCState *env) \ { \ - int ret; \ + FloatRelation ret; \ target_ulong fsr; \ if (E) { \ ret = glue(size, _compare)(reg1, reg2, &env->fp_status); \ @@ -295,7 +295,7 @@ void helper_fsqrtq(CPUSPARCState *env) #define GEN_FCMP_T(name, size, FS, E) \ target_ulong glue(helper_, name)(CPUSPARCState *env, size src1, size src2)\ { \ - int ret; \ + FloatRelation ret; \ target_ulong fsr; \ if (E) { \ ret = glue(size, _compare)(src1, src2, &env->fp_status); \ diff --git a/qemu/target/tricore/translate.c b/qemu/target/tricore/translate.c index 75188b8be6..d8d9584787 100644 --- a/qemu/target/tricore/translate.c +++ b/qemu/target/tricore/translate.c @@ -52,7 +52,6 @@ static const char *regnames_d[] = { typedef struct DisasContext { DisasContextBase base; - CPUTriCoreState *env; target_ulong pc; // CCOp cc_op; /* Current CC operation 
*/ target_ulong pc_succ_insn; diff --git a/qemu/tcg/README b/qemu/tcg/README index bfa2e4ed24..a64f67809b 100644 --- a/qemu/tcg/README +++ b/qemu/tcg/README @@ -605,10 +605,11 @@ E.g. VECL=1 -> 64 << 1 -> v128, and VECE=2 -> 1 << 2 -> i32. * shri_vec v0, v1, i2 * sari_vec v0, v1, i2 +* rotli_vec v0, v1, i2 * shrs_vec v0, v1, s2 * sars_vec v0, v1, s2 - Similarly for logical and arithmetic right shift. + Similarly for logical and arithmetic right shift, and left rotate. * shlv_vec v0, v1, v2 @@ -620,8 +621,10 @@ E.g. VECL=1 -> 64 << 1 -> v128, and VECE=2 -> 1 << 2 -> i32. * shrv_vec v0, v1, v2 * sarv_vec v0, v1, v2 +* rotlv_vec v0, v1, v2 +* rotrv_vec v0, v1, v2 - Similarly for logical and arithmetic right shift. + Similarly for logical and arithmetic right shift, and rotates. * cmp_vec v0, v1, v2, cond diff --git a/qemu/tcg/aarch64/tcg-target.h b/qemu/tcg/aarch64/tcg-target.h index 13993a70e5..e7673bb032 100644 --- a/qemu/tcg/aarch64/tcg-target.h +++ b/qemu/tcg/aarch64/tcg-target.h @@ -137,6 +137,9 @@ typedef enum { #define TCG_TARGET_HAS_not_vec 1 #define TCG_TARGET_HAS_neg_vec 1 #define TCG_TARGET_HAS_abs_vec 1 +#define TCG_TARGET_HAS_roti_vec 0 +#define TCG_TARGET_HAS_rots_vec 0 +#define TCG_TARGET_HAS_rotv_vec 0 #define TCG_TARGET_HAS_shi_vec 1 #define TCG_TARGET_HAS_shs_vec 0 #define TCG_TARGET_HAS_shv_vec 1 diff --git a/qemu/tcg/aarch64/tcg-target.inc.c b/qemu/tcg/aarch64/tcg-target.inc.c index 50c9e595bb..c1f5483651 100644 --- a/qemu/tcg/aarch64/tcg-target.inc.c +++ b/qemu/tcg/aarch64/tcg-target.inc.c @@ -557,6 +557,7 @@ typedef enum { I3614_SSHR = 0x0f000400, I3614_SSRA = 0x0f001400, I3614_SHL = 0x0f005400, + I3614_SLI = 0x2f005400, I3614_USHR = 0x2f000400, I3614_USRA = 0x2f001400, @@ -1504,11 +1505,21 @@ static inline void tcg_out_addsub2(TCGContext *s, TCGType ext, TCGReg rl, static inline void tcg_out_mb(TCGContext *s, TCGArg a0) { static const uint32_t sync[] = { - [0 ... 
TCG_MO_ALL] = DMB_ISH | DMB_LD | DMB_ST, - [TCG_MO_ST_ST] = DMB_ISH | DMB_ST, - [TCG_MO_LD_LD] = DMB_ISH | DMB_LD, - [TCG_MO_LD_ST] = DMB_ISH | DMB_LD, - [TCG_MO_LD_ST | TCG_MO_LD_LD] = DMB_ISH | DMB_LD, + [0] = DMB_ISH | DMB_LD | DMB_ST, + [TCG_MO_LD_LD] = DMB_ISH | DMB_LD, + [TCG_MO_ST_LD] = DMB_ISH | DMB_LD | DMB_ST, + [TCG_MO_LD_LD | TCG_MO_ST_LD] = DMB_ISH | DMB_LD | DMB_ST, + [TCG_MO_LD_ST] = DMB_ISH | DMB_LD, + [TCG_MO_LD_ST | TCG_MO_LD_LD] = DMB_ISH | DMB_LD, + [TCG_MO_LD_ST | TCG_MO_ST_LD] = DMB_ISH | DMB_LD | DMB_ST, + [TCG_MO_LD_ST | TCG_MO_LD_LD | TCG_MO_ST_LD] = DMB_ISH | DMB_LD | DMB_ST, + [TCG_MO_ST_ST] = DMB_ISH | DMB_ST, + [TCG_MO_ST_ST | TCG_MO_LD_LD] = DMB_ISH | DMB_LD | DMB_ST, + [TCG_MO_ST_ST | TCG_MO_ST_LD] = DMB_ISH | DMB_LD | DMB_ST, + [TCG_MO_ST_ST | TCG_MO_ST_LD | TCG_MO_LD_LD] = DMB_ISH | DMB_LD | DMB_ST, + [TCG_MO_ST_ST | TCG_MO_LD_ST] = DMB_ISH | DMB_LD | DMB_ST, + [TCG_MO_ST_ST | TCG_MO_LD_ST | TCG_MO_LD_LD] = DMB_ISH | DMB_LD | DMB_ST, + [TCG_MO_ST_ST | TCG_MO_LD_ST | TCG_MO_ST_LD] = DMB_ISH | DMB_LD | DMB_ST, }; tcg_out32(s, sync[a0 & TCG_MO_ALL]); } @@ -1659,9 +1670,7 @@ static void tcg_out_tlb_read(TCGContext *s, TCGReg addr_reg, MemOp opc, tcg_insn_unit **label_ptr, int mem_index, bool is_read) { -#ifdef TARGET_ARM - struct uc_struct *uc = s->uc; -#endif + UNICORN_UNUSED struct uc_struct *uc = s->uc; unsigned a_bits = get_alignment_bits(opc); unsigned s_bits = opc & MO_SIZE; unsigned a_mask = (1u << a_bits) - 1; @@ -2422,6 +2431,9 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, case INDEX_op_sari_vec: tcg_out_insn(s, 3614, SSHR, is_q, a0, a1, (16 << vece) - a2); break; + case INDEX_op_aa64_sli_vec: + tcg_out_insn(s, 3614, SLI, is_q, a0, a2, args[3] + (8 << vece)); + break; case INDEX_op_shlv_vec: tcg_out_insn(s, 3616, USHL, is_q, vece, a0, a1, a2); break; @@ -2509,8 +2521,11 @@ int tcg_can_emit_vec_op(TCGContext *tcg_ctx, TCGOpcode opc, TCGType type, unsign case INDEX_op_shlv_vec: case INDEX_op_bitsel_vec: return 1; + case INDEX_op_rotli_vec: case INDEX_op_shrv_vec: case INDEX_op_sarv_vec: + case INDEX_op_rotlv_vec: + case INDEX_op_rotrv_vec: return -1; case INDEX_op_mul_vec: case INDEX_op_smax_vec: @@ -2528,14 +2543,24 @@ void tcg_expand_vec_op(TCGContext *tcg_ctx, TCGOpcode opc, TCGType type, unsigne TCGArg a0, ...) { va_list va; - TCGv_vec v0, v1, v2, t1; + TCGv_vec v0, v1, v2, t1, t2; + TCGArg a2; va_start(va, a0); v0 = temp_tcgv_vec(tcg_ctx, arg_temp(a0)); v1 = temp_tcgv_vec(tcg_ctx, arg_temp(va_arg(va, TCGArg))); - v2 = temp_tcgv_vec(tcg_ctx, arg_temp(va_arg(va, TCGArg))); + a2 = va_arg(va, TCGArg); + v2 = temp_tcgv_vec(tcg_ctx, arg_temp(a2)); switch (opc) { + case INDEX_op_rotli_vec: + t1 = tcg_temp_new_vec(tcg_ctx, type); + tcg_gen_shri_vec(tcg_ctx, vece, t1, v1, -a2 & ((8 << vece) - 1)); + vec_gen_4(tcg_ctx, INDEX_op_aa64_sli_vec, type, vece, + tcgv_vec_arg(tcg_ctx, v0), tcgv_vec_arg(tcg_ctx, t1), tcgv_vec_arg(tcg_ctx, v1), a2); + tcg_temp_free_vec(tcg_ctx, t1); + break; + case INDEX_op_shrv_vec: case INDEX_op_sarv_vec: /* Right shifts are negative left shifts for AArch64. */ @@ -2548,6 +2573,35 @@ void tcg_expand_vec_op(TCGContext *tcg_ctx, TCGOpcode opc, TCGType type, unsigne tcg_temp_free_vec(tcg_ctx, t1); break; + case INDEX_op_rotlv_vec: + t1 = tcg_temp_new_vec(tcg_ctx, type); + tcg_gen_dupi_vec(tcg_ctx, vece, t1, 8 << vece); + tcg_gen_sub_vec(tcg_ctx, vece, t1, v2, t1); + /* Right shifts are negative left shifts for AArch64. 
*/ + vec_gen_3(tcg_ctx, INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(tcg_ctx, t1), + tcgv_vec_arg(tcg_ctx, v1), tcgv_vec_arg(tcg_ctx, t1)); + vec_gen_3(tcg_ctx, INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(tcg_ctx, v0), + tcgv_vec_arg(tcg_ctx, v1), tcgv_vec_arg(tcg_ctx, v2)); + tcg_gen_or_vec(tcg_ctx, vece, v0, v0, t1); + tcg_temp_free_vec(tcg_ctx, t1); + break; + + case INDEX_op_rotrv_vec: + t1 = tcg_temp_new_vec(tcg_ctx, type); + t2 = tcg_temp_new_vec(tcg_ctx, type); + tcg_gen_neg_vec(tcg_ctx, vece, t1, v2); + tcg_gen_dupi_vec(tcg_ctx, vece, t2, 8 << vece); + tcg_gen_add_vec(tcg_ctx, vece, t2, t1, t2); + /* Right shifts are negative left shifts for AArch64. */ + vec_gen_3(tcg_ctx, INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(tcg_ctx, t1), + tcgv_vec_arg(tcg_ctx, v1), tcgv_vec_arg(tcg_ctx, t1)); + vec_gen_3(tcg_ctx, INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(tcg_ctx, t2), + tcgv_vec_arg(tcg_ctx, v1), tcgv_vec_arg(tcg_ctx, t2)); + tcg_gen_or_vec(tcg_ctx, vece, v0, t1, t2); + tcg_temp_free_vec(tcg_ctx, t1); + tcg_temp_free_vec(tcg_ctx, t2); + break; + default: g_assert_not_reached(); } @@ -2568,6 +2622,7 @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op) static const TCGTargetOpDef lZ_l = { .args_ct_str = { "lZ", "l" } }; static const TCGTargetOpDef r_r_r = { .args_ct_str = { "r", "r", "r" } }; static const TCGTargetOpDef w_w_w = { .args_ct_str = { "w", "w", "w" } }; + static const TCGTargetOpDef w_0_w = { .args_ct_str = { "w", "0", "w" } }; static const TCGTargetOpDef w_w_wO = { .args_ct_str = { "w", "w", "wO" } }; static const TCGTargetOpDef w_w_wN = { .args_ct_str = { "w", "w", "wN" } }; static const TCGTargetOpDef w_w_wZ = { .args_ct_str = { "w", "w", "wZ" } }; @@ -2762,6 +2817,8 @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op) return &w_w_wZ; case INDEX_op_bitsel_vec: return &w_w_w_w; + case INDEX_op_aa64_sli_vec: + return &w_0_w; default: return NULL; diff --git a/qemu/tcg/aarch64/tcg-target.opc.h b/qemu/tcg/aarch64/tcg-target.opc.h index 26bfd9c460..bce30accd9 100644 --- a/qemu/tcg/aarch64/tcg-target.opc.h +++ b/qemu/tcg/aarch64/tcg-target.opc.h @@ -12,3 +12,4 @@ */ DEF(aa64_sshl_vec, 1, 2, 0, IMPLVEC) +DEF(aa64_sli_vec, 1, 2, 1, IMPLVEC) diff --git a/qemu/tcg/arm/tcg-target.inc.c b/qemu/tcg/arm/tcg-target.inc.c index 467d063690..8884968fb3 100644 --- a/qemu/tcg/arm/tcg-target.inc.c +++ b/qemu/tcg/arm/tcg-target.inc.c @@ -1235,9 +1235,7 @@ QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, table) != 4); static TCGReg tcg_out_tlb_read(TCGContext *s, TCGReg addrlo, TCGReg addrhi, MemOp opc, int mem_index, bool is_load) { -#ifdef TARGET_ARM struct uc_struct *uc = s->uc; -#endif int cmp_off = (is_load ? 
offsetof(CPUTLBEntry, addr_read) : offsetof(CPUTLBEntry, addr_write)); int fast_off = TLB_MASK_TABLE_OFS(mem_index); diff --git a/qemu/tcg/i386/tcg-target.h b/qemu/tcg/i386/tcg-target.h index 24ba5d19be..8508e68e7c 100644 --- a/qemu/tcg/i386/tcg-target.h +++ b/qemu/tcg/i386/tcg-target.h @@ -183,6 +183,9 @@ extern bool have_avx2; #define TCG_TARGET_HAS_not_vec 0 #define TCG_TARGET_HAS_neg_vec 0 #define TCG_TARGET_HAS_abs_vec 1 +#define TCG_TARGET_HAS_roti_vec 0 +#define TCG_TARGET_HAS_rots_vec 0 +#define TCG_TARGET_HAS_rotv_vec 0 #define TCG_TARGET_HAS_shi_vec 1 #define TCG_TARGET_HAS_shs_vec 1 #define TCG_TARGET_HAS_shv_vec have_avx2 diff --git a/qemu/tcg/i386/tcg-target.inc.c b/qemu/tcg/i386/tcg-target.inc.c index 15cc1c05d9..9cb46fe1be 100644 --- a/qemu/tcg/i386/tcg-target.inc.c +++ b/qemu/tcg/i386/tcg-target.inc.c @@ -1704,9 +1704,7 @@ static inline void tcg_out_tlb_load(TCGContext *s, TCGReg addrlo, TCGReg addrhi, int mem_index, MemOp opc, tcg_insn_unit **label_ptr, int which) { -#ifdef TARGET_ARM - struct uc_struct *uc = s->uc; -#endif + UNICORN_UNUSED struct uc_struct *uc = s->uc; const TCGReg r0 = TCG_REG_L0; const TCGReg r1 = TCG_REG_L1; TCGType ttype = TCG_TYPE_I32; @@ -3195,6 +3193,7 @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op) case INDEX_op_shls_vec: case INDEX_op_shrs_vec: case INDEX_op_sars_vec: + case INDEX_op_rotls_vec: case INDEX_op_cmp_vec: case INDEX_op_x86_shufps_vec: case INDEX_op_x86_blend_vec: @@ -3233,6 +3232,7 @@ int tcg_can_emit_vec_op(TCGContext *tcg_ctx, TCGOpcode opc, TCGType type, unsign case INDEX_op_xor_vec: case INDEX_op_andc_vec: return 1; + case INDEX_op_rotli_vec: case INDEX_op_cmp_vec: case INDEX_op_cmpsel_vec: return -1; @@ -3259,12 +3259,17 @@ int tcg_can_emit_vec_op(TCGContext *tcg_ctx, TCGOpcode opc, TCGType type, unsign return vece >= MO_16; case INDEX_op_sars_vec: return vece >= MO_16 && vece <= MO_32; + case INDEX_op_rotls_vec: + return vece >= MO_16 ? -1 : 0; case INDEX_op_shlv_vec: case INDEX_op_shrv_vec: return have_avx2 && vece >= MO_32; case INDEX_op_sarv_vec: return have_avx2 && vece == MO_32; + case INDEX_op_rotlv_vec: + case INDEX_op_rotrv_vec: + return have_avx2 && vece >= MO_32 ? -1 : 0; case INDEX_op_mul_vec: if (vece == MO_8) { @@ -3293,7 +3298,7 @@ int tcg_can_emit_vec_op(TCGContext *tcg_ctx, TCGOpcode opc, TCGType type, unsign } } -static void expand_vec_shi(TCGContext *tcg_ctx, TCGType type, unsigned vece, bool shr, +static void expand_vec_shi(TCGContext *tcg_ctx, TCGType type, unsigned vece, TCGOpcode opc, TCGv_vec v0, TCGv_vec v1, TCGArg imm) { TCGv_vec t1, t2; @@ -3303,26 +3308,31 @@ static void expand_vec_shi(TCGContext *tcg_ctx, TCGType type, unsigned vece, boo t1 = tcg_temp_new_vec(tcg_ctx, type); t2 = tcg_temp_new_vec(tcg_ctx, type); - /* Unpack to W, shift, and repack. Tricky bits: - (1) Use punpck*bw x,x to produce DDCCBBAA, - i.e. duplicate in other half of the 16-bit lane. - (2) For right-shift, add 8 so that the high half of - the lane becomes zero. For left-shift, we must - shift up and down again. - (3) Step 2 leaves high half zero such that PACKUSWB - (pack with unsigned saturation) does not modify - the quantity. */ + /* + * Unpack to W, shift, and repack. Tricky bits: + * (1) Use punpck*bw x,x to produce DDCCBBAA, + * i.e. duplicate in other half of the 16-bit lane. + * (2) For right-shift, add 8 so that the high half of the lane + * becomes zero. For left-shift, and left-rotate, we must + * shift up and down again. 
+ * (3) Step 2 leaves high half zero such that PACKUSWB + * (pack with unsigned saturation) does not modify + * the quantity. + */ vec_gen_3(tcg_ctx, INDEX_op_x86_punpckl_vec, type, MO_8, tcgv_vec_arg(tcg_ctx, t1), tcgv_vec_arg(tcg_ctx, v1), tcgv_vec_arg(tcg_ctx, v1)); vec_gen_3(tcg_ctx, INDEX_op_x86_punpckh_vec, type, MO_8, tcgv_vec_arg(tcg_ctx, t2), tcgv_vec_arg(tcg_ctx, v1), tcgv_vec_arg(tcg_ctx, v1)); - if (shr) { - tcg_gen_shri_vec(tcg_ctx, MO_16, t1, t1, imm + 8); - tcg_gen_shri_vec(tcg_ctx, MO_16, t2, t2, imm + 8); + if (opc != INDEX_op_rotli_vec) { + imm += 8; + } + if (opc == INDEX_op_shri_vec) { + tcg_gen_shri_vec(tcg_ctx, MO_16, t1, t1, imm); + tcg_gen_shri_vec(tcg_ctx, MO_16, t2, t2, imm); } else { - tcg_gen_shli_vec(tcg_ctx, MO_16, t1, t1, imm + 8); - tcg_gen_shli_vec(tcg_ctx, MO_16, t2, t2, imm + 8); + tcg_gen_shli_vec(tcg_ctx, MO_16, t1, t1, imm); + tcg_gen_shli_vec(tcg_ctx, MO_16, t2, t2, imm); tcg_gen_shri_vec(tcg_ctx, MO_16, t1, t1, 8); tcg_gen_shri_vec(tcg_ctx, MO_16, t2, t2, 8); } @@ -3389,6 +3399,61 @@ static void expand_vec_sari(TCGContext *tcg_ctx, TCGType type, unsigned vece, } } +static void expand_vec_rotli(TCGContext *tcg_ctx, TCGType type, unsigned vece, + TCGv_vec v0, TCGv_vec v1, TCGArg imm) +{ + TCGv_vec t; + + if (vece == MO_8) { + expand_vec_shi(tcg_ctx, type, vece, INDEX_op_rotli_vec, v0, v1, imm); + return; + } + + t = tcg_temp_new_vec(tcg_ctx, type); + tcg_gen_shli_vec(tcg_ctx, vece, t, v1, imm); + tcg_gen_shri_vec(tcg_ctx, vece, v0, v1, (8 << vece) - imm); + tcg_gen_or_vec(tcg_ctx, vece, v0, v0, t); + tcg_temp_free_vec(tcg_ctx, t); +} + +static void expand_vec_rotls(TCGContext *tcg_ctx, TCGType type, unsigned vece, + TCGv_vec v0, TCGv_vec v1, TCGv_i32 lsh) +{ + TCGv_i32 rsh; + TCGv_vec t; + + tcg_debug_assert(vece != MO_8); + + t = tcg_temp_new_vec(tcg_ctx, type); + rsh = tcg_temp_new_i32(tcg_ctx); + + tcg_gen_neg_i32(tcg_ctx, rsh, lsh); + tcg_gen_andi_i32(tcg_ctx, rsh, rsh, (8 << vece) - 1); + tcg_gen_shls_vec(tcg_ctx, vece, t, v1, lsh); + tcg_gen_shrs_vec(tcg_ctx, vece, v0, v1, rsh); + tcg_gen_or_vec(tcg_ctx, vece, v0, v0, t); + tcg_temp_free_vec(tcg_ctx, t); + tcg_temp_free_i32(tcg_ctx, rsh); +} + +static void expand_vec_rotv(TCGContext *tcg_ctx, TCGType type, unsigned vece, TCGv_vec v0, + TCGv_vec v1, TCGv_vec sh, bool right) +{ + TCGv_vec t = tcg_temp_new_vec(tcg_ctx, type); + + tcg_gen_dupi_vec(tcg_ctx, vece, t, 8 << vece); + tcg_gen_sub_vec(tcg_ctx, vece, t, t, sh); + if (right) { + tcg_gen_shlv_vec(tcg_ctx, vece, t, v1, t); + tcg_gen_shrv_vec(tcg_ctx, vece, v0, v1, sh); + } else { + tcg_gen_shrv_vec(tcg_ctx, vece, t, v1, t); + tcg_gen_shlv_vec(tcg_ctx, vece, v0, v1, sh); + } + tcg_gen_or_vec(tcg_ctx, vece, v0, v0, t); + tcg_temp_free_vec(tcg_ctx, t); +} + static void expand_vec_mul(TCGContext *tcg_ctx, TCGType type, unsigned vece, TCGv_vec v0, TCGv_vec v1, TCGv_vec v2) { @@ -3598,13 +3663,30 @@ void tcg_expand_vec_op(TCGContext *tcg_ctx, TCGOpcode opc, TCGType type, unsigne switch (opc) { case INDEX_op_shli_vec: case INDEX_op_shri_vec: - expand_vec_shi(tcg_ctx, type, vece, opc == INDEX_op_shri_vec, v0, v1, a2); + expand_vec_shi(tcg_ctx, type, vece, opc, v0, v1, a2); break; case INDEX_op_sari_vec: expand_vec_sari(tcg_ctx, type, vece, v0, v1, a2); break; + case INDEX_op_rotli_vec: + expand_vec_rotli(tcg_ctx, type, vece, v0, v1, a2); + break; + + case INDEX_op_rotls_vec: + expand_vec_rotls(tcg_ctx, type, vece, v0, v1, temp_tcgv_i32(tcg_ctx, arg_temp(a2))); + break; + + case INDEX_op_rotlv_vec: + v2 = temp_tcgv_vec(tcg_ctx, arg_temp(a2)); + 
expand_vec_rotv(tcg_ctx, type, vece, v0, v1, v2, false); + break; + case INDEX_op_rotrv_vec: + v2 = temp_tcgv_vec(tcg_ctx, arg_temp(a2)); + expand_vec_rotv(tcg_ctx, type, vece, v0, v1, v2, true); + break; + case INDEX_op_mul_vec: v2 = temp_tcgv_vec(tcg_ctx, arg_temp(a2)); expand_vec_mul(tcg_ctx, type, vece, v0, v1, v2); diff --git a/qemu/tcg/mips/tcg-target.inc.c b/qemu/tcg/mips/tcg-target.inc.c index ed5a9356c3..addf4c661d 100644 --- a/qemu/tcg/mips/tcg-target.inc.c +++ b/qemu/tcg/mips/tcg-target.inc.c @@ -1215,9 +1215,7 @@ static void tcg_out_tlb_load(TCGContext *s, TCGReg base, TCGReg addrl, TCGReg addrh, TCGMemOpIdx oi, tcg_insn_unit *label_ptr[2], bool is_load) { -#ifdef TARGET_ARM struct uc_struct *uc = s->uc; -#endif MemOp opc = get_memop(oi); unsigned s_bits = opc & MO_SIZE; unsigned a_bits = get_alignment_bits(opc); diff --git a/qemu/tcg/ppc/tcg-target.h b/qemu/tcg/ppc/tcg-target.h index 4fa21f0e71..be5b2901c3 100644 --- a/qemu/tcg/ppc/tcg-target.h +++ b/qemu/tcg/ppc/tcg-target.h @@ -161,6 +161,9 @@ extern bool have_vsx; #define TCG_TARGET_HAS_not_vec 1 #define TCG_TARGET_HAS_neg_vec have_isa_3_00 #define TCG_TARGET_HAS_abs_vec 0 +#define TCG_TARGET_HAS_roti_vec 0 +#define TCG_TARGET_HAS_rots_vec 0 +#define TCG_TARGET_HAS_rotv_vec 1 #define TCG_TARGET_HAS_shi_vec 0 #define TCG_TARGET_HAS_shs_vec 0 #define TCG_TARGET_HAS_shv_vec 1 diff --git a/qemu/tcg/ppc/tcg-target.inc.c b/qemu/tcg/ppc/tcg-target.inc.c index 00b7942901..a74e02c9d8 100644 --- a/qemu/tcg/ppc/tcg-target.inc.c +++ b/qemu/tcg/ppc/tcg-target.inc.c @@ -1885,9 +1885,7 @@ static TCGReg tcg_out_tlb_read(TCGContext *s, MemOp opc, TCGReg addrlo, TCGReg addrhi, int mem_index, bool is_read) { -#ifdef TARGET_ARM - struct uc_struct *uc = s->uc; -#endif + UNICORN_UNUSED struct uc_struct *uc = s->uc; int cmp_off = (is_read ? offsetof(CPUTLBEntry, addr_read) @@ -2623,21 +2621,24 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args, case INDEX_op_shl_i32: if (const_args[2]) { - tcg_out_shli32(s, args[0], args[1], args[2]); + /* Limit immediate shift count lest we create an illegal insn. */ + tcg_out_shli32(s, args[0], args[1], args[2] & 31); } else { tcg_out32(s, SLW | SAB(args[1], args[0], args[2])); } break; case INDEX_op_shr_i32: if (const_args[2]) { - tcg_out_shri32(s, args[0], args[1], args[2]); + /* Limit immediate shift count lest we create an illegal insn. */ + tcg_out_shri32(s, args[0], args[1], args[2] & 31); } else { tcg_out32(s, SRW | SAB(args[1], args[0], args[2])); } break; case INDEX_op_sar_i32: if (const_args[2]) { - tcg_out32(s, SRAWI | RS(args[1]) | RA(args[0]) | SH(args[2])); + /* Limit immediate shift count lest we create an illegal insn. */ + tcg_out32(s, SRAWI | RS(args[1]) | RA(args[0]) | SH(args[2] & 31)); } else { tcg_out32(s, SRAW | SAB(args[1], args[0], args[2])); } @@ -2709,14 +2710,16 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args, case INDEX_op_shl_i64: if (const_args[2]) { - tcg_out_shli64(s, args[0], args[1], args[2]); + /* Limit immediate shift count lest we create an illegal insn. */ + tcg_out_shli64(s, args[0], args[1], args[2] & 63); } else { tcg_out32(s, SLD | SAB(args[1], args[0], args[2])); } break; case INDEX_op_shr_i64: if (const_args[2]) { - tcg_out_shri64(s, args[0], args[1], args[2]); + /* Limit immediate shift count lest we create an illegal insn. 
*/ + tcg_out_shri64(s, args[0], args[1], args[2] & 63); } else { tcg_out32(s, SRD | SAB(args[1], args[0], args[2])); } @@ -3008,6 +3011,7 @@ int tcg_can_emit_vec_op(TCGContext *tcg_ctx, TCGOpcode opc, TCGType type, unsign case INDEX_op_shlv_vec: case INDEX_op_shrv_vec: case INDEX_op_sarv_vec: + case INDEX_op_rotlv_vec: return vece <= MO_32 || have_isa_2_07; case INDEX_op_ssadd_vec: case INDEX_op_sssub_vec: @@ -3018,6 +3022,7 @@ int tcg_can_emit_vec_op(TCGContext *tcg_ctx, TCGOpcode opc, TCGType type, unsign case INDEX_op_shli_vec: case INDEX_op_shri_vec: case INDEX_op_sari_vec: + case INDEX_op_rotli_vec: return vece <= MO_32 || have_isa_2_07 ? -1 : 0; case INDEX_op_neg_vec: return vece >= MO_32 && have_isa_3_00; @@ -3032,6 +3037,8 @@ int tcg_can_emit_vec_op(TCGContext *tcg_ctx, TCGOpcode opc, TCGType type, unsign return 0; case INDEX_op_bitsel_vec: return have_vsx; + case INDEX_op_rotrv_vec: + return -1; default: return 0; } @@ -3314,7 +3321,7 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, case INDEX_op_ppc_pkum_vec: insn = pkum_op[vece]; break; - case INDEX_op_ppc_rotl_vec: + case INDEX_op_rotlv_vec: insn = rotl_op[vece]; break; case INDEX_op_ppc_msum_vec: @@ -3422,7 +3429,7 @@ static void expand_vec_mul(TCGContext *tcg_ctx, TCGType type, unsigned vece, TCG t3 = tcg_temp_new_vec(tcg_ctx, type); t4 = tcg_temp_new_vec(tcg_ctx, type); tcg_gen_dupi_vec(tcg_ctx, MO_8, t4, -16); - vec_gen_3(tcg_ctx, INDEX_op_ppc_rotl_vec, type, MO_32, tcgv_vec_arg(tcg_ctx, t1), + vec_gen_3(tcg_ctx, INDEX_op_rotlv_vec, type, MO_32, tcgv_vec_arg(tcg_ctx, t1), tcgv_vec_arg(tcg_ctx, v2), tcgv_vec_arg(tcg_ctx, t4)); vec_gen_3(tcg_ctx, INDEX_op_ppc_mulou_vec, type, MO_16, tcgv_vec_arg(tcg_ctx, t2), tcgv_vec_arg(tcg_ctx, v1), tcgv_vec_arg(tcg_ctx, v2)); @@ -3447,7 +3454,7 @@ void tcg_expand_vec_op(TCGContext *tcg_ctx, TCGOpcode opc, TCGType type, unsigne TCGArg a0, ...) 
{ va_list va; - TCGv_vec v0, v1, v2; + TCGv_vec v0, v1, v2, t0; TCGArg a2; va_start(va, a0); @@ -3465,6 +3472,9 @@ void tcg_expand_vec_op(TCGContext *tcg_ctx, TCGOpcode opc, TCGType type, unsigne case INDEX_op_sari_vec: expand_vec_shi(tcg_ctx, type, vece, v0, v1, a2, INDEX_op_sarv_vec); break; + case INDEX_op_rotli_vec: + expand_vec_shi(tcg_ctx, type, vece, v0, v1, a2, INDEX_op_rotlv_vec); + break; case INDEX_op_cmp_vec: v2 = temp_tcgv_vec(tcg_ctx, arg_temp(a2)); expand_vec_cmp(tcg_ctx, type, vece, v0, v1, v2, va_arg(va, TCGArg)); @@ -3473,6 +3483,13 @@ void tcg_expand_vec_op(TCGContext *tcg_ctx, TCGOpcode opc, TCGType type, unsigne v2 = temp_tcgv_vec(tcg_ctx, arg_temp(a2)); expand_vec_mul(tcg_ctx, type, vece, v0, v1, v2); break; + case INDEX_op_rotlv_vec: + v2 = temp_tcgv_vec(tcg_ctx, arg_temp(a2)); + t0 = tcg_temp_new_vec(tcg_ctx, type); + tcg_gen_neg_vec(tcg_ctx, vece, t0, v2); + tcg_gen_rotlv_vec(tcg_ctx, vece, v0, v1, t0); + tcg_temp_free_vec(tcg_ctx, t0); + break; default: g_assert_not_reached(); } @@ -3677,12 +3694,13 @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op) case INDEX_op_shlv_vec: case INDEX_op_shrv_vec: case INDEX_op_sarv_vec: + case INDEX_op_rotlv_vec: + case INDEX_op_rotrv_vec: case INDEX_op_ppc_mrgh_vec: case INDEX_op_ppc_mrgl_vec: case INDEX_op_ppc_muleu_vec: case INDEX_op_ppc_mulou_vec: case INDEX_op_ppc_pkum_vec: - case INDEX_op_ppc_rotl_vec: case INDEX_op_dup2_vec: return &v_v_v; case INDEX_op_not_vec: diff --git a/qemu/tcg/ppc/tcg-target.opc.h b/qemu/tcg/ppc/tcg-target.opc.h index 1373f77e82..db514403c3 100644 --- a/qemu/tcg/ppc/tcg-target.opc.h +++ b/qemu/tcg/ppc/tcg-target.opc.h @@ -30,4 +30,3 @@ DEF(ppc_msum_vec, 1, 3, 0, IMPLVEC) DEF(ppc_muleu_vec, 1, 2, 0, IMPLVEC) DEF(ppc_mulou_vec, 1, 2, 0, IMPLVEC) DEF(ppc_pkum_vec, 1, 2, 0, IMPLVEC) -DEF(ppc_rotl_vec, 1, 2, 0, IMPLVEC) diff --git a/qemu/tcg/riscv/tcg-target.inc.c b/qemu/tcg/riscv/tcg-target.inc.c index 2a5d3347d3..3d34141092 100644 --- a/qemu/tcg/riscv/tcg-target.inc.c +++ b/qemu/tcg/riscv/tcg-target.inc.c @@ -502,10 +502,8 @@ static bool patch_reloc(tcg_insn_unit *code_ptr, int type, break; case R_RISCV_JAL: return reloc_jimm20(code_ptr, (tcg_insn_unit *)value); - break; case R_RISCV_CALL: return reloc_call(code_ptr, (tcg_insn_unit *)value); - break; default: tcg_abort(); } @@ -970,9 +968,7 @@ static void tcg_out_tlb_load(TCGContext *s, TCGReg addrl, TCGReg addrh, TCGMemOpIdx oi, tcg_insn_unit **label_ptr, bool is_load) { -#ifdef TARGET_ARM struct uc_struct *uc = s->uc; -#endif MemOp opc = get_memop(oi); unsigned s_bits = opc & MO_SIZE; unsigned a_bits = get_alignment_bits(opc); diff --git a/qemu/tcg/s390/tcg-target.inc.c b/qemu/tcg/s390/tcg-target.inc.c index c8fa20046f..3d64a675eb 100644 --- a/qemu/tcg/s390/tcg-target.inc.c +++ b/qemu/tcg/s390/tcg-target.inc.c @@ -1547,9 +1547,7 @@ QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -(1 << 19)); static TCGReg tcg_out_tlb_read(TCGContext *s, TCGReg addr_reg, MemOp opc, int mem_index, bool is_ld) { -#ifdef TARGET_ARM struct uc_struct *uc = s->uc; -#endif unsigned s_bits = opc & MO_SIZE; unsigned a_bits = get_alignment_bits(opc); unsigned s_mask = (1 << s_bits) - 1; diff --git a/qemu/tcg/sparc/tcg-target.inc.c b/qemu/tcg/sparc/tcg-target.inc.c index d4bc69d3b5..cf5533e8f4 100644 --- a/qemu/tcg/sparc/tcg-target.inc.c +++ b/qemu/tcg/sparc/tcg-target.inc.c @@ -1083,9 +1083,7 @@ QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -(1 << 12)); static TCGReg tcg_out_tlb_load(TCGContext *s, TCGReg addr, int mem_index, MemOp opc, int which) { -#ifdef TARGET_ARM struct 
uc_struct *uc = s->uc; -#endif int fast_off = TLB_MASK_TABLE_OFS(mem_index); int mask_off = fast_off + offsetof(CPUTLBDescFast, mask); int table_off = fast_off + offsetof(CPUTLBDescFast, table); diff --git a/qemu/tcg/tcg-op-gvec.c b/qemu/tcg/tcg-op-gvec.c index cab429c44a..13e4b287b8 100644 --- a/qemu/tcg/tcg-op-gvec.c +++ b/qemu/tcg/tcg-op-gvec.c @@ -325,11 +325,35 @@ void tcg_gen_gvec_5_ptr(TCGContext *tcg_ctx, uint32_t dofs, uint32_t aofs, uint3 in units of LNSZ. This limits the expansion of inline code. */ static inline bool check_size_impl(uint32_t oprsz, uint32_t lnsz) { - if (oprsz % lnsz == 0) { - uint32_t lnct = oprsz / lnsz; - return lnct >= 1 && lnct <= MAX_UNROLL; + uint32_t q, r; + + if (oprsz < lnsz) { + return false; } - return false; + + q = oprsz / lnsz; + r = oprsz % lnsz; + tcg_debug_assert((r & 7) == 0); + + if (lnsz < 16) { + /* For sizes below 16, accept no remainder. */ + if (r != 0) { + return false; + } + } else { + /* + * Recall that ARM SVE allows vector sizes that are not a + * power of 2, but always a multiple of 16. The intent is + * that e.g. size == 80 would be expanded with 2x32 + 1x16. + * In addition, expand_clr needs to handle a multiple of 8. + * Thus we can handle the tail with one more operation per + * diminishing power of 2. + */ + q += ctpop32(r); + } + + return q <= MAX_UNROLL; + } static void expand_clr(TCGContext *tcg_ctx, uint32_t dofs, uint32_t maxsz); @@ -404,22 +428,31 @@ static void gen_dup_i64(TCGContext *tcg_ctx, unsigned vece, TCGv_i64 out, TCGv_i static TCGType choose_vector_type(TCGContext *tcg_ctx, const TCGOpcode *list, unsigned vece, uint32_t size, bool prefer_i64) { - if (TCG_TARGET_HAS_v256 && check_size_impl(size, 32)) { - /* - * Recall that ARM SVE allows vector sizes that are not a - * power of 2, but always a multiple of 16. The intent is - * that e.g. size == 80 would be expanded with 2x32 + 1x16. - * It is hard to imagine a case in which v256 is supported - * but v128 is not, but check anyway. - */ - if (tcg_can_emit_vecop_list(tcg_ctx, list, TCG_TYPE_V256, vece) - && (size % 32 == 0 - || tcg_can_emit_vecop_list(tcg_ctx, list, TCG_TYPE_V128, vece))) { - return TCG_TYPE_V256; - } - } - if (TCG_TARGET_HAS_v128 && check_size_impl(size, 16) - && tcg_can_emit_vecop_list(tcg_ctx, list, TCG_TYPE_V128, vece)) { + /* + * Recall that ARM SVE allows vector sizes that are not a + * power of 2, but always a multiple of 16. The intent is + * that e.g. size == 80 would be expanded with 2x32 + 1x16. + * It is hard to imagine a case in which v256 is supported + * but v128 is not, but check anyway. + * In addition, expand_clr needs to handle a multiple of 8. 
+ */ + if (TCG_TARGET_HAS_v256 && + check_size_impl(size, 32) && + tcg_can_emit_vecop_list(tcg_ctx, list, TCG_TYPE_V256, vece) && + (!(size & 16) || + (TCG_TARGET_HAS_v128 && + tcg_can_emit_vecop_list(tcg_ctx, list, TCG_TYPE_V128, vece))) && + (!(size & 8) || + (TCG_TARGET_HAS_v64 && + tcg_can_emit_vecop_list(tcg_ctx, list, TCG_TYPE_V64, vece)))) { + return TCG_TYPE_V256; + } + if (TCG_TARGET_HAS_v128 && + check_size_impl(size, 16) && + tcg_can_emit_vecop_list(tcg_ctx, list, TCG_TYPE_V128, vece) && + (!(size & 8) || + (TCG_TARGET_HAS_v64 && + tcg_can_emit_vecop_list(tcg_ctx, list, TCG_TYPE_V64, vece)))) { return TCG_TYPE_V128; } if (TCG_TARGET_HAS_v64 && !prefer_i64 && check_size_impl(size, 8) @@ -434,6 +467,18 @@ static void do_dup_store(TCGContext *tcg_ctx, TCGType type, uint32_t dofs, uint3 { uint32_t i = 0; + tcg_debug_assert(oprsz >= 8); + + /* + * This may be expand_clr for the tail of an operation, e.g. + * oprsz == 8 && maxsz == 64. The first 8 bytes of this store + * are misaligned wrt the maximum vector size, so do that first. + */ + if (dofs & 8) { + tcg_gen_stl_vec(tcg_ctx, t_vec, tcg_ctx->cpu_env, dofs + i, TCG_TYPE_V64); + i += 8; + } + switch (type) { case TCG_TYPE_V256: /* @@ -621,17 +666,22 @@ static void expand_clr(TCGContext *tcg_ctx, uint32_t dofs, uint32_t maxsz) /* Expand OPSZ bytes worth of two-operand operations using i32 elements. */ static void expand_2_i32(TCGContext *tcg_ctx, uint32_t dofs, uint32_t aofs, uint32_t oprsz, - void (*fni)(TCGContext *, TCGv_i32, TCGv_i32)) + bool load_dest, void (*fni)(TCGContext *, TCGv_i32, TCGv_i32)) { TCGv_i32 t0 = tcg_temp_new_i32(tcg_ctx); + TCGv_i32 t1 = tcg_temp_new_i32(tcg_ctx); uint32_t i; for (i = 0; i < oprsz; i += 4) { tcg_gen_ld_i32(tcg_ctx, t0, tcg_ctx->cpu_env, aofs + i); - fni(tcg_ctx, t0, t0); - tcg_gen_st_i32(tcg_ctx, t0, tcg_ctx->cpu_env, dofs + i); + if (load_dest) { + tcg_gen_ld_i32(tcg_ctx, t1, tcg_ctx->cpu_env, dofs + i); + } + fni(tcg_ctx, t1, t0); + tcg_gen_st_i32(tcg_ctx, t1, tcg_ctx->cpu_env, dofs + i); } tcg_temp_free_i32(tcg_ctx, t0); + tcg_temp_free_i32(tcg_ctx, t1); } static void expand_2i_i32(TCGContext *tcg_ctx, uint32_t dofs, uint32_t aofs, uint32_t oprsz, @@ -751,17 +801,22 @@ static void expand_4_i32(TCGContext *tcg_ctx, uint32_t dofs, uint32_t aofs, uint /* Expand OPSZ bytes worth of two-operand operations using i64 elements. */ static void expand_2_i64(TCGContext *tcg_ctx, uint32_t dofs, uint32_t aofs, uint32_t oprsz, - void (*fni)(TCGContext *, TCGv_i64, TCGv_i64)) + bool load_dest, void (*fni)(TCGContext *, TCGv_i64, TCGv_i64)) { TCGv_i64 t0 = tcg_temp_new_i64(tcg_ctx); + TCGv_i64 t1 = tcg_temp_new_i64(tcg_ctx); uint32_t i; for (i = 0; i < oprsz; i += 8) { tcg_gen_ld_i64(tcg_ctx, t0, tcg_ctx->cpu_env, aofs + i); - fni(tcg_ctx, t0, t0); - tcg_gen_st_i64(tcg_ctx, t0, tcg_ctx->cpu_env, dofs + i); + if (load_dest) { + tcg_gen_ld_i64(tcg_ctx, t1, tcg_ctx->cpu_env, dofs + i); + } + fni(tcg_ctx, t1, t0); + tcg_gen_st_i64(tcg_ctx, t1, tcg_ctx->cpu_env, dofs + i); } tcg_temp_free_i64(tcg_ctx, t0); + tcg_temp_free_i64(tcg_ctx, t1); } static void expand_2i_i64(TCGContext *tcg_ctx, uint32_t dofs, uint32_t aofs, uint32_t oprsz, @@ -882,17 +937,23 @@ static void expand_4_i64(TCGContext *tcg_ctx, uint32_t dofs, uint32_t aofs, uint /* Expand OPSZ bytes worth of two-operand operations using host vectors. 
*/ static void expand_2_vec(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, uint32_t aofs, uint32_t oprsz, uint32_t tysz, TCGType type, + bool load_dest, void (*fni)(TCGContext *, unsigned, TCGv_vec, TCGv_vec)) { TCGv_vec t0 = tcg_temp_new_vec(tcg_ctx, type); + TCGv_vec t1 = tcg_temp_new_vec(tcg_ctx, type); uint32_t i; for (i = 0; i < oprsz; i += tysz) { tcg_gen_ld_vec(tcg_ctx, t0, tcg_ctx->cpu_env, aofs + i); - fni(tcg_ctx, vece, t0, t0); - tcg_gen_st_vec(tcg_ctx, t0, tcg_ctx->cpu_env, dofs + i); + if (load_dest) { + tcg_gen_ld_vec(tcg_ctx, t1, tcg_ctx->cpu_env, dofs + i); + } + fni(tcg_ctx, vece, t1, t0); + tcg_gen_st_vec(tcg_ctx, t1, tcg_ctx->cpu_env, dofs + i); } tcg_temp_free_vec(tcg_ctx, t0); + tcg_temp_free_vec(tcg_ctx, t1); } /* Expand OPSZ bytes worth of two-vector operands and an immediate operand @@ -1046,7 +1107,8 @@ void tcg_gen_gvec_2(TCGContext *tcg_ctx, uint32_t dofs, uint32_t aofs, * that e.g. size == 80 would be expanded with 2x32 + 1x16. */ some = QEMU_ALIGN_DOWN(oprsz, 32); - expand_2_vec(tcg_ctx, g->vece, dofs, aofs, some, 32, TCG_TYPE_V256, g->fniv); + expand_2_vec(tcg_ctx, g->vece, dofs, aofs, some, 32, TCG_TYPE_V256, + g->load_dest, g->fniv); if (some == oprsz) { break; } @@ -1056,17 +1118,19 @@ void tcg_gen_gvec_2(TCGContext *tcg_ctx, uint32_t dofs, uint32_t aofs, maxsz -= some; /* fallthru */ case TCG_TYPE_V128: - expand_2_vec(tcg_ctx, g->vece, dofs, aofs, oprsz, 16, TCG_TYPE_V128, g->fniv); + expand_2_vec(tcg_ctx, g->vece, dofs, aofs, oprsz, 16, TCG_TYPE_V128, + g->load_dest, g->fniv); break; case TCG_TYPE_V64: - expand_2_vec(tcg_ctx, g->vece, dofs, aofs, oprsz, 8, TCG_TYPE_V64, g->fniv); + expand_2_vec(tcg_ctx, g->vece, dofs, aofs, oprsz, 8, TCG_TYPE_V64, + g->load_dest, g->fniv); break; case 0: if (g->fni8 && check_size_impl(oprsz, 8)) { - expand_2_i64(tcg_ctx, dofs, aofs, oprsz, g->fni8); + expand_2_i64(tcg_ctx, dofs, aofs, oprsz, g->load_dest, g->fni8); } else if (g->fni4 && check_size_impl(oprsz, 4)) { - expand_2_i32(tcg_ctx, dofs, aofs, oprsz, g->fni4); + expand_2_i32(tcg_ctx, dofs, aofs, oprsz, g->load_dest, g->fni4); } else { assert(g->fno != NULL); tcg_gen_gvec_2_ool(tcg_ctx, dofs, aofs, oprsz, maxsz, g->data, g->fno); @@ -1543,32 +1607,11 @@ void tcg_gen_gvec_dup_mem(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, uin } } -void tcg_gen_gvec_dup64i(TCGContext *tcg_ctx, uint32_t dofs, uint32_t oprsz, - uint32_t maxsz, uint64_t x) -{ - check_size_align(oprsz, maxsz, dofs); - do_dup(tcg_ctx, MO_64, dofs, oprsz, maxsz, NULL, NULL, x); -} - -void tcg_gen_gvec_dup32i(TCGContext *tcg_ctx, uint32_t dofs, uint32_t oprsz, - uint32_t maxsz, uint32_t x) -{ - check_size_align(oprsz, maxsz, dofs); - do_dup(tcg_ctx, MO_32, dofs, oprsz, maxsz, NULL, NULL, x); -} - -void tcg_gen_gvec_dup16i(TCGContext *tcg_ctx, uint32_t dofs, uint32_t oprsz, - uint32_t maxsz, uint16_t x) -{ - check_size_align(oprsz, maxsz, dofs); - do_dup(tcg_ctx, MO_16, dofs, oprsz, maxsz, NULL, NULL, x); -} - -void tcg_gen_gvec_dup8i(TCGContext *tcg_ctx, uint32_t dofs, uint32_t oprsz, - uint32_t maxsz, uint8_t x) +void tcg_gen_gvec_dup_imm(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, uint32_t oprsz, + uint32_t maxsz, uint64_t x) { check_size_align(oprsz, maxsz, dofs); - do_dup(tcg_ctx, MO_8, dofs, oprsz, maxsz, NULL, NULL, x); + do_dup(tcg_ctx, vece, dofs, oprsz, maxsz, NULL, NULL, x); } void tcg_gen_gvec_not(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, uint32_t aofs, @@ -2321,7 +2364,7 @@ void tcg_gen_gvec_xor(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, uint32_ }; if (aofs == 
bofs) { - tcg_gen_gvec_dup8i(tcg_ctx, dofs, oprsz, maxsz, 0); + tcg_gen_gvec_dup_imm(tcg_ctx, MO_64, dofs, oprsz, maxsz, 0); } else { tcg_gen_gvec_3(tcg_ctx, dofs, aofs, bofs, oprsz, maxsz, &g); } @@ -2338,7 +2381,7 @@ void tcg_gen_gvec_andc(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, uint32 }; if (aofs == bofs) { - tcg_gen_gvec_dup8i(tcg_ctx, dofs, oprsz, maxsz, 0); + tcg_gen_gvec_dup_imm(tcg_ctx, MO_64, dofs, oprsz, maxsz, 0); } else { tcg_gen_gvec_3(tcg_ctx, dofs, aofs, bofs, oprsz, maxsz, &g); } @@ -2355,7 +2398,7 @@ void tcg_gen_gvec_orc(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, uint32_ }; if (aofs == bofs) { - tcg_gen_gvec_dup8i(tcg_ctx, dofs, oprsz, maxsz, -1); + tcg_gen_gvec_dup_imm(tcg_ctx, MO_64, dofs, oprsz, maxsz, -1); } else { tcg_gen_gvec_3(tcg_ctx, dofs, aofs, bofs, oprsz, maxsz, &g); } @@ -2406,7 +2449,7 @@ void tcg_gen_gvec_eqv(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, uint32_ }; if (aofs == bofs) { - tcg_gen_gvec_dup8i(tcg_ctx, dofs, oprsz, maxsz, -1); + tcg_gen_gvec_dup_imm(tcg_ctx, MO_64, dofs, oprsz, maxsz, -1); } else { tcg_gen_gvec_3(tcg_ctx, dofs, aofs, bofs, oprsz, maxsz, &g); } @@ -2654,6 +2697,74 @@ void tcg_gen_gvec_sari(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, uint32 } } +void tcg_gen_vec_rotl8i_i64(TCGContext *tcg_ctx, TCGv_i64 d, TCGv_i64 a, int64_t c) +{ + uint64_t mask = dup_const(MO_8, 0xff << c); + + tcg_gen_shli_i64(tcg_ctx, d, a, c); + tcg_gen_shri_i64(tcg_ctx, a, a, 8 - c); + tcg_gen_andi_i64(tcg_ctx, d, d, mask); + tcg_gen_andi_i64(tcg_ctx, a, a, ~mask); + tcg_gen_or_i64(tcg_ctx, d, d, a); +} + +void tcg_gen_vec_rotl16i_i64(TCGContext *tcg_ctx, TCGv_i64 d, TCGv_i64 a, int64_t c) +{ + uint64_t mask = dup_const(MO_16, 0xffff << c); + + tcg_gen_shli_i64(tcg_ctx, d, a, c); + tcg_gen_shri_i64(tcg_ctx, a, a, 16 - c); + tcg_gen_andi_i64(tcg_ctx, d, d, mask); + tcg_gen_andi_i64(tcg_ctx, a, a, ~mask); + tcg_gen_or_i64(tcg_ctx, d, d, a); +} + +void tcg_gen_gvec_rotli(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, uint32_t aofs, + int64_t shift, uint32_t oprsz, uint32_t maxsz) +{ + static const TCGOpcode vecop_list[] = { INDEX_op_rotli_vec, 0 }; + static const GVecGen2i g[4] = { + { .fni8 = tcg_gen_vec_rotl8i_i64, + .fniv = tcg_gen_rotli_vec, + .fno = gen_helper_gvec_rotl8i, + .opt_opc = vecop_list, + .vece = MO_8 }, + { .fni8 = tcg_gen_vec_rotl16i_i64, + .fniv = tcg_gen_rotli_vec, + .fno = gen_helper_gvec_rotl16i, + .opt_opc = vecop_list, + .vece = MO_16 }, + { .fni4 = tcg_gen_rotli_i32, + .fniv = tcg_gen_rotli_vec, + .fno = gen_helper_gvec_rotl32i, + .opt_opc = vecop_list, + .vece = MO_32 }, + { .fni8 = tcg_gen_rotli_i64, + .fniv = tcg_gen_rotli_vec, + .fno = gen_helper_gvec_rotl64i, + .opt_opc = vecop_list, + .prefer_i64 = TCG_TARGET_REG_BITS == 64, + .vece = MO_64 }, + }; + + tcg_debug_assert(vece <= MO_64); + tcg_debug_assert(shift >= 0 && shift < (8 << vece)); + if (shift == 0) { + tcg_gen_gvec_mov(tcg_ctx, vece, dofs, aofs, oprsz, maxsz); + } else { + tcg_gen_gvec_2i(tcg_ctx, dofs, aofs, oprsz, maxsz, shift, &g[vece]); + } +} + +void tcg_gen_gvec_rotri(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, uint32_t aofs, + int64_t shift, uint32_t oprsz, uint32_t maxsz) +{ + tcg_debug_assert(vece <= MO_64); + tcg_debug_assert(shift >= 0 && shift < (8 << vece)); + tcg_gen_gvec_rotli(tcg_ctx, vece, dofs, aofs, -shift & ((8 << vece) - 1), + oprsz, maxsz); +} + /* * Specialized generation vector shifts by a non-constant scalar. 
*/ @@ -2868,6 +2979,28 @@ void tcg_gen_gvec_sars(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, uint32 do_gvec_shifts(tcg_ctx, vece, dofs, aofs, shift, oprsz, maxsz, &g); } +void tcg_gen_gvec_rotls(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, uint32_t aofs, + TCGv_i32 shift, uint32_t oprsz, uint32_t maxsz) +{ + static const GVecGen2sh g = { + .fni4 = tcg_gen_rotl_i32, + .fni8 = tcg_gen_rotl_i64, + .fniv_s = tcg_gen_rotls_vec, + .fniv_v = tcg_gen_rotlv_vec, + .fno = { + gen_helper_gvec_rotl8i, + gen_helper_gvec_rotl16i, + gen_helper_gvec_rotl32i, + gen_helper_gvec_rotl64i, + }, + .s_list = { INDEX_op_rotls_vec, 0 }, + .v_list = { INDEX_op_rotlv_vec, 0 }, + }; + + tcg_debug_assert(vece <= MO_64); + do_gvec_shifts(tcg_ctx, vece, dofs, aofs, shift, oprsz, maxsz, &g); +} + /* * Expand D = A << (B % element bits) * @@ -3063,6 +3196,128 @@ void tcg_gen_gvec_sarv(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, uint32 tcg_gen_gvec_3(tcg_ctx, dofs, aofs, bofs, oprsz, maxsz, &g[vece]); } +/* + * Similarly for rotates. + */ + +static void tcg_gen_rotlv_mod_vec(TCGContext *tcg_ctx, unsigned vece, TCGv_vec d, + TCGv_vec a, TCGv_vec b) +{ + TCGv_vec t = tcg_temp_new_vec_matching(tcg_ctx, d); + + tcg_gen_dupi_vec(tcg_ctx, vece, t, (8 << vece) - 1); + tcg_gen_and_vec(tcg_ctx, vece, t, t, b); + tcg_gen_rotlv_vec(tcg_ctx, vece, d, a, t); + tcg_temp_free_vec(tcg_ctx, t); +} + +static void tcg_gen_rotl_mod_i32(TCGContext *tcg_ctx, TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) +{ + TCGv_i32 t = tcg_temp_new_i32(tcg_ctx); + + tcg_gen_andi_i32(tcg_ctx, t, b, 31); + tcg_gen_rotl_i32(tcg_ctx, d, a, t); + tcg_temp_free_i32(tcg_ctx, t); +} + +static void tcg_gen_rotl_mod_i64(TCGContext *tcg_ctx, TCGv_i64 d, TCGv_i64 a, TCGv_i64 b) +{ + TCGv_i64 t = tcg_temp_new_i64(tcg_ctx); + + tcg_gen_andi_i64(tcg_ctx, t, b, 63); + tcg_gen_rotl_i64(tcg_ctx, d, a, t); + tcg_temp_free_i64(tcg_ctx, t); +} + +void tcg_gen_gvec_rotlv(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, uint32_t aofs, + uint32_t bofs, uint32_t oprsz, uint32_t maxsz) +{ + static const TCGOpcode vecop_list[] = { INDEX_op_rotlv_vec, 0 }; + static const GVecGen3 g[4] = { + { .fniv = tcg_gen_rotlv_mod_vec, + .fno = gen_helper_gvec_rotl8v, + .opt_opc = vecop_list, + .vece = MO_8 }, + { .fniv = tcg_gen_rotlv_mod_vec, + .fno = gen_helper_gvec_rotl16v, + .opt_opc = vecop_list, + .vece = MO_16 }, + { .fni4 = tcg_gen_rotl_mod_i32, + .fniv = tcg_gen_rotlv_mod_vec, + .fno = gen_helper_gvec_rotl32v, + .opt_opc = vecop_list, + .vece = MO_32 }, + { .fni8 = tcg_gen_rotl_mod_i64, + .fniv = tcg_gen_rotlv_mod_vec, + .fno = gen_helper_gvec_rotl64v, + .opt_opc = vecop_list, + .prefer_i64 = TCG_TARGET_REG_BITS == 64, + .vece = MO_64 }, + }; + + tcg_debug_assert(vece <= MO_64); + tcg_gen_gvec_3(tcg_ctx, dofs, aofs, bofs, oprsz, maxsz, &g[vece]); +} + +static void tcg_gen_rotrv_mod_vec(TCGContext *tcg_ctx, unsigned vece, TCGv_vec d, + TCGv_vec a, TCGv_vec b) +{ + TCGv_vec t = tcg_temp_new_vec_matching(tcg_ctx, d); + + tcg_gen_dupi_vec(tcg_ctx, vece, t, (8 << vece) - 1); + tcg_gen_and_vec(tcg_ctx, vece, t, t, b); + tcg_gen_rotrv_vec(tcg_ctx, vece, d, a, t); + tcg_temp_free_vec(tcg_ctx, t); +} + +static void tcg_gen_rotr_mod_i32(TCGContext *tcg_ctx, TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) +{ + TCGv_i32 t = tcg_temp_new_i32(tcg_ctx); + + tcg_gen_andi_i32(tcg_ctx, t, b, 31); + tcg_gen_rotr_i32(tcg_ctx, d, a, t); + tcg_temp_free_i32(tcg_ctx, t); +} + +static void tcg_gen_rotr_mod_i64(TCGContext *tcg_ctx, TCGv_i64 d, TCGv_i64 a, TCGv_i64 b) +{ + TCGv_i64 t = tcg_temp_new_i64(tcg_ctx); + 
+ tcg_gen_andi_i64(tcg_ctx, t, b, 63); + tcg_gen_rotr_i64(tcg_ctx, d, a, t); + tcg_temp_free_i64(tcg_ctx, t); +} + +void tcg_gen_gvec_rotrv(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, uint32_t aofs, + uint32_t bofs, uint32_t oprsz, uint32_t maxsz) +{ + static const TCGOpcode vecop_list[] = { INDEX_op_rotrv_vec, 0 }; + static const GVecGen3 g[4] = { + { .fniv = tcg_gen_rotrv_mod_vec, + .fno = gen_helper_gvec_rotr8v, + .opt_opc = vecop_list, + .vece = MO_8 }, + { .fniv = tcg_gen_rotrv_mod_vec, + .fno = gen_helper_gvec_rotr16v, + .opt_opc = vecop_list, + .vece = MO_16 }, + { .fni4 = tcg_gen_rotr_mod_i32, + .fniv = tcg_gen_rotrv_mod_vec, + .fno = gen_helper_gvec_rotr32v, + .opt_opc = vecop_list, + .vece = MO_32 }, + { .fni8 = tcg_gen_rotr_mod_i64, + .fniv = tcg_gen_rotrv_mod_vec, + .fno = gen_helper_gvec_rotr64v, + .opt_opc = vecop_list, + .prefer_i64 = TCG_TARGET_REG_BITS == 64, + .vece = MO_64 }, + }; + + tcg_debug_assert(vece <= MO_64); + tcg_gen_gvec_3(tcg_ctx, dofs, aofs, bofs, oprsz, maxsz, &g[vece]); +} + /* Expand OPSZ bytes worth of three-operand operations using i32 elements. */ static void expand_cmp_i32(TCGContext *tcg_ctx, uint32_t dofs, uint32_t aofs, uint32_t bofs, uint32_t oprsz, TCGCond cond) diff --git a/qemu/tcg/tcg-op-vec.c b/qemu/tcg/tcg-op-vec.c index 99343962ac..02d3e22564 100644 --- a/qemu/tcg/tcg-op-vec.c +++ b/qemu/tcg/tcg-op-vec.c @@ -547,6 +547,18 @@ void tcg_gen_sari_vec(TCGContext *tcg_ctx, unsigned vece, TCGv_vec r, TCGv_vec a do_shifti(tcg_ctx, INDEX_op_sari_vec, vece, r, a, i); } +void tcg_gen_rotli_vec(TCGContext *tcg_ctx, unsigned vece, TCGv_vec r, TCGv_vec a, int64_t i) +{ + do_shifti(tcg_ctx, INDEX_op_rotli_vec, vece, r, a, i); +} + +void tcg_gen_rotri_vec(TCGContext *tcg_ctx, unsigned vece, TCGv_vec r, TCGv_vec a, int64_t i) +{ + int bits = 8 << vece; + tcg_debug_assert(i >= 0 && i < bits); + do_shifti(tcg_ctx, INDEX_op_rotli_vec, vece, r, a, -i & (bits - 1)); +} + void tcg_gen_cmp_vec(TCGContext *tcg_ctx, TCGCond cond, unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b) { @@ -647,7 +659,9 @@ static void do_minmax(TCGContext *tcg_ctx, unsigned vece, TCGv_vec r, TCGv_vec a TCGv_vec b, TCGOpcode opc, TCGCond cond) { if (!do_op3(tcg_ctx, vece, r, a, b, opc)) { + const TCGOpcode *hold_list = tcg_swap_vecop_list(NULL); tcg_gen_cmpsel_vec(tcg_ctx, cond, vece, r, a, b, a, b); + tcg_swap_vecop_list(hold_list); } } @@ -686,8 +700,18 @@ void tcg_gen_sarv_vec(TCGContext *tcg_ctx, unsigned vece, TCGv_vec r, TCGv_vec a do_op3_nofail(tcg_ctx, vece, r, a, b, INDEX_op_sarv_vec); } +void tcg_gen_rotlv_vec(TCGContext *tcg_ctx, unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b) +{ + do_op3_nofail(tcg_ctx, vece, r, a, b, INDEX_op_rotlv_vec); +} + +void tcg_gen_rotrv_vec(TCGContext *tcg_ctx, unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b) +{ + do_op3_nofail(tcg_ctx, vece, r, a, b, INDEX_op_rotrv_vec); +} + static void do_shifts(TCGContext *tcg_ctx, unsigned vece, TCGv_vec r, TCGv_vec a, - TCGv_i32 s, TCGOpcode opc_s, TCGOpcode opc_v) + TCGv_i32 s, TCGOpcode opc) { TCGTemp *rt = tcgv_vec_temp(tcg_ctx, r); TCGTemp *at = tcgv_vec_temp(tcg_ctx, a); @@ -696,48 +720,41 @@ static void do_shifts(TCGContext *tcg_ctx, unsigned vece, TCGv_vec r, TCGv_vec a TCGArg ai = temp_arg(at); TCGArg si = temp_arg(st); TCGType type = rt->base_type; - const TCGOpcode *hold_list; int can; tcg_debug_assert(at->base_type >= type); - tcg_assert_listed_vecop(opc_s); - hold_list = tcg_swap_vecop_list(NULL); - can = tcg_can_emit_vec_op(tcg_ctx, opc_s, type, vece); + tcg_assert_listed_vecop(opc); + 
can = tcg_can_emit_vec_op(tcg_ctx, opc, type, vece); if (can > 0) { - vec_gen_3(tcg_ctx, opc_s, type, vece, ri, ai, si); + vec_gen_3(tcg_ctx, opc, type, vece, ri, ai, si); } else if (can < 0) { - tcg_expand_vec_op(tcg_ctx, opc_s, type, vece, ri, ai, si); + const TCGOpcode *hold_list = tcg_swap_vecop_list(NULL); + tcg_expand_vec_op(tcg_ctx, opc, type, vece, ri, ai, si); + tcg_swap_vecop_list(hold_list); } else { - TCGv_vec vec_s = tcg_temp_new_vec(tcg_ctx, type); - - if (vece == MO_64) { - TCGv_i64 s64 = tcg_temp_new_i64(tcg_ctx); - tcg_gen_extu_i32_i64(tcg_ctx, s64, s); - tcg_gen_dup_i64_vec(tcg_ctx, MO_64, vec_s, s64); - tcg_temp_free_i64(tcg_ctx, s64); - } else { - tcg_gen_dup_i32_vec(tcg_ctx, vece, vec_s, s); - } - do_op3_nofail(tcg_ctx, vece, r, a, vec_s, opc_v); - tcg_temp_free_vec(tcg_ctx, vec_s); + g_assert_not_reached(); } - tcg_swap_vecop_list(hold_list); } void tcg_gen_shls_vec(TCGContext *tcg_ctx, unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_i32 b) { - do_shifts(tcg_ctx, vece, r, a, b, INDEX_op_shls_vec, INDEX_op_shlv_vec); + do_shifts(tcg_ctx, vece, r, a, b, INDEX_op_shls_vec); } void tcg_gen_shrs_vec(TCGContext *tcg_ctx, unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_i32 b) { - do_shifts(tcg_ctx, vece, r, a, b, INDEX_op_shrs_vec, INDEX_op_shrv_vec); + do_shifts(tcg_ctx, vece, r, a, b, INDEX_op_shrs_vec); } void tcg_gen_sars_vec(TCGContext *tcg_ctx, unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_i32 b) { - do_shifts(tcg_ctx, vece, r, a, b, INDEX_op_sars_vec, INDEX_op_sarv_vec); + do_shifts(tcg_ctx, vece, r, a, b, INDEX_op_sars_vec); +} + +void tcg_gen_rotls_vec(TCGContext *tcg_ctx, unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_i32 s) +{ + do_shifts(tcg_ctx, vece, r, a, s, INDEX_op_rotls_vec); } void tcg_gen_bitsel_vec(TCGContext *tcg_ctx, unsigned vece, TCGv_vec r, TCGv_vec a, diff --git a/qemu/tcg/tcg-op.c b/qemu/tcg/tcg-op.c index 8a5865dfe8..d2d44666cb 100644 --- a/qemu/tcg/tcg-op.c +++ b/qemu/tcg/tcg-op.c @@ -538,9 +538,9 @@ void tcg_gen_rotl_i32(TCGContext *tcg_ctx, TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 } } -void tcg_gen_rotli_i32(TCGContext *tcg_ctx, TCGv_i32 ret, TCGv_i32 arg1, unsigned arg2) +void tcg_gen_rotli_i32(TCGContext *tcg_ctx, TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2) { - tcg_debug_assert(arg2 < 32); + tcg_debug_assert(arg2 >= 0 && arg2 < 32); /* some cases can be optimized here */ if (arg2 == 0) { tcg_gen_mov_i32(tcg_ctx, ret, arg1); @@ -578,9 +578,9 @@ void tcg_gen_rotr_i32(TCGContext *tcg_ctx, TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 } } -void tcg_gen_rotri_i32(TCGContext *tcg_ctx, TCGv_i32 ret, TCGv_i32 arg1, unsigned arg2) +void tcg_gen_rotri_i32(TCGContext *tcg_ctx, TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2) { - tcg_debug_assert(arg2 < 32); + tcg_debug_assert(arg2 >= 0 && arg2 < 32); /* some cases can be optimized here */ if (arg2 == 0) { tcg_gen_mov_i32(tcg_ctx, ret, arg1); @@ -2000,9 +2000,9 @@ void tcg_gen_rotl_i64(TCGContext *tcg_ctx, TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 #endif } -void tcg_gen_rotli_i64(TCGContext *tcg_ctx, TCGv_i64 ret, TCGv_i64 arg1, unsigned arg2) +void tcg_gen_rotli_i64(TCGContext *tcg_ctx, TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2) { - tcg_debug_assert(arg2 < 64); + tcg_debug_assert(arg2 >= 0 && arg2 < 64); /* some cases can be optimized here */ if (arg2 == 0) { tcg_gen_mov_i64(tcg_ctx, ret, arg1); @@ -2041,9 +2041,9 @@ void tcg_gen_rotr_i64(TCGContext *tcg_ctx, TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 #endif } -void tcg_gen_rotri_i64(TCGContext *tcg_ctx, TCGv_i64 ret, TCGv_i64 arg1, unsigned arg2) +void tcg_gen_rotri_i64(TCGContext *tcg_ctx, 
TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2) { - tcg_debug_assert(arg2 < 64); + tcg_debug_assert(arg2 >= 0 && arg2 < 64); /* some cases can be optimized here */ if (arg2 == 0) { tcg_gen_mov_i64(tcg_ctx, ret, arg1); diff --git a/qemu/tcg/tcg.c b/qemu/tcg/tcg.c index 1c9353032e..3d23487176 100644 --- a/qemu/tcg/tcg.c +++ b/qemu/tcg/tcg.c @@ -1411,6 +1411,13 @@ bool tcg_op_supported(TCGOpcode op) case INDEX_op_shrv_vec: case INDEX_op_sarv_vec: return have_vec && TCG_TARGET_HAS_shv_vec; + case INDEX_op_rotli_vec: + return have_vec && TCG_TARGET_HAS_roti_vec; + case INDEX_op_rotls_vec: + return have_vec && TCG_TARGET_HAS_rots_vec; + case INDEX_op_rotlv_vec: + case INDEX_op_rotrv_vec: + return have_vec && TCG_TARGET_HAS_rotv_vec; case INDEX_op_ssadd_vec: case INDEX_op_usadd_vec: case INDEX_op_sssub_vec: @@ -2779,34 +2786,68 @@ static bool liveness_pass_2(TCGContext *s) } /* Outputs become available. */ - for (i = 0; i < nb_oargs; i++) { - arg_ts = arg_temp(op->args[i]); + if (opc == INDEX_op_mov_i32 || opc == INDEX_op_mov_i64) { + arg_ts = arg_temp(op->args[0]); dir_ts = arg_ts->state_ptr; - if (!dir_ts) { - continue; + if (dir_ts) { + op->args[0] = temp_arg(dir_ts); + changes = true; + + /* The output is now live and modified. */ + arg_ts->state = 0; + + if (NEED_SYNC_ARG(0)) { + TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32 + ? INDEX_op_st_i32 + : INDEX_op_st_i64); + TCGOp *sop = tcg_op_insert_after(s, op, sopc); + TCGTemp *out_ts = dir_ts; + + if (IS_DEAD_ARG(0)) { + out_ts = arg_temp(op->args[1]); + arg_ts->state = TS_DEAD; + tcg_op_remove(s, op); + } else { + arg_ts->state = TS_MEM; + } + + sop->args[0] = temp_arg(out_ts); + sop->args[1] = temp_arg(arg_ts->mem_base); + sop->args[2] = arg_ts->mem_offset; + } else { + tcg_debug_assert(!IS_DEAD_ARG(0)); + } } - op->args[i] = temp_arg(dir_ts); - changes = true; + } else { + for (i = 0; i < nb_oargs; i++) { + arg_ts = arg_temp(op->args[i]); + dir_ts = arg_ts->state_ptr; + if (!dir_ts) { + continue; + } + op->args[i] = temp_arg(dir_ts); + changes = true; - /* The output is now live and modified. */ - arg_ts->state = 0; + /* The output is now live and modified. */ + arg_ts->state = 0; - /* Sync outputs upon their last write. */ - if (NEED_SYNC_ARG(i)) { - TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32 - ? INDEX_op_st_i32 - : INDEX_op_st_i64); - TCGOp *sop = tcg_op_insert_after(s, op, sopc); + /* Sync outputs upon their last write. */ + if (NEED_SYNC_ARG(i)) { + TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32 + ? INDEX_op_st_i32 + : INDEX_op_st_i64); + TCGOp *sop = tcg_op_insert_after(s, op, sopc); - sop->args[0] = temp_arg(dir_ts); - sop->args[1] = temp_arg(arg_ts->mem_base); - sop->args[2] = arg_ts->mem_offset; + sop->args[0] = temp_arg(dir_ts); + sop->args[1] = temp_arg(arg_ts->mem_base); + sop->args[2] = arg_ts->mem_offset; - arg_ts->state = TS_MEM; - } - /* Drop outputs that are dead. */ - if (IS_DEAD_ARG(i)) { - arg_ts->state = TS_DEAD; + arg_ts->state = TS_MEM; + } + /* Drop outputs that are dead. 
*/ + if (IS_DEAD_ARG(i)) { + arg_ts->state = TS_DEAD; + } } } } diff --git a/qemu/tricore.h b/qemu/tricore.h index e378868526..30cc3e9a50 100644 --- a/qemu/tricore.h +++ b/qemu/tricore.h @@ -42,7 +42,10 @@ #define tcg_gen_shl_i64 tcg_gen_shl_i64_tricore #define tcg_gen_shr_i64 tcg_gen_shr_i64_tricore #define tcg_gen_st_i64 tcg_gen_st_i64_tricore +#define tcg_gen_add_i64 tcg_gen_add_i64_tricore +#define tcg_gen_sub_i64 tcg_gen_sub_i64_tricore #define tcg_gen_xor_i64 tcg_gen_xor_i64_tricore +#define tcg_gen_neg_i64 tcg_gen_neg_i64_tricore #define cpu_icount_to_ns cpu_icount_to_ns_tricore #define cpu_is_stopped cpu_is_stopped_tricore #define cpu_get_ticks cpu_get_ticks_tricore @@ -374,6 +377,8 @@ #define floatx80_sub floatx80_sub_tricore #define floatx80_mul floatx80_mul_tricore #define floatx80_div floatx80_div_tricore +#define floatx80_modrem floatx80_modrem_tricore +#define floatx80_mod floatx80_mod_tricore #define floatx80_rem floatx80_rem_tricore #define floatx80_sqrt floatx80_sqrt_tricore #define floatx80_eq floatx80_eq_tricore @@ -648,6 +653,7 @@ #define tcg_gen_gvec_dup_i32 tcg_gen_gvec_dup_i32_tricore #define tcg_gen_gvec_dup_i64 tcg_gen_gvec_dup_i64_tricore #define tcg_gen_gvec_dup_mem tcg_gen_gvec_dup_mem_tricore +#define tcg_gen_gvec_dup_imm tcg_gen_gvec_dup_imm_tricore #define tcg_gen_gvec_dup64i tcg_gen_gvec_dup64i_tricore #define tcg_gen_gvec_dup32i tcg_gen_gvec_dup32i_tricore #define tcg_gen_gvec_dup16i tcg_gen_gvec_dup16i_tricore @@ -702,13 +708,20 @@ #define tcg_gen_gvec_shri tcg_gen_gvec_shri_tricore #define tcg_gen_vec_sar8i_i64 tcg_gen_vec_sar8i_i64_tricore #define tcg_gen_vec_sar16i_i64 tcg_gen_vec_sar16i_i64_tricore +#define tcg_gen_vec_rotl8i_i64 tcg_gen_vec_rotl8i_i64_tricore +#define tcg_gen_vec_rotl16i_i64 tcg_gen_vec_rotl16i_i64_tricore #define tcg_gen_gvec_sari tcg_gen_gvec_sari_tricore +#define tcg_gen_gvec_rotli tcg_gen_gvec_rotli_tricore +#define tcg_gen_gvec_rotri tcg_gen_gvec_rotri_tricore #define tcg_gen_gvec_shls tcg_gen_gvec_shls_tricore #define tcg_gen_gvec_shrs tcg_gen_gvec_shrs_tricore #define tcg_gen_gvec_sars tcg_gen_gvec_sars_tricore +#define tcg_gen_gvec_rotls tcg_gen_gvec_rotls_tricore #define tcg_gen_gvec_shlv tcg_gen_gvec_shlv_tricore #define tcg_gen_gvec_shrv tcg_gen_gvec_shrv_tricore #define tcg_gen_gvec_sarv tcg_gen_gvec_sarv_tricore +#define tcg_gen_gvec_rotlv tcg_gen_gvec_rotlv_tricore +#define tcg_gen_gvec_rotrv tcg_gen_gvec_rotrv_tricore #define tcg_gen_gvec_cmp tcg_gen_gvec_cmp_tricore #define tcg_gen_gvec_bitsel tcg_gen_gvec_bitsel_tricore #define tcg_can_emit_vecop_list tcg_can_emit_vecop_list_tricore @@ -745,6 +758,8 @@ #define tcg_gen_shli_vec tcg_gen_shli_vec_tricore #define tcg_gen_shri_vec tcg_gen_shri_vec_tricore #define tcg_gen_sari_vec tcg_gen_sari_vec_tricore +#define tcg_gen_rotli_vec tcg_gen_rotli_vec_tricore +#define tcg_gen_rotri_vec tcg_gen_rotri_vec_tricore #define tcg_gen_cmp_vec tcg_gen_cmp_vec_tricore #define tcg_gen_add_vec tcg_gen_add_vec_tricore #define tcg_gen_sub_vec tcg_gen_sub_vec_tricore @@ -760,9 +775,12 @@ #define tcg_gen_shlv_vec tcg_gen_shlv_vec_tricore #define tcg_gen_shrv_vec tcg_gen_shrv_vec_tricore #define tcg_gen_sarv_vec tcg_gen_sarv_vec_tricore +#define tcg_gen_rotlv_vec tcg_gen_rotlv_vec_tricore +#define tcg_gen_rotrv_vec tcg_gen_rotrv_vec_tricore #define tcg_gen_shls_vec tcg_gen_shls_vec_tricore #define tcg_gen_shrs_vec tcg_gen_shrs_vec_tricore #define tcg_gen_sars_vec tcg_gen_sars_vec_tricore +#define tcg_gen_rotls_vec tcg_gen_rotls_vec_tricore #define tcg_gen_bitsel_vec tcg_gen_bitsel_vec_tricore 
#define tcg_gen_cmpsel_vec tcg_gen_cmpsel_vec_tricore #define tb_htable_lookup tb_htable_lookup_tricore @@ -774,6 +792,7 @@ #define cpu_loop_exit_restore cpu_loop_exit_restore_tricore #define cpu_loop_exit_atomic cpu_loop_exit_atomic_tricore #define tlb_init tlb_init_tricore +#define tlb_destroy tlb_destroy_tricore #define tlb_flush_by_mmuidx tlb_flush_by_mmuidx_tricore #define tlb_flush tlb_flush_tricore #define tlb_flush_by_mmuidx_all_cpus tlb_flush_by_mmuidx_all_cpus_tricore @@ -794,6 +813,7 @@ #define tlb_set_page tlb_set_page_tricore #define get_page_addr_code_hostp get_page_addr_code_hostp_tricore #define get_page_addr_code get_page_addr_code_tricore +#define probe_access_flags probe_access_flags_tricore #define probe_access probe_access_tricore #define tlb_vaddr_to_host tlb_vaddr_to_host_tricore #define helper_ret_ldub_mmu helper_ret_ldub_mmu_tricore @@ -810,22 +830,34 @@ #define helper_be_ldsl_mmu helper_be_ldsl_mmu_tricore #define cpu_ldub_mmuidx_ra cpu_ldub_mmuidx_ra_tricore #define cpu_ldsb_mmuidx_ra cpu_ldsb_mmuidx_ra_tricore -#define cpu_lduw_mmuidx_ra cpu_lduw_mmuidx_ra_tricore -#define cpu_ldsw_mmuidx_ra cpu_ldsw_mmuidx_ra_tricore -#define cpu_ldl_mmuidx_ra cpu_ldl_mmuidx_ra_tricore -#define cpu_ldq_mmuidx_ra cpu_ldq_mmuidx_ra_tricore +#define cpu_lduw_be_mmuidx_ra cpu_lduw_be_mmuidx_ra_tricore +#define cpu_lduw_le_mmuidx_ra cpu_lduw_le_mmuidx_ra_tricore +#define cpu_ldsw_be_mmuidx_ra cpu_ldsw_be_mmuidx_ra_tricore +#define cpu_ldsw_le_mmuidx_ra cpu_ldsw_le_mmuidx_ra_tricore +#define cpu_ldl_be_mmuidx_ra cpu_ldl_be_mmuidx_ra_tricore +#define cpu_ldl_le_mmuidx_ra cpu_ldl_le_mmuidx_ra_tricore +#define cpu_ldq_be_mmuidx_ra cpu_ldq_be_mmuidx_ra_tricore +#define cpu_ldq_le_mmuidx_ra cpu_ldq_le_mmuidx_ra_tricore #define cpu_ldub_data_ra cpu_ldub_data_ra_tricore #define cpu_ldsb_data_ra cpu_ldsb_data_ra_tricore -#define cpu_lduw_data_ra cpu_lduw_data_ra_tricore -#define cpu_ldsw_data_ra cpu_ldsw_data_ra_tricore -#define cpu_ldl_data_ra cpu_ldl_data_ra_tricore -#define cpu_ldq_data_ra cpu_ldq_data_ra_tricore +#define cpu_lduw_be_data_ra cpu_lduw_be_data_ra_tricore +#define cpu_lduw_le_data_ra cpu_lduw_le_data_ra_tricore +#define cpu_ldsw_be_data_ra cpu_ldsw_be_data_ra_tricore +#define cpu_ldsw_le_data_ra cpu_ldsw_le_data_ra_tricore +#define cpu_ldl_be_data_ra cpu_ldl_be_data_ra_tricore +#define cpu_ldl_le_data_ra cpu_ldl_le_data_ra_tricore +#define cpu_ldq_be_data_ra cpu_ldq_be_data_ra_tricore +#define cpu_ldq_le_data_ra cpu_ldq_le_data_ra_tricore #define cpu_ldub_data cpu_ldub_data_tricore #define cpu_ldsb_data cpu_ldsb_data_tricore -#define cpu_lduw_data cpu_lduw_data_tricore -#define cpu_ldsw_data cpu_ldsw_data_tricore -#define cpu_ldl_data cpu_ldl_data_tricore -#define cpu_ldq_data cpu_ldq_data_tricore +#define cpu_lduw_be_data cpu_lduw_be_data_tricore +#define cpu_lduw_le_data cpu_lduw_le_data_tricore +#define cpu_ldsw_be_data cpu_ldsw_be_data_tricore +#define cpu_ldsw_le_data cpu_ldsw_le_data_tricore +#define cpu_ldl_be_data cpu_ldl_be_data_tricore +#define cpu_ldl_le_data cpu_ldl_le_data_tricore +#define cpu_ldq_le_data cpu_ldq_le_data_tricore +#define cpu_ldq_be_data cpu_ldq_be_data_tricore #define helper_ret_stb_mmu helper_ret_stb_mmu_tricore #define helper_le_stw_mmu helper_le_stw_mmu_tricore #define helper_be_stw_mmu helper_be_stw_mmu_tricore @@ -834,17 +866,26 @@ #define helper_le_stq_mmu helper_le_stq_mmu_tricore #define helper_be_stq_mmu helper_be_stq_mmu_tricore #define cpu_stb_mmuidx_ra cpu_stb_mmuidx_ra_tricore -#define cpu_stw_mmuidx_ra cpu_stw_mmuidx_ra_tricore 
-#define cpu_stl_mmuidx_ra cpu_stl_mmuidx_ra_tricore -#define cpu_stq_mmuidx_ra cpu_stq_mmuidx_ra_tricore +#define cpu_stw_be_mmuidx_ra cpu_stw_be_mmuidx_ra_tricore +#define cpu_stw_le_mmuidx_ra cpu_stw_le_mmuidx_ra_tricore +#define cpu_stl_be_mmuidx_ra cpu_stl_be_mmuidx_ra_tricore +#define cpu_stl_le_mmuidx_ra cpu_stl_le_mmuidx_ra_tricore +#define cpu_stq_be_mmuidx_ra cpu_stq_be_mmuidx_ra_tricore +#define cpu_stq_le_mmuidx_ra cpu_stq_le_mmuidx_ra_tricore #define cpu_stb_data_ra cpu_stb_data_ra_tricore -#define cpu_stw_data_ra cpu_stw_data_ra_tricore -#define cpu_stl_data_ra cpu_stl_data_ra_tricore -#define cpu_stq_data_ra cpu_stq_data_ra_tricore +#define cpu_stw_be_data_ra cpu_stw_be_data_ra_tricore +#define cpu_stw_le_data_ra cpu_stw_le_data_ra_tricore +#define cpu_stl_be_data_ra cpu_stl_be_data_ra_tricore +#define cpu_stl_le_data_ra cpu_stl_le_data_ra_tricore +#define cpu_stq_be_data_ra cpu_stq_be_data_ra_tricore +#define cpu_stq_le_data_ra cpu_stq_le_data_ra_tricore #define cpu_stb_data cpu_stb_data_tricore -#define cpu_stw_data cpu_stw_data_tricore -#define cpu_stl_data cpu_stl_data_tricore -#define cpu_stq_data cpu_stq_data_tricore +#define cpu_stw_be_data cpu_stw_be_data_tricore +#define cpu_stw_le_data cpu_stw_le_data_tricore +#define cpu_stl_be_data cpu_stl_be_data_tricore +#define cpu_stl_le_data cpu_stl_le_data_tricore +#define cpu_stq_be_data cpu_stq_be_data_tricore +#define cpu_stq_le_data cpu_stq_le_data_tricore #define helper_atomic_cmpxchgb_mmu helper_atomic_cmpxchgb_mmu_tricore #define helper_atomic_xchgb_mmu helper_atomic_xchgb_mmu_tricore #define helper_atomic_fetch_addb_mmu helper_atomic_fetch_addb_mmu_tricore @@ -1101,6 +1142,7 @@ #define cpu_lduw_code cpu_lduw_code_tricore #define cpu_ldl_code cpu_ldl_code_tricore #define cpu_ldq_code cpu_ldq_code_tricore +#define cpu_interrupt_handler cpu_interrupt_handler_tricore #define helper_div_i32 helper_div_i32_tricore #define helper_rem_i32 helper_rem_i32_tricore #define helper_divu_i32 helper_divu_i32_tricore @@ -1185,6 +1227,10 @@ #define helper_gvec_sar16i helper_gvec_sar16i_tricore #define helper_gvec_sar32i helper_gvec_sar32i_tricore #define helper_gvec_sar64i helper_gvec_sar64i_tricore +#define helper_gvec_rotl8i helper_gvec_rotl8i_tricore +#define helper_gvec_rotl16i helper_gvec_rotl16i_tricore +#define helper_gvec_rotl32i helper_gvec_rotl32i_tricore +#define helper_gvec_rotl64i helper_gvec_rotl64i_tricore #define helper_gvec_shl8v helper_gvec_shl8v_tricore #define helper_gvec_shl16v helper_gvec_shl16v_tricore #define helper_gvec_shl32v helper_gvec_shl32v_tricore @@ -1197,6 +1243,14 @@ #define helper_gvec_sar16v helper_gvec_sar16v_tricore #define helper_gvec_sar32v helper_gvec_sar32v_tricore #define helper_gvec_sar64v helper_gvec_sar64v_tricore +#define helper_gvec_rotl8v helper_gvec_rotl8v_tricore +#define helper_gvec_rotl16v helper_gvec_rotl16v_tricore +#define helper_gvec_rotl32v helper_gvec_rotl32v_tricore +#define helper_gvec_rotl64v helper_gvec_rotl64v_tricore +#define helper_gvec_rotr8v helper_gvec_rotr8v_tricore +#define helper_gvec_rotr16v helper_gvec_rotr16v_tricore +#define helper_gvec_rotr32v helper_gvec_rotr32v_tricore +#define helper_gvec_rotr64v helper_gvec_rotr64v_tricore #define helper_gvec_eq8 helper_gvec_eq8_tricore #define helper_gvec_ne8 helper_gvec_ne8_tricore #define helper_gvec_lt8 helper_gvec_lt8_tricore diff --git a/qemu/util/guest-random.c b/qemu/util/guest-random.c index 7c1fe7be4f..9a0f300ba4 100644 --- a/qemu/util/guest-random.c +++ b/qemu/util/guest-random.c @@ -78,4 +78,3 @@ void 
qemu_guest_random_seed_thread_part2(uint64_t seed) } #endif } - diff --git a/qemu/x86_64.h b/qemu/x86_64.h index 0118257e9e..cec72b9667 100644 --- a/qemu/x86_64.h +++ b/qemu/x86_64.h @@ -42,7 +42,10 @@ #define tcg_gen_shl_i64 tcg_gen_shl_i64_x86_64 #define tcg_gen_shr_i64 tcg_gen_shr_i64_x86_64 #define tcg_gen_st_i64 tcg_gen_st_i64_x86_64 +#define tcg_gen_add_i64 tcg_gen_add_i64_x86_64 +#define tcg_gen_sub_i64 tcg_gen_sub_i64_x86_64 #define tcg_gen_xor_i64 tcg_gen_xor_i64_x86_64 +#define tcg_gen_neg_i64 tcg_gen_neg_i64_x86_64 #define cpu_icount_to_ns cpu_icount_to_ns_x86_64 #define cpu_is_stopped cpu_is_stopped_x86_64 #define cpu_get_ticks cpu_get_ticks_x86_64 @@ -374,6 +377,8 @@ #define floatx80_sub floatx80_sub_x86_64 #define floatx80_mul floatx80_mul_x86_64 #define floatx80_div floatx80_div_x86_64 +#define floatx80_modrem floatx80_modrem_x86_64 +#define floatx80_mod floatx80_mod_x86_64 #define floatx80_rem floatx80_rem_x86_64 #define floatx80_sqrt floatx80_sqrt_x86_64 #define floatx80_eq floatx80_eq_x86_64 @@ -648,6 +653,7 @@ #define tcg_gen_gvec_dup_i32 tcg_gen_gvec_dup_i32_x86_64 #define tcg_gen_gvec_dup_i64 tcg_gen_gvec_dup_i64_x86_64 #define tcg_gen_gvec_dup_mem tcg_gen_gvec_dup_mem_x86_64 +#define tcg_gen_gvec_dup_imm tcg_gen_gvec_dup_imm_x86_64 #define tcg_gen_gvec_dup64i tcg_gen_gvec_dup64i_x86_64 #define tcg_gen_gvec_dup32i tcg_gen_gvec_dup32i_x86_64 #define tcg_gen_gvec_dup16i tcg_gen_gvec_dup16i_x86_64 @@ -702,13 +708,20 @@ #define tcg_gen_gvec_shri tcg_gen_gvec_shri_x86_64 #define tcg_gen_vec_sar8i_i64 tcg_gen_vec_sar8i_i64_x86_64 #define tcg_gen_vec_sar16i_i64 tcg_gen_vec_sar16i_i64_x86_64 +#define tcg_gen_vec_rotl8i_i64 tcg_gen_vec_rotl8i_i64_x86_64 +#define tcg_gen_vec_rotl16i_i64 tcg_gen_vec_rotl16i_i64_x86_64 #define tcg_gen_gvec_sari tcg_gen_gvec_sari_x86_64 +#define tcg_gen_gvec_rotli tcg_gen_gvec_rotli_x86_64 +#define tcg_gen_gvec_rotri tcg_gen_gvec_rotri_x86_64 #define tcg_gen_gvec_shls tcg_gen_gvec_shls_x86_64 #define tcg_gen_gvec_shrs tcg_gen_gvec_shrs_x86_64 #define tcg_gen_gvec_sars tcg_gen_gvec_sars_x86_64 +#define tcg_gen_gvec_rotls tcg_gen_gvec_rotls_x86_64 #define tcg_gen_gvec_shlv tcg_gen_gvec_shlv_x86_64 #define tcg_gen_gvec_shrv tcg_gen_gvec_shrv_x86_64 #define tcg_gen_gvec_sarv tcg_gen_gvec_sarv_x86_64 +#define tcg_gen_gvec_rotlv tcg_gen_gvec_rotlv_x86_64 +#define tcg_gen_gvec_rotrv tcg_gen_gvec_rotrv_x86_64 #define tcg_gen_gvec_cmp tcg_gen_gvec_cmp_x86_64 #define tcg_gen_gvec_bitsel tcg_gen_gvec_bitsel_x86_64 #define tcg_can_emit_vecop_list tcg_can_emit_vecop_list_x86_64 @@ -745,6 +758,8 @@ #define tcg_gen_shli_vec tcg_gen_shli_vec_x86_64 #define tcg_gen_shri_vec tcg_gen_shri_vec_x86_64 #define tcg_gen_sari_vec tcg_gen_sari_vec_x86_64 +#define tcg_gen_rotli_vec tcg_gen_rotli_vec_x86_64 +#define tcg_gen_rotri_vec tcg_gen_rotri_vec_x86_64 #define tcg_gen_cmp_vec tcg_gen_cmp_vec_x86_64 #define tcg_gen_add_vec tcg_gen_add_vec_x86_64 #define tcg_gen_sub_vec tcg_gen_sub_vec_x86_64 @@ -760,9 +775,12 @@ #define tcg_gen_shlv_vec tcg_gen_shlv_vec_x86_64 #define tcg_gen_shrv_vec tcg_gen_shrv_vec_x86_64 #define tcg_gen_sarv_vec tcg_gen_sarv_vec_x86_64 +#define tcg_gen_rotlv_vec tcg_gen_rotlv_vec_x86_64 +#define tcg_gen_rotrv_vec tcg_gen_rotrv_vec_x86_64 #define tcg_gen_shls_vec tcg_gen_shls_vec_x86_64 #define tcg_gen_shrs_vec tcg_gen_shrs_vec_x86_64 #define tcg_gen_sars_vec tcg_gen_sars_vec_x86_64 +#define tcg_gen_rotls_vec tcg_gen_rotls_vec_x86_64 #define tcg_gen_bitsel_vec tcg_gen_bitsel_vec_x86_64 #define tcg_gen_cmpsel_vec tcg_gen_cmpsel_vec_x86_64 #define 
tb_htable_lookup tb_htable_lookup_x86_64 @@ -774,6 +792,7 @@ #define cpu_loop_exit_restore cpu_loop_exit_restore_x86_64 #define cpu_loop_exit_atomic cpu_loop_exit_atomic_x86_64 #define tlb_init tlb_init_x86_64 +#define tlb_destroy tlb_destroy_x86_64 #define tlb_flush_by_mmuidx tlb_flush_by_mmuidx_x86_64 #define tlb_flush tlb_flush_x86_64 #define tlb_flush_by_mmuidx_all_cpus tlb_flush_by_mmuidx_all_cpus_x86_64 @@ -794,6 +813,7 @@ #define tlb_set_page tlb_set_page_x86_64 #define get_page_addr_code_hostp get_page_addr_code_hostp_x86_64 #define get_page_addr_code get_page_addr_code_x86_64 +#define probe_access_flags probe_access_flags_x86_64 #define probe_access probe_access_x86_64 #define tlb_vaddr_to_host tlb_vaddr_to_host_x86_64 #define helper_ret_ldub_mmu helper_ret_ldub_mmu_x86_64 @@ -810,22 +830,34 @@ #define helper_be_ldsl_mmu helper_be_ldsl_mmu_x86_64 #define cpu_ldub_mmuidx_ra cpu_ldub_mmuidx_ra_x86_64 #define cpu_ldsb_mmuidx_ra cpu_ldsb_mmuidx_ra_x86_64 -#define cpu_lduw_mmuidx_ra cpu_lduw_mmuidx_ra_x86_64 -#define cpu_ldsw_mmuidx_ra cpu_ldsw_mmuidx_ra_x86_64 -#define cpu_ldl_mmuidx_ra cpu_ldl_mmuidx_ra_x86_64 -#define cpu_ldq_mmuidx_ra cpu_ldq_mmuidx_ra_x86_64 +#define cpu_lduw_be_mmuidx_ra cpu_lduw_be_mmuidx_ra_x86_64 +#define cpu_lduw_le_mmuidx_ra cpu_lduw_le_mmuidx_ra_x86_64 +#define cpu_ldsw_be_mmuidx_ra cpu_ldsw_be_mmuidx_ra_x86_64 +#define cpu_ldsw_le_mmuidx_ra cpu_ldsw_le_mmuidx_ra_x86_64 +#define cpu_ldl_be_mmuidx_ra cpu_ldl_be_mmuidx_ra_x86_64 +#define cpu_ldl_le_mmuidx_ra cpu_ldl_le_mmuidx_ra_x86_64 +#define cpu_ldq_be_mmuidx_ra cpu_ldq_be_mmuidx_ra_x86_64 +#define cpu_ldq_le_mmuidx_ra cpu_ldq_le_mmuidx_ra_x86_64 #define cpu_ldub_data_ra cpu_ldub_data_ra_x86_64 #define cpu_ldsb_data_ra cpu_ldsb_data_ra_x86_64 -#define cpu_lduw_data_ra cpu_lduw_data_ra_x86_64 -#define cpu_ldsw_data_ra cpu_ldsw_data_ra_x86_64 -#define cpu_ldl_data_ra cpu_ldl_data_ra_x86_64 -#define cpu_ldq_data_ra cpu_ldq_data_ra_x86_64 +#define cpu_lduw_be_data_ra cpu_lduw_be_data_ra_x86_64 +#define cpu_lduw_le_data_ra cpu_lduw_le_data_ra_x86_64 +#define cpu_ldsw_be_data_ra cpu_ldsw_be_data_ra_x86_64 +#define cpu_ldsw_le_data_ra cpu_ldsw_le_data_ra_x86_64 +#define cpu_ldl_be_data_ra cpu_ldl_be_data_ra_x86_64 +#define cpu_ldl_le_data_ra cpu_ldl_le_data_ra_x86_64 +#define cpu_ldq_be_data_ra cpu_ldq_be_data_ra_x86_64 +#define cpu_ldq_le_data_ra cpu_ldq_le_data_ra_x86_64 #define cpu_ldub_data cpu_ldub_data_x86_64 #define cpu_ldsb_data cpu_ldsb_data_x86_64 -#define cpu_lduw_data cpu_lduw_data_x86_64 -#define cpu_ldsw_data cpu_ldsw_data_x86_64 -#define cpu_ldl_data cpu_ldl_data_x86_64 -#define cpu_ldq_data cpu_ldq_data_x86_64 +#define cpu_lduw_be_data cpu_lduw_be_data_x86_64 +#define cpu_lduw_le_data cpu_lduw_le_data_x86_64 +#define cpu_ldsw_be_data cpu_ldsw_be_data_x86_64 +#define cpu_ldsw_le_data cpu_ldsw_le_data_x86_64 +#define cpu_ldl_be_data cpu_ldl_be_data_x86_64 +#define cpu_ldl_le_data cpu_ldl_le_data_x86_64 +#define cpu_ldq_le_data cpu_ldq_le_data_x86_64 +#define cpu_ldq_be_data cpu_ldq_be_data_x86_64 #define helper_ret_stb_mmu helper_ret_stb_mmu_x86_64 #define helper_le_stw_mmu helper_le_stw_mmu_x86_64 #define helper_be_stw_mmu helper_be_stw_mmu_x86_64 @@ -834,17 +866,26 @@ #define helper_le_stq_mmu helper_le_stq_mmu_x86_64 #define helper_be_stq_mmu helper_be_stq_mmu_x86_64 #define cpu_stb_mmuidx_ra cpu_stb_mmuidx_ra_x86_64 -#define cpu_stw_mmuidx_ra cpu_stw_mmuidx_ra_x86_64 -#define cpu_stl_mmuidx_ra cpu_stl_mmuidx_ra_x86_64 -#define cpu_stq_mmuidx_ra cpu_stq_mmuidx_ra_x86_64 +#define 
cpu_stw_be_mmuidx_ra cpu_stw_be_mmuidx_ra_x86_64 +#define cpu_stw_le_mmuidx_ra cpu_stw_le_mmuidx_ra_x86_64 +#define cpu_stl_be_mmuidx_ra cpu_stl_be_mmuidx_ra_x86_64 +#define cpu_stl_le_mmuidx_ra cpu_stl_le_mmuidx_ra_x86_64 +#define cpu_stq_be_mmuidx_ra cpu_stq_be_mmuidx_ra_x86_64 +#define cpu_stq_le_mmuidx_ra cpu_stq_le_mmuidx_ra_x86_64 #define cpu_stb_data_ra cpu_stb_data_ra_x86_64 -#define cpu_stw_data_ra cpu_stw_data_ra_x86_64 -#define cpu_stl_data_ra cpu_stl_data_ra_x86_64 -#define cpu_stq_data_ra cpu_stq_data_ra_x86_64 +#define cpu_stw_be_data_ra cpu_stw_be_data_ra_x86_64 +#define cpu_stw_le_data_ra cpu_stw_le_data_ra_x86_64 +#define cpu_stl_be_data_ra cpu_stl_be_data_ra_x86_64 +#define cpu_stl_le_data_ra cpu_stl_le_data_ra_x86_64 +#define cpu_stq_be_data_ra cpu_stq_be_data_ra_x86_64 +#define cpu_stq_le_data_ra cpu_stq_le_data_ra_x86_64 #define cpu_stb_data cpu_stb_data_x86_64 -#define cpu_stw_data cpu_stw_data_x86_64 -#define cpu_stl_data cpu_stl_data_x86_64 -#define cpu_stq_data cpu_stq_data_x86_64 +#define cpu_stw_be_data cpu_stw_be_data_x86_64 +#define cpu_stw_le_data cpu_stw_le_data_x86_64 +#define cpu_stl_be_data cpu_stl_be_data_x86_64 +#define cpu_stl_le_data cpu_stl_le_data_x86_64 +#define cpu_stq_be_data cpu_stq_be_data_x86_64 +#define cpu_stq_le_data cpu_stq_le_data_x86_64 #define helper_atomic_cmpxchgb_mmu helper_atomic_cmpxchgb_mmu_x86_64 #define helper_atomic_xchgb_mmu helper_atomic_xchgb_mmu_x86_64 #define helper_atomic_fetch_addb_mmu helper_atomic_fetch_addb_mmu_x86_64 @@ -1101,6 +1142,7 @@ #define cpu_lduw_code cpu_lduw_code_x86_64 #define cpu_ldl_code cpu_ldl_code_x86_64 #define cpu_ldq_code cpu_ldq_code_x86_64 +#define cpu_interrupt_handler cpu_interrupt_handler_x86_64 #define helper_div_i32 helper_div_i32_x86_64 #define helper_rem_i32 helper_rem_i32_x86_64 #define helper_divu_i32 helper_divu_i32_x86_64 @@ -1185,6 +1227,10 @@ #define helper_gvec_sar16i helper_gvec_sar16i_x86_64 #define helper_gvec_sar32i helper_gvec_sar32i_x86_64 #define helper_gvec_sar64i helper_gvec_sar64i_x86_64 +#define helper_gvec_rotl8i helper_gvec_rotl8i_x86_64 +#define helper_gvec_rotl16i helper_gvec_rotl16i_x86_64 +#define helper_gvec_rotl32i helper_gvec_rotl32i_x86_64 +#define helper_gvec_rotl64i helper_gvec_rotl64i_x86_64 #define helper_gvec_shl8v helper_gvec_shl8v_x86_64 #define helper_gvec_shl16v helper_gvec_shl16v_x86_64 #define helper_gvec_shl32v helper_gvec_shl32v_x86_64 @@ -1197,6 +1243,14 @@ #define helper_gvec_sar16v helper_gvec_sar16v_x86_64 #define helper_gvec_sar32v helper_gvec_sar32v_x86_64 #define helper_gvec_sar64v helper_gvec_sar64v_x86_64 +#define helper_gvec_rotl8v helper_gvec_rotl8v_x86_64 +#define helper_gvec_rotl16v helper_gvec_rotl16v_x86_64 +#define helper_gvec_rotl32v helper_gvec_rotl32v_x86_64 +#define helper_gvec_rotl64v helper_gvec_rotl64v_x86_64 +#define helper_gvec_rotr8v helper_gvec_rotr8v_x86_64 +#define helper_gvec_rotr16v helper_gvec_rotr16v_x86_64 +#define helper_gvec_rotr32v helper_gvec_rotr32v_x86_64 +#define helper_gvec_rotr64v helper_gvec_rotr64v_x86_64 #define helper_gvec_eq8 helper_gvec_eq8_x86_64 #define helper_gvec_ne8 helper_gvec_ne8_x86_64 #define helper_gvec_lt8 helper_gvec_lt8_x86_64 @@ -1419,6 +1473,7 @@ #define helper_xgetbv helper_xgetbv_x86_64 #define helper_xsetbv helper_xsetbv_x86_64 #define update_mxcsr_status update_mxcsr_status_x86_64 +#define update_mxcsr_from_sse_status update_mxcsr_from_sse_status_x86_64 #define helper_ldmxcsr helper_ldmxcsr_x86_64 #define helper_enter_mmx helper_enter_mmx_x86_64 #define helper_emms 
helper_emms_x86_64 diff --git a/symbols.sh b/symbols.sh index 11866f9334..4424fb4319 100755 --- a/symbols.sh +++ b/symbols.sh @@ -42,7 +42,10 @@ tcg_gen_sar_i64 \ tcg_gen_shl_i64 \ tcg_gen_shr_i64 \ tcg_gen_st_i64 \ +tcg_gen_add_i64 \ +tcg_gen_sub_i64 \ tcg_gen_xor_i64 \ +tcg_gen_neg_i64 \ cpu_icount_to_ns \ cpu_is_stopped \ cpu_get_ticks \ @@ -374,6 +377,8 @@ floatx80_add \ floatx80_sub \ floatx80_mul \ floatx80_div \ +floatx80_modrem \ +floatx80_mod \ floatx80_rem \ floatx80_sqrt \ floatx80_eq \ @@ -648,6 +653,7 @@ tcg_gen_gvec_mov \ tcg_gen_gvec_dup_i32 \ tcg_gen_gvec_dup_i64 \ tcg_gen_gvec_dup_mem \ +tcg_gen_gvec_dup_imm \ tcg_gen_gvec_dup64i \ tcg_gen_gvec_dup32i \ tcg_gen_gvec_dup16i \ @@ -702,13 +708,20 @@ tcg_gen_vec_shr16i_i64 \ tcg_gen_gvec_shri \ tcg_gen_vec_sar8i_i64 \ tcg_gen_vec_sar16i_i64 \ +tcg_gen_vec_rotl8i_i64 \ +tcg_gen_vec_rotl16i_i64 \ tcg_gen_gvec_sari \ +tcg_gen_gvec_rotli \ +tcg_gen_gvec_rotri \ tcg_gen_gvec_shls \ tcg_gen_gvec_shrs \ tcg_gen_gvec_sars \ +tcg_gen_gvec_rotls \ tcg_gen_gvec_shlv \ tcg_gen_gvec_shrv \ tcg_gen_gvec_sarv \ +tcg_gen_gvec_rotlv \ +tcg_gen_gvec_rotrv \ tcg_gen_gvec_cmp \ tcg_gen_gvec_bitsel \ tcg_can_emit_vecop_list \ @@ -745,6 +758,8 @@ tcg_gen_abs_vec \ tcg_gen_shli_vec \ tcg_gen_shri_vec \ tcg_gen_sari_vec \ +tcg_gen_rotli_vec \ +tcg_gen_rotri_vec \ tcg_gen_cmp_vec \ tcg_gen_add_vec \ tcg_gen_sub_vec \ @@ -760,9 +775,12 @@ tcg_gen_umax_vec \ tcg_gen_shlv_vec \ tcg_gen_shrv_vec \ tcg_gen_sarv_vec \ +tcg_gen_rotlv_vec \ +tcg_gen_rotrv_vec \ tcg_gen_shls_vec \ tcg_gen_shrs_vec \ tcg_gen_sars_vec \ +tcg_gen_rotls_vec \ tcg_gen_bitsel_vec \ tcg_gen_cmpsel_vec \ tb_htable_lookup \ @@ -774,6 +792,7 @@ cpu_loop_exit \ cpu_loop_exit_restore \ cpu_loop_exit_atomic \ tlb_init \ +tlb_destroy \ tlb_flush_by_mmuidx \ tlb_flush \ tlb_flush_by_mmuidx_all_cpus \ @@ -794,6 +813,7 @@ tlb_set_page_with_attrs \ tlb_set_page \ get_page_addr_code_hostp \ get_page_addr_code \ +probe_access_flags \ probe_access \ tlb_vaddr_to_host \ helper_ret_ldub_mmu \ @@ -810,22 +830,34 @@ helper_le_ldsl_mmu \ helper_be_ldsl_mmu \ cpu_ldub_mmuidx_ra \ cpu_ldsb_mmuidx_ra \ -cpu_lduw_mmuidx_ra \ -cpu_ldsw_mmuidx_ra \ -cpu_ldl_mmuidx_ra \ -cpu_ldq_mmuidx_ra \ +cpu_lduw_be_mmuidx_ra \ +cpu_lduw_le_mmuidx_ra \ +cpu_ldsw_be_mmuidx_ra \ +cpu_ldsw_le_mmuidx_ra \ +cpu_ldl_be_mmuidx_ra \ +cpu_ldl_le_mmuidx_ra \ +cpu_ldq_be_mmuidx_ra \ +cpu_ldq_le_mmuidx_ra \ cpu_ldub_data_ra \ cpu_ldsb_data_ra \ -cpu_lduw_data_ra \ -cpu_ldsw_data_ra \ -cpu_ldl_data_ra \ -cpu_ldq_data_ra \ +cpu_lduw_be_data_ra \ +cpu_lduw_le_data_ra \ +cpu_ldsw_be_data_ra \ +cpu_ldsw_le_data_ra \ +cpu_ldl_be_data_ra \ +cpu_ldl_le_data_ra \ +cpu_ldq_be_data_ra \ +cpu_ldq_le_data_ra \ cpu_ldub_data \ cpu_ldsb_data \ -cpu_lduw_data \ -cpu_ldsw_data \ -cpu_ldl_data \ -cpu_ldq_data \ +cpu_lduw_be_data \ +cpu_lduw_le_data \ +cpu_ldsw_be_data \ +cpu_ldsw_le_data \ +cpu_ldl_be_data \ +cpu_ldl_le_data \ +cpu_ldq_le_data \ +cpu_ldq_be_data \ helper_ret_stb_mmu \ helper_le_stw_mmu \ helper_be_stw_mmu \ @@ -834,17 +866,26 @@ helper_be_stl_mmu \ helper_le_stq_mmu \ helper_be_stq_mmu \ cpu_stb_mmuidx_ra \ -cpu_stw_mmuidx_ra \ -cpu_stl_mmuidx_ra \ -cpu_stq_mmuidx_ra \ +cpu_stw_be_mmuidx_ra \ +cpu_stw_le_mmuidx_ra \ +cpu_stl_be_mmuidx_ra \ +cpu_stl_le_mmuidx_ra \ +cpu_stq_be_mmuidx_ra \ +cpu_stq_le_mmuidx_ra \ cpu_stb_data_ra \ -cpu_stw_data_ra \ -cpu_stl_data_ra \ -cpu_stq_data_ra \ +cpu_stw_be_data_ra \ +cpu_stw_le_data_ra \ +cpu_stl_be_data_ra \ +cpu_stl_le_data_ra \ +cpu_stq_be_data_ra \ +cpu_stq_le_data_ra \ cpu_stb_data \ 
-cpu_stw_data \ -cpu_stl_data \ -cpu_stq_data \ +cpu_stw_be_data \ +cpu_stw_le_data \ +cpu_stl_be_data \ +cpu_stl_le_data \ +cpu_stq_be_data \ +cpu_stq_le_data \ helper_atomic_cmpxchgb_mmu \ helper_atomic_xchgb_mmu \ helper_atomic_fetch_addb_mmu \ @@ -1101,6 +1142,7 @@ cpu_ldub_code \ cpu_lduw_code \ cpu_ldl_code \ cpu_ldq_code \ +cpu_interrupt_handler \ helper_div_i32 \ helper_rem_i32 \ helper_divu_i32 \ @@ -1185,6 +1227,10 @@ helper_gvec_sar8i \ helper_gvec_sar16i \ helper_gvec_sar32i \ helper_gvec_sar64i \ +helper_gvec_rotl8i \ +helper_gvec_rotl16i \ +helper_gvec_rotl32i \ +helper_gvec_rotl64i \ helper_gvec_shl8v \ helper_gvec_shl16v \ helper_gvec_shl32v \ @@ -1197,6 +1243,14 @@ helper_gvec_sar8v \ helper_gvec_sar16v \ helper_gvec_sar32v \ helper_gvec_sar64v \ +helper_gvec_rotl8v \ +helper_gvec_rotl16v \ +helper_gvec_rotl32v \ +helper_gvec_rotl64v \ +helper_gvec_rotr8v \ +helper_gvec_rotr16v \ +helper_gvec_rotr32v \ +helper_gvec_rotr64v \ helper_gvec_eq8 \ helper_gvec_ne8 \ helper_gvec_lt8 \ @@ -1422,6 +1476,7 @@ helper_xrstor \ helper_xgetbv \ helper_xsetbv \ update_mxcsr_status \ +update_mxcsr_from_sse_status \ helper_ldmxcsr \ helper_enter_mmx \ helper_emms \ @@ -2604,6 +2659,102 @@ cmtst_op \ sri_op \ usra_op \ ssra_op \ +gen_gvec_ceq0 \ +gen_gvec_cge0 \ +gen_gvec_cgt0 \ +gen_gvec_cle0 \ +gen_gvec_clt0 \ +gen_gvec_cmtst \ +gen_gvec_mla \ +gen_gvec_mls \ +gen_gvec_saba \ +gen_gvec_sabd \ +gen_gvec_sli \ +gen_gvec_sqadd_qc \ +gen_gvec_sqrdmlah_qc \ +gen_gvec_sqrdmlsh_qc \ +gen_gvec_sqsub_qc \ +gen_gvec_sri \ +gen_gvec_srshr \ +gen_gvec_srsra \ +gen_gvec_sshl \ +gen_gvec_ssra \ +gen_gvec_uaba \ +gen_gvec_uabd \ +gen_gvec_uqadd_qc \ +gen_gvec_uqsub_qc \ +gen_gvec_urshr \ +gen_gvec_ursra \ +gen_gvec_ushl \ +gen_gvec_usra \ +helper_crypto_rax1 \ +helper_crypto_sha1c \ +helper_crypto_sha1m \ +helper_crypto_sha1p \ +helper_crypto_sha1su0 \ +helper_crypto_sm3tt1a \ +helper_crypto_sm3tt1b \ +helper_crypto_sm3tt2a \ +helper_crypto_sm3tt2b \ +helper_gvec_ceq0_b \ +helper_gvec_ceq0_h \ +helper_gvec_cge0_b \ +helper_gvec_cge0_h \ +helper_gvec_cgt0_b \ +helper_gvec_cgt0_h \ +helper_gvec_cle0_b \ +helper_gvec_cle0_h \ +helper_gvec_clt0_b \ +helper_gvec_clt0_h \ +helper_gvec_fabd_s \ +helper_gvec_saba_b \ +helper_gvec_saba_d \ +helper_gvec_saba_h \ +helper_gvec_saba_s \ +helper_gvec_sabd_b \ +helper_gvec_sabd_d \ +helper_gvec_sabd_h \ +helper_gvec_sabd_s \ +helper_gvec_sli_b \ +helper_gvec_sli_d \ +helper_gvec_sli_h \ +helper_gvec_sli_s \ +helper_gvec_sri_b \ +helper_gvec_sri_d \ +helper_gvec_sri_h \ +helper_gvec_sri_s \ +helper_gvec_srshr_b \ +helper_gvec_srshr_d \ +helper_gvec_srshr_h \ +helper_gvec_srshr_s \ +helper_gvec_srsra_b \ +helper_gvec_srsra_d \ +helper_gvec_srsra_h \ +helper_gvec_srsra_s \ +helper_gvec_ssra_b \ +helper_gvec_ssra_d \ +helper_gvec_ssra_h \ +helper_gvec_ssra_s \ +helper_gvec_uaba_b \ +helper_gvec_uaba_d \ +helper_gvec_uaba_h \ +helper_gvec_uaba_s \ +helper_gvec_uabd_b \ +helper_gvec_uabd_d \ +helper_gvec_uabd_h \ +helper_gvec_uabd_s \ +helper_gvec_urshr_b \ +helper_gvec_urshr_d \ +helper_gvec_urshr_h \ +helper_gvec_urshr_s \ +helper_gvec_ursra_b \ +helper_gvec_ursra_d \ +helper_gvec_ursra_h \ +helper_gvec_ursra_s \ +helper_gvec_usra_b \ +helper_gvec_usra_d \ +helper_gvec_usra_h \ +helper_gvec_usra_s \ " aarch64_SYMBOLS=" @@ -2930,6 +3081,11 @@ helper_v7m_tt \ arm_v7m_mmu_idx_all \ arm_v7m_mmu_idx_for_secstate_and_priv \ arm_v7m_mmu_idx_for_secstate \ +mte_probe1 \ +mte_check1 \ +mte_checkN \ +gen_helper_mte_check1 \ +gen_helper_mte_checkN \ helper_neon_qadd_u8 \ 
helper_neon_qadd_u16 \ helper_neon_qadd_u32 \ @@ -3169,6 +3325,21 @@ helper_autda \ helper_autdb \ helper_xpaci \ helper_xpacd \ +helper_mte_check1 \ +helper_mte_checkN \ +helper_mte_check_zva \ +helper_irg \ +helper_addsubg \ +helper_ldg \ +helper_stg \ +helper_stg_parallel \ +helper_stg_stub \ +helper_st2g \ +helper_st2g_parallel \ +helper_st2g_stub \ +helper_ldgm \ +helper_stgm \ +helper_stzgm_tags \ arm_is_psci_call \ arm_handle_psci_call \ helper_sve_predtest1 \ @@ -4061,6 +4232,7 @@ a64_translate_init \ gen_a64_set_pc_im \ unallocated_encoding \ new_tmp_a64 \ +new_tmp_a64_local \ new_tmp_a64_zero \ cpu_reg \ cpu_reg_sp \ @@ -4381,6 +4553,7 @@ helper_sret \ helper_mret \ helper_wfi \ helper_tlb_flush \ +helper_hyp_tlb_flush \ pmp_hart_has_privs \ pmpcfg_csr_write \ pmpcfg_csr_read \ @@ -4401,6 +4574,1008 @@ helper_fcvt_d_lu \ gen_helper_tlb_flush \ riscv_fpr_regnames \ riscv_int_regnames \ +fclass_d \ +fclass_h \ +fclass_s \ +helper_vaadd_vv_b \ +helper_vaadd_vv_d \ +helper_vaadd_vv_h \ +helper_vaadd_vv_w \ +helper_vaadd_vx_b \ +helper_vaadd_vx_d \ +helper_vaadd_vx_h \ +helper_vaadd_vx_w \ +helper_vadc_vvm_b \ +helper_vadc_vvm_d \ +helper_vadc_vvm_h \ +helper_vadc_vvm_w \ +helper_vadc_vxm_b \ +helper_vadc_vxm_d \ +helper_vadc_vxm_h \ +helper_vadc_vxm_w \ +helper_vadd_vv_b \ +helper_vadd_vv_d \ +helper_vadd_vv_h \ +helper_vadd_vv_w \ +helper_vadd_vx_b \ +helper_vadd_vx_d \ +helper_vadd_vx_h \ +helper_vadd_vx_w \ +helper_vamoaddw_v_w \ +helper_vamoandw_v_w \ +helper_vamomaxuw_v_w \ +helper_vamomaxw_v_w \ +helper_vamominuw_v_w \ +helper_vamominw_v_w \ +helper_vamoorw_v_w \ +helper_vamoswapw_v_w \ +helper_vamoxorw_v_w \ +helper_vand_vv_b \ +helper_vand_vv_d \ +helper_vand_vv_h \ +helper_vand_vv_w \ +helper_vand_vx_b \ +helper_vand_vx_d \ +helper_vand_vx_h \ +helper_vand_vx_w \ +helper_vasub_vv_b \ +helper_vasub_vv_d \ +helper_vasub_vv_h \ +helper_vasub_vv_w \ +helper_vasub_vx_b \ +helper_vasub_vx_d \ +helper_vasub_vx_h \ +helper_vasub_vx_w \ +helper_vcompress_vm_b \ +helper_vcompress_vm_d \ +helper_vcompress_vm_h \ +helper_vcompress_vm_w \ +helper_vdiv_vv_b \ +helper_vdiv_vv_d \ +helper_vdiv_vv_h \ +helper_vdiv_vv_w \ +helper_vdiv_vx_b \ +helper_vdiv_vx_d \ +helper_vdiv_vx_h \ +helper_vdiv_vx_w \ +helper_vdivu_vv_b \ +helper_vdivu_vv_d \ +helper_vdivu_vv_h \ +helper_vdivu_vv_w \ +helper_vdivu_vx_b \ +helper_vdivu_vx_d \ +helper_vdivu_vx_h \ +helper_vdivu_vx_w \ +helper_vec_rsubs16 \ +helper_vec_rsubs32 \ +helper_vec_rsubs64 \ +helper_vec_rsubs8 \ +helper_vfadd_vf_d \ +helper_vfadd_vf_h \ +helper_vfadd_vf_w \ +helper_vfadd_vv_d \ +helper_vfadd_vv_h \ +helper_vfadd_vv_w \ +helper_vfclass_v_d \ +helper_vfclass_v_h \ +helper_vfclass_v_w \ +helper_vfcvt_f_x_v_d \ +helper_vfcvt_f_x_v_h \ +helper_vfcvt_f_x_v_w \ +helper_vfcvt_f_xu_v_d \ +helper_vfcvt_f_xu_v_h \ +helper_vfcvt_f_xu_v_w \ +helper_vfcvt_x_f_v_d \ +helper_vfcvt_x_f_v_h \ +helper_vfcvt_x_f_v_w \ +helper_vfcvt_xu_f_v_d \ +helper_vfcvt_xu_f_v_h \ +helper_vfcvt_xu_f_v_w \ +helper_vfdiv_vf_d \ +helper_vfdiv_vf_h \ +helper_vfdiv_vf_w \ +helper_vfdiv_vv_d \ +helper_vfdiv_vv_h \ +helper_vfdiv_vv_w \ +helper_vfmacc_vf_d \ +helper_vfmacc_vf_h \ +helper_vfmacc_vf_w \ +helper_vfmacc_vv_d \ +helper_vfmacc_vv_h \ +helper_vfmacc_vv_w \ +helper_vfmadd_vf_d \ +helper_vfmadd_vf_h \ +helper_vfmadd_vf_w \ +helper_vfmadd_vv_d \ +helper_vfmadd_vv_h \ +helper_vfmadd_vv_w \ +helper_vfmax_vf_d \ +helper_vfmax_vf_h \ +helper_vfmax_vf_w \ +helper_vfmax_vv_d \ +helper_vfmax_vv_h \ +helper_vfmax_vv_w \ +helper_vfmerge_vfm_d \ +helper_vfmerge_vfm_h \ 
+helper_vfmerge_vfm_w \ +helper_vfmin_vf_d \ +helper_vfmin_vf_h \ +helper_vfmin_vf_w \ +helper_vfmin_vv_d \ +helper_vfmin_vv_h \ +helper_vfmin_vv_w \ +helper_vfmsac_vf_d \ +helper_vfmsac_vf_h \ +helper_vfmsac_vf_w \ +helper_vfmsac_vv_d \ +helper_vfmsac_vv_h \ +helper_vfmsac_vv_w \ +helper_vfmsub_vf_d \ +helper_vfmsub_vf_h \ +helper_vfmsub_vf_w \ +helper_vfmsub_vv_d \ +helper_vfmsub_vv_h \ +helper_vfmsub_vv_w \ +helper_vfmul_vf_d \ +helper_vfmul_vf_h \ +helper_vfmul_vf_w \ +helper_vfmul_vv_d \ +helper_vfmul_vv_h \ +helper_vfmul_vv_w \ +helper_vfncvt_f_f_v_h \ +helper_vfncvt_f_f_v_w \ +helper_vfncvt_f_x_v_h \ +helper_vfncvt_f_x_v_w \ +helper_vfncvt_f_xu_v_h \ +helper_vfncvt_f_xu_v_w \ +helper_vfncvt_x_f_v_h \ +helper_vfncvt_x_f_v_w \ +helper_vfncvt_xu_f_v_h \ +helper_vfncvt_xu_f_v_w \ +helper_vfnmacc_vf_d \ +helper_vfnmacc_vf_h \ +helper_vfnmacc_vf_w \ +helper_vfnmacc_vv_d \ +helper_vfnmacc_vv_h \ +helper_vfnmacc_vv_w \ +helper_vfnmadd_vf_d \ +helper_vfnmadd_vf_h \ +helper_vfnmadd_vf_w \ +helper_vfnmadd_vv_d \ +helper_vfnmadd_vv_h \ +helper_vfnmadd_vv_w \ +helper_vfnmsac_vf_d \ +helper_vfnmsac_vf_h \ +helper_vfnmsac_vf_w \ +helper_vfnmsac_vv_d \ +helper_vfnmsac_vv_h \ +helper_vfnmsac_vv_w \ +helper_vfnmsub_vf_d \ +helper_vfnmsub_vf_h \ +helper_vfnmsub_vf_w \ +helper_vfnmsub_vv_d \ +helper_vfnmsub_vv_h \ +helper_vfnmsub_vv_w \ +helper_vfrdiv_vf_d \ +helper_vfrdiv_vf_h \ +helper_vfrdiv_vf_w \ +helper_vfredmax_vs_d \ +helper_vfredmax_vs_h \ +helper_vfredmax_vs_w \ +helper_vfredmin_vs_d \ +helper_vfredmin_vs_h \ +helper_vfredmin_vs_w \ +helper_vfredsum_vs_d \ +helper_vfredsum_vs_h \ +helper_vfredsum_vs_w \ +helper_vfrsub_vf_d \ +helper_vfrsub_vf_h \ +helper_vfrsub_vf_w \ +helper_vfsgnj_vf_d \ +helper_vfsgnj_vf_h \ +helper_vfsgnj_vf_w \ +helper_vfsgnj_vv_d \ +helper_vfsgnj_vv_h \ +helper_vfsgnj_vv_w \ +helper_vfsgnjn_vf_d \ +helper_vfsgnjn_vf_h \ +helper_vfsgnjn_vf_w \ +helper_vfsgnjn_vv_d \ +helper_vfsgnjn_vv_h \ +helper_vfsgnjn_vv_w \ +helper_vfsgnjx_vf_d \ +helper_vfsgnjx_vf_h \ +helper_vfsgnjx_vf_w \ +helper_vfsgnjx_vv_d \ +helper_vfsgnjx_vv_h \ +helper_vfsgnjx_vv_w \ +helper_vfsqrt_v_d \ +helper_vfsqrt_v_h \ +helper_vfsqrt_v_w \ +helper_vfsub_vf_d \ +helper_vfsub_vf_h \ +helper_vfsub_vf_w \ +helper_vfsub_vv_d \ +helper_vfsub_vv_h \ +helper_vfsub_vv_w \ +helper_vfwadd_vf_h \ +helper_vfwadd_vf_w \ +helper_vfwadd_vv_h \ +helper_vfwadd_vv_w \ +helper_vfwadd_wf_h \ +helper_vfwadd_wf_w \ +helper_vfwadd_wv_h \ +helper_vfwadd_wv_w \ +helper_vfwcvt_f_f_v_h \ +helper_vfwcvt_f_f_v_w \ +helper_vfwcvt_f_x_v_h \ +helper_vfwcvt_f_x_v_w \ +helper_vfwcvt_f_xu_v_h \ +helper_vfwcvt_f_xu_v_w \ +helper_vfwcvt_x_f_v_h \ +helper_vfwcvt_x_f_v_w \ +helper_vfwcvt_xu_f_v_h \ +helper_vfwcvt_xu_f_v_w \ +helper_vfwmacc_vf_h \ +helper_vfwmacc_vf_w \ +helper_vfwmacc_vv_h \ +helper_vfwmacc_vv_w \ +helper_vfwmsac_vf_h \ +helper_vfwmsac_vf_w \ +helper_vfwmsac_vv_h \ +helper_vfwmsac_vv_w \ +helper_vfwmul_vf_h \ +helper_vfwmul_vf_w \ +helper_vfwmul_vv_h \ +helper_vfwmul_vv_w \ +helper_vfwnmacc_vf_h \ +helper_vfwnmacc_vf_w \ +helper_vfwnmacc_vv_h \ +helper_vfwnmacc_vv_w \ +helper_vfwnmsac_vf_h \ +helper_vfwnmsac_vf_w \ +helper_vfwnmsac_vv_h \ +helper_vfwnmsac_vv_w \ +helper_vfwredsum_vs_h \ +helper_vfwredsum_vs_w \ +helper_vfwsub_vf_h \ +helper_vfwsub_vf_w \ +helper_vfwsub_vv_h \ +helper_vfwsub_vv_w \ +helper_vfwsub_wf_h \ +helper_vfwsub_wf_w \ +helper_vfwsub_wv_h \ +helper_vfwsub_wv_w \ +helper_vid_v_b \ +helper_vid_v_d \ +helper_vid_v_h \ +helper_vid_v_w \ +helper_viota_m_b \ +helper_viota_m_d \ +helper_viota_m_h \ 
+helper_viota_m_w \ +helper_vlb_v_b \ +helper_vlb_v_b_mask \ +helper_vlb_v_d \ +helper_vlb_v_d_mask \ +helper_vlb_v_h \ +helper_vlb_v_h_mask \ +helper_vlb_v_w \ +helper_vlb_v_w_mask \ +helper_vlbff_v_b \ +helper_vlbff_v_d \ +helper_vlbff_v_h \ +helper_vlbff_v_w \ +helper_vlbu_v_b \ +helper_vlbu_v_b_mask \ +helper_vlbu_v_d \ +helper_vlbu_v_d_mask \ +helper_vlbu_v_h \ +helper_vlbu_v_h_mask \ +helper_vlbu_v_w \ +helper_vlbu_v_w_mask \ +helper_vlbuff_v_b \ +helper_vlbuff_v_d \ +helper_vlbuff_v_h \ +helper_vlbuff_v_w \ +helper_vle_v_b \ +helper_vle_v_b_mask \ +helper_vle_v_d \ +helper_vle_v_d_mask \ +helper_vle_v_h \ +helper_vle_v_h_mask \ +helper_vle_v_w \ +helper_vle_v_w_mask \ +helper_vleff_v_b \ +helper_vleff_v_d \ +helper_vleff_v_h \ +helper_vleff_v_w \ +helper_vlh_v_d \ +helper_vlh_v_d_mask \ +helper_vlh_v_h \ +helper_vlh_v_h_mask \ +helper_vlh_v_w \ +helper_vlh_v_w_mask \ +helper_vlhff_v_d \ +helper_vlhff_v_h \ +helper_vlhff_v_w \ +helper_vlhu_v_d \ +helper_vlhu_v_d_mask \ +helper_vlhu_v_h \ +helper_vlhu_v_h_mask \ +helper_vlhu_v_w \ +helper_vlhu_v_w_mask \ +helper_vlhuff_v_d \ +helper_vlhuff_v_h \ +helper_vlhuff_v_w \ +helper_vlsb_v_b \ +helper_vlsb_v_d \ +helper_vlsb_v_h \ +helper_vlsb_v_w \ +helper_vlsbu_v_b \ +helper_vlsbu_v_d \ +helper_vlsbu_v_h \ +helper_vlsbu_v_w \ +helper_vlse_v_b \ +helper_vlse_v_d \ +helper_vlse_v_h \ +helper_vlse_v_w \ +helper_vlsh_v_d \ +helper_vlsh_v_h \ +helper_vlsh_v_w \ +helper_vlshu_v_d \ +helper_vlshu_v_h \ +helper_vlshu_v_w \ +helper_vlsw_v_d \ +helper_vlsw_v_w \ +helper_vlswu_v_d \ +helper_vlswu_v_w \ +helper_vlw_v_d \ +helper_vlw_v_d_mask \ +helper_vlw_v_w \ +helper_vlw_v_w_mask \ +helper_vlwff_v_d \ +helper_vlwff_v_w \ +helper_vlwu_v_d \ +helper_vlwu_v_d_mask \ +helper_vlwu_v_w \ +helper_vlwu_v_w_mask \ +helper_vlwuff_v_d \ +helper_vlwuff_v_w \ +helper_vlxb_v_b \ +helper_vlxb_v_d \ +helper_vlxb_v_h \ +helper_vlxb_v_w \ +helper_vlxbu_v_b \ +helper_vlxbu_v_d \ +helper_vlxbu_v_h \ +helper_vlxbu_v_w \ +helper_vlxe_v_b \ +helper_vlxe_v_d \ +helper_vlxe_v_h \ +helper_vlxe_v_w \ +helper_vlxh_v_d \ +helper_vlxh_v_h \ +helper_vlxh_v_w \ +helper_vlxhu_v_d \ +helper_vlxhu_v_h \ +helper_vlxhu_v_w \ +helper_vlxw_v_d \ +helper_vlxw_v_w \ +helper_vlxwu_v_d \ +helper_vlxwu_v_w \ +helper_vmacc_vv_b \ +helper_vmacc_vv_d \ +helper_vmacc_vv_h \ +helper_vmacc_vv_w \ +helper_vmacc_vx_b \ +helper_vmacc_vx_d \ +helper_vmacc_vx_h \ +helper_vmacc_vx_w \ +helper_vmadc_vvm_b \ +helper_vmadc_vvm_d \ +helper_vmadc_vvm_h \ +helper_vmadc_vvm_w \ +helper_vmadc_vxm_b \ +helper_vmadc_vxm_d \ +helper_vmadc_vxm_h \ +helper_vmadc_vxm_w \ +helper_vmadd_vv_b \ +helper_vmadd_vv_d \ +helper_vmadd_vv_h \ +helper_vmadd_vv_w \ +helper_vmadd_vx_b \ +helper_vmadd_vx_d \ +helper_vmadd_vx_h \ +helper_vmadd_vx_w \ +helper_vmand_mm \ +helper_vmandnot_mm \ +helper_vmax_vv_b \ +helper_vmax_vv_d \ +helper_vmax_vv_h \ +helper_vmax_vv_w \ +helper_vmax_vx_b \ +helper_vmax_vx_d \ +helper_vmax_vx_h \ +helper_vmax_vx_w \ +helper_vmaxu_vv_b \ +helper_vmaxu_vv_d \ +helper_vmaxu_vv_h \ +helper_vmaxu_vv_w \ +helper_vmaxu_vx_b \ +helper_vmaxu_vx_d \ +helper_vmaxu_vx_h \ +helper_vmaxu_vx_w \ +helper_vmerge_vvm_b \ +helper_vmerge_vvm_d \ +helper_vmerge_vvm_h \ +helper_vmerge_vvm_w \ +helper_vmerge_vxm_b \ +helper_vmerge_vxm_d \ +helper_vmerge_vxm_h \ +helper_vmerge_vxm_w \ +helper_vmfeq_vf_d \ +helper_vmfeq_vf_h \ +helper_vmfeq_vf_w \ +helper_vmfeq_vv_d \ +helper_vmfeq_vv_h \ +helper_vmfeq_vv_w \ +helper_vmfge_vf_d \ +helper_vmfge_vf_h \ +helper_vmfge_vf_w \ +helper_vmfgt_vf_d \ +helper_vmfgt_vf_h \ 
+helper_vmfgt_vf_w \ +helper_vmfirst_m \ +helper_vmfle_vf_d \ +helper_vmfle_vf_h \ +helper_vmfle_vf_w \ +helper_vmfle_vv_d \ +helper_vmfle_vv_h \ +helper_vmfle_vv_w \ +helper_vmflt_vf_d \ +helper_vmflt_vf_h \ +helper_vmflt_vf_w \ +helper_vmflt_vv_d \ +helper_vmflt_vv_h \ +helper_vmflt_vv_w \ +helper_vmfne_vf_d \ +helper_vmfne_vf_h \ +helper_vmfne_vf_w \ +helper_vmfne_vv_d \ +helper_vmfne_vv_h \ +helper_vmfne_vv_w \ +helper_vmford_vf_d \ +helper_vmford_vf_h \ +helper_vmford_vf_w \ +helper_vmford_vv_d \ +helper_vmford_vv_h \ +helper_vmford_vv_w \ +helper_vmin_vv_b \ +helper_vmin_vv_d \ +helper_vmin_vv_h \ +helper_vmin_vv_w \ +helper_vmin_vx_b \ +helper_vmin_vx_d \ +helper_vmin_vx_h \ +helper_vmin_vx_w \ +helper_vminu_vv_b \ +helper_vminu_vv_d \ +helper_vminu_vv_h \ +helper_vminu_vv_w \ +helper_vminu_vx_b \ +helper_vminu_vx_d \ +helper_vminu_vx_h \ +helper_vminu_vx_w \ +helper_vmnand_mm \ +helper_vmnor_mm \ +helper_vmor_mm \ +helper_vmornot_mm \ +helper_vmpopc_m \ +helper_vmsbc_vvm_b \ +helper_vmsbc_vvm_d \ +helper_vmsbc_vvm_h \ +helper_vmsbc_vvm_w \ +helper_vmsbc_vxm_b \ +helper_vmsbc_vxm_d \ +helper_vmsbc_vxm_h \ +helper_vmsbc_vxm_w \ +helper_vmsbf_m \ +helper_vmseq_vv_b \ +helper_vmseq_vv_d \ +helper_vmseq_vv_h \ +helper_vmseq_vv_w \ +helper_vmseq_vx_b \ +helper_vmseq_vx_d \ +helper_vmseq_vx_h \ +helper_vmseq_vx_w \ +helper_vmsgt_vx_b \ +helper_vmsgt_vx_d \ +helper_vmsgt_vx_h \ +helper_vmsgt_vx_w \ +helper_vmsgtu_vx_b \ +helper_vmsgtu_vx_d \ +helper_vmsgtu_vx_h \ +helper_vmsgtu_vx_w \ +helper_vmsif_m \ +helper_vmsle_vv_b \ +helper_vmsle_vv_d \ +helper_vmsle_vv_h \ +helper_vmsle_vv_w \ +helper_vmsle_vx_b \ +helper_vmsle_vx_d \ +helper_vmsle_vx_h \ +helper_vmsle_vx_w \ +helper_vmsleu_vv_b \ +helper_vmsleu_vv_d \ +helper_vmsleu_vv_h \ +helper_vmsleu_vv_w \ +helper_vmsleu_vx_b \ +helper_vmsleu_vx_d \ +helper_vmsleu_vx_h \ +helper_vmsleu_vx_w \ +helper_vmslt_vv_b \ +helper_vmslt_vv_d \ +helper_vmslt_vv_h \ +helper_vmslt_vv_w \ +helper_vmslt_vx_b \ +helper_vmslt_vx_d \ +helper_vmslt_vx_h \ +helper_vmslt_vx_w \ +helper_vmsltu_vv_b \ +helper_vmsltu_vv_d \ +helper_vmsltu_vv_h \ +helper_vmsltu_vv_w \ +helper_vmsltu_vx_b \ +helper_vmsltu_vx_d \ +helper_vmsltu_vx_h \ +helper_vmsltu_vx_w \ +helper_vmsne_vv_b \ +helper_vmsne_vv_d \ +helper_vmsne_vv_h \ +helper_vmsne_vv_w \ +helper_vmsne_vx_b \ +helper_vmsne_vx_d \ +helper_vmsne_vx_h \ +helper_vmsne_vx_w \ +helper_vmsof_m \ +helper_vmul_vv_b \ +helper_vmul_vv_d \ +helper_vmul_vv_h \ +helper_vmul_vv_w \ +helper_vmul_vx_b \ +helper_vmul_vx_d \ +helper_vmul_vx_h \ +helper_vmul_vx_w \ +helper_vmulh_vv_b \ +helper_vmulh_vv_d \ +helper_vmulh_vv_h \ +helper_vmulh_vv_w \ +helper_vmulh_vx_b \ +helper_vmulh_vx_d \ +helper_vmulh_vx_h \ +helper_vmulh_vx_w \ +helper_vmulhsu_vv_b \ +helper_vmulhsu_vv_d \ +helper_vmulhsu_vv_h \ +helper_vmulhsu_vv_w \ +helper_vmulhsu_vx_b \ +helper_vmulhsu_vx_d \ +helper_vmulhsu_vx_h \ +helper_vmulhsu_vx_w \ +helper_vmulhu_vv_b \ +helper_vmulhu_vv_d \ +helper_vmulhu_vv_h \ +helper_vmulhu_vv_w \ +helper_vmulhu_vx_b \ +helper_vmulhu_vx_d \ +helper_vmulhu_vx_h \ +helper_vmulhu_vx_w \ +helper_vmv_v_v_b \ +helper_vmv_v_v_d \ +helper_vmv_v_v_h \ +helper_vmv_v_v_w \ +helper_vmv_v_x_b \ +helper_vmv_v_x_d \ +helper_vmv_v_x_h \ +helper_vmv_v_x_w \ +helper_vmxnor_mm \ +helper_vmxor_mm \ +helper_vnclip_vv_b \ +helper_vnclip_vv_h \ +helper_vnclip_vv_w \ +helper_vnclip_vx_b \ +helper_vnclip_vx_h \ +helper_vnclip_vx_w \ +helper_vnclipu_vv_b \ +helper_vnclipu_vv_h \ +helper_vnclipu_vv_w \ +helper_vnclipu_vx_b \ +helper_vnclipu_vx_h \ 
+helper_vnclipu_vx_w \ +helper_vnmsac_vv_b \ +helper_vnmsac_vv_d \ +helper_vnmsac_vv_h \ +helper_vnmsac_vv_w \ +helper_vnmsac_vx_b \ +helper_vnmsac_vx_d \ +helper_vnmsac_vx_h \ +helper_vnmsac_vx_w \ +helper_vnmsub_vv_b \ +helper_vnmsub_vv_d \ +helper_vnmsub_vv_h \ +helper_vnmsub_vv_w \ +helper_vnmsub_vx_b \ +helper_vnmsub_vx_d \ +helper_vnmsub_vx_h \ +helper_vnmsub_vx_w \ +helper_vnsra_vv_b \ +helper_vnsra_vv_h \ +helper_vnsra_vv_w \ +helper_vnsra_vx_b \ +helper_vnsra_vx_h \ +helper_vnsra_vx_w \ +helper_vnsrl_vv_b \ +helper_vnsrl_vv_h \ +helper_vnsrl_vv_w \ +helper_vnsrl_vx_b \ +helper_vnsrl_vx_h \ +helper_vnsrl_vx_w \ +helper_vor_vv_b \ +helper_vor_vv_d \ +helper_vor_vv_h \ +helper_vor_vv_w \ +helper_vor_vx_b \ +helper_vor_vx_d \ +helper_vor_vx_h \ +helper_vor_vx_w \ +helper_vredand_vs_b \ +helper_vredand_vs_d \ +helper_vredand_vs_h \ +helper_vredand_vs_w \ +helper_vredmax_vs_b \ +helper_vredmax_vs_d \ +helper_vredmax_vs_h \ +helper_vredmax_vs_w \ +helper_vredmaxu_vs_b \ +helper_vredmaxu_vs_d \ +helper_vredmaxu_vs_h \ +helper_vredmaxu_vs_w \ +helper_vredmin_vs_b \ +helper_vredmin_vs_d \ +helper_vredmin_vs_h \ +helper_vredmin_vs_w \ +helper_vredminu_vs_b \ +helper_vredminu_vs_d \ +helper_vredminu_vs_h \ +helper_vredminu_vs_w \ +helper_vredor_vs_b \ +helper_vredor_vs_d \ +helper_vredor_vs_h \ +helper_vredor_vs_w \ +helper_vredsum_vs_b \ +helper_vredsum_vs_d \ +helper_vredsum_vs_h \ +helper_vredsum_vs_w \ +helper_vredxor_vs_b \ +helper_vredxor_vs_d \ +helper_vredxor_vs_h \ +helper_vredxor_vs_w \ +helper_vrem_vv_b \ +helper_vrem_vv_d \ +helper_vrem_vv_h \ +helper_vrem_vv_w \ +helper_vrem_vx_b \ +helper_vrem_vx_d \ +helper_vrem_vx_h \ +helper_vrem_vx_w \ +helper_vremu_vv_b \ +helper_vremu_vv_d \ +helper_vremu_vv_h \ +helper_vremu_vv_w \ +helper_vremu_vx_b \ +helper_vremu_vx_d \ +helper_vremu_vx_h \ +helper_vremu_vx_w \ +helper_vrgather_vv_b \ +helper_vrgather_vv_d \ +helper_vrgather_vv_h \ +helper_vrgather_vv_w \ +helper_vrgather_vx_b \ +helper_vrgather_vx_d \ +helper_vrgather_vx_h \ +helper_vrgather_vx_w \ +helper_vrsub_vx_b \ +helper_vrsub_vx_d \ +helper_vrsub_vx_h \ +helper_vrsub_vx_w \ +helper_vsadd_vv_b \ +helper_vsadd_vv_d \ +helper_vsadd_vv_h \ +helper_vsadd_vv_w \ +helper_vsadd_vx_b \ +helper_vsadd_vx_d \ +helper_vsadd_vx_h \ +helper_vsadd_vx_w \ +helper_vsaddu_vv_b \ +helper_vsaddu_vv_d \ +helper_vsaddu_vv_h \ +helper_vsaddu_vv_w \ +helper_vsaddu_vx_b \ +helper_vsaddu_vx_d \ +helper_vsaddu_vx_h \ +helper_vsaddu_vx_w \ +helper_vsb_v_b \ +helper_vsb_v_b_mask \ +helper_vsb_v_d \ +helper_vsb_v_d_mask \ +helper_vsb_v_h \ +helper_vsb_v_h_mask \ +helper_vsb_v_w \ +helper_vsb_v_w_mask \ +helper_vsbc_vvm_b \ +helper_vsbc_vvm_d \ +helper_vsbc_vvm_h \ +helper_vsbc_vvm_w \ +helper_vsbc_vxm_b \ +helper_vsbc_vxm_d \ +helper_vsbc_vxm_h \ +helper_vsbc_vxm_w \ +helper_vse_v_b \ +helper_vse_v_b_mask \ +helper_vse_v_d \ +helper_vse_v_d_mask \ +helper_vse_v_h \ +helper_vse_v_h_mask \ +helper_vse_v_w \ +helper_vse_v_w_mask \ +helper_vsetvl \ +helper_vsh_v_d \ +helper_vsh_v_d_mask \ +helper_vsh_v_h \ +helper_vsh_v_h_mask \ +helper_vsh_v_w \ +helper_vsh_v_w_mask \ +helper_vslide1down_vx_b \ +helper_vslide1down_vx_d \ +helper_vslide1down_vx_h \ +helper_vslide1down_vx_w \ +helper_vslide1up_vx_b \ +helper_vslide1up_vx_d \ +helper_vslide1up_vx_h \ +helper_vslide1up_vx_w \ +helper_vslidedown_vx_b \ +helper_vslidedown_vx_d \ +helper_vslidedown_vx_h \ +helper_vslidedown_vx_w \ +helper_vslideup_vx_b \ +helper_vslideup_vx_d \ +helper_vslideup_vx_h \ +helper_vslideup_vx_w \ +helper_vsll_vv_b \ +helper_vsll_vv_d \ 
+helper_vsll_vv_h \ +helper_vsll_vv_w \ +helper_vsll_vx_b \ +helper_vsll_vx_d \ +helper_vsll_vx_h \ +helper_vsll_vx_w \ +helper_vsmul_vv_b \ +helper_vsmul_vv_d \ +helper_vsmul_vv_h \ +helper_vsmul_vv_w \ +helper_vsmul_vx_b \ +helper_vsmul_vx_d \ +helper_vsmul_vx_h \ +helper_vsmul_vx_w \ +helper_vsra_vv_b \ +helper_vsra_vv_d \ +helper_vsra_vv_h \ +helper_vsra_vv_w \ +helper_vsra_vx_b \ +helper_vsra_vx_d \ +helper_vsra_vx_h \ +helper_vsra_vx_w \ +helper_vsrl_vv_b \ +helper_vsrl_vv_d \ +helper_vsrl_vv_h \ +helper_vsrl_vv_w \ +helper_vsrl_vx_b \ +helper_vsrl_vx_d \ +helper_vsrl_vx_h \ +helper_vsrl_vx_w \ +helper_vssb_v_b \ +helper_vssb_v_d \ +helper_vssb_v_h \ +helper_vssb_v_w \ +helper_vsse_v_b \ +helper_vsse_v_d \ +helper_vsse_v_h \ +helper_vsse_v_w \ +helper_vssh_v_d \ +helper_vssh_v_h \ +helper_vssh_v_w \ +helper_vssra_vv_b \ +helper_vssra_vv_d \ +helper_vssra_vv_h \ +helper_vssra_vv_w \ +helper_vssra_vx_b \ +helper_vssra_vx_d \ +helper_vssra_vx_h \ +helper_vssra_vx_w \ +helper_vssrl_vv_b \ +helper_vssrl_vv_d \ +helper_vssrl_vv_h \ +helper_vssrl_vv_w \ +helper_vssrl_vx_b \ +helper_vssrl_vx_d \ +helper_vssrl_vx_h \ +helper_vssrl_vx_w \ +helper_vssub_vv_b \ +helper_vssub_vv_d \ +helper_vssub_vv_h \ +helper_vssub_vv_w \ +helper_vssub_vx_b \ +helper_vssub_vx_d \ +helper_vssub_vx_h \ +helper_vssub_vx_w \ +helper_vssubu_vv_b \ +helper_vssubu_vv_d \ +helper_vssubu_vv_h \ +helper_vssubu_vv_w \ +helper_vssubu_vx_b \ +helper_vssubu_vx_d \ +helper_vssubu_vx_h \ +helper_vssubu_vx_w \ +helper_vssw_v_d \ +helper_vssw_v_w \ +helper_vsub_vv_b \ +helper_vsub_vv_d \ +helper_vsub_vv_h \ +helper_vsub_vv_w \ +helper_vsub_vx_b \ +helper_vsub_vx_d \ +helper_vsub_vx_h \ +helper_vsub_vx_w \ +helper_vsw_v_d \ +helper_vsw_v_d_mask \ +helper_vsw_v_w \ +helper_vsw_v_w_mask \ +helper_vsxb_v_b \ +helper_vsxb_v_d \ +helper_vsxb_v_h \ +helper_vsxb_v_w \ +helper_vsxe_v_b \ +helper_vsxe_v_d \ +helper_vsxe_v_h \ +helper_vsxe_v_w \ +helper_vsxh_v_d \ +helper_vsxh_v_h \ +helper_vsxh_v_w \ +helper_vsxw_v_d \ +helper_vsxw_v_w \ +helper_vwadd_vv_b \ +helper_vwadd_vv_h \ +helper_vwadd_vv_w \ +helper_vwadd_vx_b \ +helper_vwadd_vx_h \ +helper_vwadd_vx_w \ +helper_vwadd_wv_b \ +helper_vwadd_wv_h \ +helper_vwadd_wv_w \ +helper_vwadd_wx_b \ +helper_vwadd_wx_h \ +helper_vwadd_wx_w \ +helper_vwaddu_vv_b \ +helper_vwaddu_vv_h \ +helper_vwaddu_vv_w \ +helper_vwaddu_vx_b \ +helper_vwaddu_vx_h \ +helper_vwaddu_vx_w \ +helper_vwaddu_wv_b \ +helper_vwaddu_wv_h \ +helper_vwaddu_wv_w \ +helper_vwaddu_wx_b \ +helper_vwaddu_wx_h \ +helper_vwaddu_wx_w \ +helper_vwmacc_vv_b \ +helper_vwmacc_vv_h \ +helper_vwmacc_vv_w \ +helper_vwmacc_vx_b \ +helper_vwmacc_vx_h \ +helper_vwmacc_vx_w \ +helper_vwmaccsu_vv_b \ +helper_vwmaccsu_vv_h \ +helper_vwmaccsu_vv_w \ +helper_vwmaccsu_vx_b \ +helper_vwmaccsu_vx_h \ +helper_vwmaccsu_vx_w \ +helper_vwmaccu_vv_b \ +helper_vwmaccu_vv_h \ +helper_vwmaccu_vv_w \ +helper_vwmaccu_vx_b \ +helper_vwmaccu_vx_h \ +helper_vwmaccu_vx_w \ +helper_vwmaccus_vx_b \ +helper_vwmaccus_vx_h \ +helper_vwmaccus_vx_w \ +helper_vwmul_vv_b \ +helper_vwmul_vv_h \ +helper_vwmul_vv_w \ +helper_vwmul_vx_b \ +helper_vwmul_vx_h \ +helper_vwmul_vx_w \ +helper_vwmulsu_vv_b \ +helper_vwmulsu_vv_h \ +helper_vwmulsu_vv_w \ +helper_vwmulsu_vx_b \ +helper_vwmulsu_vx_h \ +helper_vwmulsu_vx_w \ +helper_vwmulu_vv_b \ +helper_vwmulu_vv_h \ +helper_vwmulu_vv_w \ +helper_vwmulu_vx_b \ +helper_vwmulu_vx_h \ +helper_vwmulu_vx_w \ +helper_vwredsum_vs_b \ +helper_vwredsum_vs_h \ +helper_vwredsum_vs_w \ +helper_vwredsumu_vs_b \ +helper_vwredsumu_vs_h \ 
+helper_vwredsumu_vs_w \ +helper_vwsmacc_vv_b \ +helper_vwsmacc_vv_h \ +helper_vwsmacc_vv_w \ +helper_vwsmacc_vx_b \ +helper_vwsmacc_vx_h \ +helper_vwsmacc_vx_w \ +helper_vwsmaccsu_vv_b \ +helper_vwsmaccsu_vv_h \ +helper_vwsmaccsu_vv_w \ +helper_vwsmaccsu_vx_b \ +helper_vwsmaccsu_vx_h \ +helper_vwsmaccsu_vx_w \ +helper_vwsmaccu_vv_b \ +helper_vwsmaccu_vv_h \ +helper_vwsmaccu_vv_w \ +helper_vwsmaccu_vx_b \ +helper_vwsmaccu_vx_h \ +helper_vwsmaccu_vx_w \ +helper_vwsmaccus_vx_b \ +helper_vwsmaccus_vx_h \ +helper_vwsmaccus_vx_w \ +helper_vwsub_vv_b \ +helper_vwsub_vv_h \ +helper_vwsub_vv_w \ +helper_vwsub_vx_b \ +helper_vwsub_vx_h \ +helper_vwsub_vx_w \ +helper_vwsub_wv_b \ +helper_vwsub_wv_h \ +helper_vwsub_wv_w \ +helper_vwsub_wx_b \ +helper_vwsub_wx_h \ +helper_vwsub_wx_w \ +helper_vwsubu_vv_b \ +helper_vwsubu_vv_h \ +helper_vwsubu_vv_w \ +helper_vwsubu_vx_b \ +helper_vwsubu_vx_h \ +helper_vwsubu_vx_w \ +helper_vwsubu_wv_b \ +helper_vwsubu_wv_h \ +helper_vwsubu_wv_w \ +helper_vwsubu_wx_b \ +helper_vwsubu_wx_h \ +helper_vwsubu_wx_w \ +helper_vxor_vv_b \ +helper_vxor_vv_d \ +helper_vxor_vv_h \ +helper_vxor_vv_w \ +helper_vxor_vx_b \ +helper_vxor_vx_d \ +helper_vxor_vx_h \ +helper_vxor_vx_w \ " riscv64_SYMBOLS=${riscv32_SYMBOLS} @@ -4791,7 +5966,6 @@ cpu_rddsp \ helper_rddsp \ helper_cfc1 \ helper_ctc1 \ -ieee_ex_to_mips \ helper_float_sqrt_d \ helper_float_sqrt_s \ helper_float_cvtd_s \ @@ -5346,23 +6520,59 @@ helper_msa_srari_df \ helper_msa_srlri_df \ helper_msa_binsli_df \ helper_msa_binsri_df \ -helper_msa_subv_df \ -helper_msa_subs_s_df \ -helper_msa_subs_u_df \ -helper_msa_subsus_u_df \ -helper_msa_subsuu_s_df \ -helper_msa_mulv_df \ -helper_msa_dotp_s_df \ -helper_msa_dotp_u_df \ +helper_msa_subv_b \ +helper_msa_subv_h \ +helper_msa_subv_w \ +helper_msa_subv_d \ +helper_msa_subs_s_b \ +helper_msa_subs_s_h \ +helper_msa_subs_s_w \ +helper_msa_subs_s_d \ +helper_msa_subs_u_b \ +helper_msa_subs_u_h \ +helper_msa_subs_u_w \ +helper_msa_subs_u_d \ +helper_msa_subsus_u_b \ +helper_msa_subsus_u_h \ +helper_msa_subsus_u_w \ +helper_msa_subsus_u_d \ +helper_msa_subsuu_s_b \ +helper_msa_subsuu_s_h \ +helper_msa_subsuu_s_w \ +helper_msa_subsuu_s_d \ +helper_msa_mulv_b \ +helper_msa_mulv_h \ +helper_msa_mulv_w \ +helper_msa_mulv_d \ +helper_msa_dotp_s_h \ +helper_msa_dotp_s_w \ +helper_msa_dotp_s_d \ +helper_msa_dotp_u_h \ +helper_msa_dotp_u_w \ +helper_msa_dotp_u_d \ helper_msa_mul_q_df \ helper_msa_mulr_q_df \ helper_msa_sld_df \ -helper_msa_maddv_df \ -helper_msa_msubv_df \ -helper_msa_dpadd_s_df \ -helper_msa_dpadd_u_df \ -helper_msa_dpsub_s_df \ -helper_msa_dpsub_u_df \ +helper_msa_maddv_b \ +helper_msa_maddv_h \ +helper_msa_maddv_w \ +helper_msa_maddv_d \ +helper_msa_msubv_b \ +helper_msa_msubv_h \ +helper_msa_msubv_w \ +helper_msa_msubv_d \ +helper_msa_dpadd_s_h \ +helper_msa_dpadd_s_w \ +helper_msa_dpadd_s_d \ +helper_msa_dpadd_u_h \ +helper_msa_dpadd_u_w \ +helper_msa_dpadd_u_d \ +helper_msa_dpsub_s_h \ +helper_msa_dpsub_s_w \ +helper_msa_dpsub_s_d \ +helper_msa_dpsub_u_h \ +helper_msa_dpsub_u_w \ +helper_msa_dpsub_u_d \ helper_msa_binsl_df \ helper_msa_binsr_df \ helper_msa_madd_q_df \ @@ -5797,7 +7007,6 @@ helper_bfffo_reg \ helper_bfffo_mem \ helper_chk \ helper_chk2 \ -floatx80_mod \ floatx80_getman \ floatx80_getexp \ floatx80_scale \ @@ -6110,6 +7319,33 @@ helper_stvewx \ helper_tbegin \ helper_load_dump_spr \ helper_store_dump_spr \ +store_fpscr \ +helper_store_fpscr \ +helper_float_check_status \ +helper_reset_fpstatus \ +helper_fadd \ +helper_fsub \ +helper_fmul \ +helper_fdiv \ 
+helper_fctiw \ +helper_fctiwz \ +helper_fctiwuz \ +helper_fctid \ +helper_fctidz \ +helper_fctidu \ +helper_fctiduz \ +helper_fcfid \ +helper_fcfids \ +helper_fcfidu \ +helper_fcfidus \ +helper_frin \ +helper_friz \ +helper_frip \ +helper_frim \ +helper_fmadd \ +helper_fnmadd \ +helper_fmsub \ +helper_fnmsub \ helper_hfscr_facility_check \ helper_fscr_facility_check \ helper_msr_facility_check \ @@ -6261,8 +7497,245 @@ ppc_booke_timers_init \ ppc_hash32_handle_mmu_fault \ gen_helper_store_booke_tsr \ gen_helper_store_booke_tcr \ +gen_helper_store_fpscr \ store_booke_tcr \ ppc_hash32_get_phys_page_debug \ +helper_compute_fprf_float128 \ +helper_compute_fprf_float16 \ +helper_compute_fprf_float32 \ +helper_compute_fprf_float64 \ +helper_efdadd \ +helper_efdcfs \ +helper_efdcfsf \ +helper_efdcfsi \ +helper_efdcfsid \ +helper_efdcfuf \ +helper_efdcfui \ +helper_efdcfuid \ +helper_efdcmpeq \ +helper_efdcmpgt \ +helper_efdcmplt \ +helper_efdctsf \ +helper_efdctsi \ +helper_efdctsidz \ +helper_efdctsiz \ +helper_efdctuf \ +helper_efdctui \ +helper_efdctuidz \ +helper_efdctuiz \ +helper_efddiv \ +helper_efdmul \ +helper_efdsub \ +helper_efdtsteq \ +helper_efdtstgt \ +helper_efdtstlt \ +helper_efsadd \ +helper_efscfd \ +helper_efscfsf \ +helper_efscfsi \ +helper_efscfuf \ +helper_efscfui \ +helper_efscmpeq \ +helper_efscmpgt \ +helper_efscmplt \ +helper_efsctsf \ +helper_efsctsi \ +helper_efsctsiz \ +helper_efsctuf \ +helper_efsctui \ +helper_efsctuiz \ +helper_efsdiv \ +helper_efsmul \ +helper_efssub \ +helper_efststeq \ +helper_efststgt \ +helper_efststlt \ +helper_evfsadd \ +helper_evfscfsf \ +helper_evfscfsi \ +helper_evfscfuf \ +helper_evfscfui \ +helper_evfscmpeq \ +helper_evfscmpgt \ +helper_evfscmplt \ +helper_evfsctsf \ +helper_evfsctsi \ +helper_evfsctsiz \ +helper_evfsctuf \ +helper_evfsctui \ +helper_evfsctuiz \ +helper_evfsdiv \ +helper_evfsmul \ +helper_evfssub \ +helper_evfststeq \ +helper_evfststgt \ +helper_evfststlt \ +helper_fcmpo \ +helper_fcmpu \ +helper_fctiwu \ +helper_fpscr_clrbit \ +helper_fpscr_setbit \ +helper_fre \ +helper_fres \ +helper_frsp \ +helper_frsqrte \ +helper_fsel \ +helper_fsqrt \ +helper_ftdiv \ +helper_ftsqrt \ +helper_todouble \ +helper_tosingle \ +helper_xsadddp \ +helper_xsaddqp \ +helper_xsaddsp \ +helper_xscmpeqdp \ +helper_xscmpexpdp \ +helper_xscmpexpqp \ +helper_xscmpgedp \ +helper_xscmpgtdp \ +helper_xscmpnedp \ +helper_xscmpodp \ +helper_xscmpoqp \ +helper_xscmpudp \ +helper_xscmpuqp \ +helper_xscvdphp \ +helper_xscvdpqp \ +helper_xscvdpsp \ +helper_xscvdpspn \ +helper_xscvdpsxds \ +helper_xscvdpsxws \ +helper_xscvdpuxds \ +helper_xscvdpuxws \ +helper_xscvhpdp \ +helper_xscvqpdp \ +helper_xscvqpsdz \ +helper_xscvqpswz \ +helper_xscvqpudz \ +helper_xscvqpuwz \ +helper_xscvsdqp \ +helper_xscvspdp \ +helper_xscvspdpn \ +helper_xscvsxddp \ +helper_xscvsxdsp \ +helper_xscvudqp \ +helper_xscvuxddp \ +helper_xscvuxdsp \ +helper_xsdivdp \ +helper_xsdivqp \ +helper_xsdivsp \ +helper_xsmadddp \ +helper_xsmaddsp \ +helper_xsmaxcdp \ +helper_xsmaxdp \ +helper_xsmaxjdp \ +helper_xsmincdp \ +helper_xsmindp \ +helper_xsminjdp \ +helper_xsmsubdp \ +helper_xsmsubsp \ +helper_xsmuldp \ +helper_xsmulqp \ +helper_xsmulsp \ +helper_xsnmadddp \ +helper_xsnmaddsp \ +helper_xsnmsubdp \ +helper_xsnmsubsp \ +helper_xsrdpi \ +helper_xsrdpic \ +helper_xsrdpim \ +helper_xsrdpip \ +helper_xsrdpiz \ +helper_xsredp \ +helper_xsresp \ +helper_xsrqpi \ +helper_xsrqpxp \ +helper_xsrsp \ +helper_xsrsqrtedp \ +helper_xsrsqrtesp \ +helper_xssqrtdp \ +helper_xssqrtqp \ 
+helper_xssqrtsp \ +helper_xssubdp \ +helper_xssubqp \ +helper_xssubsp \ +helper_xstdivdp \ +helper_xstsqrtdp \ +helper_xststdcdp \ +helper_xststdcqp \ +helper_xststdcsp \ +helper_xvadddp \ +helper_xvaddsp \ +helper_xvcmpeqdp \ +helper_xvcmpeqsp \ +helper_xvcmpgedp \ +helper_xvcmpgesp \ +helper_xvcmpgtdp \ +helper_xvcmpgtsp \ +helper_xvcmpnedp \ +helper_xvcmpnesp \ +helper_xvcvdpsp \ +helper_xvcvdpsxds \ +helper_xvcvdpsxws \ +helper_xvcvdpuxds \ +helper_xvcvdpuxws \ +helper_xvcvhpsp \ +helper_xvcvspdp \ +helper_xvcvsphp \ +helper_xvcvspsxds \ +helper_xvcvspsxws \ +helper_xvcvspuxds \ +helper_xvcvspuxws \ +helper_xvcvsxddp \ +helper_xvcvsxdsp \ +helper_xvcvsxwdp \ +helper_xvcvsxwsp \ +helper_xvcvuxddp \ +helper_xvcvuxdsp \ +helper_xvcvuxwdp \ +helper_xvcvuxwsp \ +helper_xvdivdp \ +helper_xvdivsp \ +helper_xvmadddp \ +helper_xvmaddsp \ +helper_xvmaxdp \ +helper_xvmaxsp \ +helper_xvmindp \ +helper_xvminsp \ +helper_xvmsubdp \ +helper_xvmsubsp \ +helper_xvmuldp \ +helper_xvmulsp \ +helper_xvnmadddp \ +helper_xvnmaddsp \ +helper_xvnmsubdp \ +helper_xvnmsubsp \ +helper_xvrdpi \ +helper_xvrdpic \ +helper_xvrdpim \ +helper_xvrdpip \ +helper_xvrdpiz \ +helper_xvredp \ +helper_xvresp \ +helper_xvrspi \ +helper_xvrspic \ +helper_xvrspim \ +helper_xvrspip \ +helper_xvrspiz \ +helper_xvrsqrtedp \ +helper_xvrsqrtesp \ +helper_xvsqrtdp \ +helper_xvsqrtsp \ +helper_xvsubdp \ +helper_xvsubsp \ +helper_xvtdivdp \ +helper_xvtdivsp \ +helper_xvtsqrtdp \ +helper_xvtsqrtsp \ +helper_xvtstdcdp \ +helper_xvtstdcsp \ +helper_xvxsigsp \ +helper_xxperm \ +helper_xxpermr \ " ppc64_SYMBOLS=${ppc_SYMBOLS} @@ -6290,26 +7763,26 @@ ARCHS="x86_64 arm aarch64 riscv32 riscv64 mips mipsel mips64 mips64el sparc spar for arch in $ARCHS; do -echo "Generating header for $arch" -echo "/* Autogen header for Unicorn Engine - DONOT MODIFY */" > $SOURCE_DIR/qemu/$arch.h -echo "#ifndef UNICORN_AUTOGEN_${arch}_H" >> $SOURCE_DIR/qemu/$arch.h -echo "#define UNICORN_AUTOGEN_${arch}_H" >> $SOURCE_DIR/qemu/$arch.h -echo "#ifndef UNICORN_ARCH_POSTFIX" >> $SOURCE_DIR/qemu/$arch.h -echo "#define UNICORN_ARCH_POSTFIX _$arch" >> $SOURCE_DIR/qemu/$arch.h -echo "#endif" >> $SOURCE_DIR/qemu/$arch.h + echo "Generating header for $arch" + echo "/* Autogen header for Unicorn Engine - DONOT MODIFY */" >$SOURCE_DIR/qemu/$arch.h + echo "#ifndef UNICORN_AUTOGEN_${arch}_H" >>$SOURCE_DIR/qemu/$arch.h + echo "#define UNICORN_AUTOGEN_${arch}_H" >>$SOURCE_DIR/qemu/$arch.h + echo "#ifndef UNICORN_ARCH_POSTFIX" >>$SOURCE_DIR/qemu/$arch.h + echo "#define UNICORN_ARCH_POSTFIX _$arch" >>$SOURCE_DIR/qemu/$arch.h + echo "#endif" >>$SOURCE_DIR/qemu/$arch.h -for loop in $COMMON_SYMBOLS; do - echo "#define $loop ${loop}_${arch}" >> $SOURCE_DIR/qemu/$arch.h -done + for loop in $COMMON_SYMBOLS; do + echo "#define $loop ${loop}_${arch}" >>$SOURCE_DIR/qemu/$arch.h + done -ARCH_SYMBOLS=$(eval echo '$'"${arch}_SYMBOLS") + ARCH_SYMBOLS=$(eval echo '$'"${arch}_SYMBOLS") -#echo ${ARCH_SYMBOLS} + #echo ${ARCH_SYMBOLS} -for loop in $ARCH_SYMBOLS; do - echo "#define $loop ${loop}_${arch}" >> $SOURCE_DIR/qemu/$arch.h -done + for loop in $ARCH_SYMBOLS; do + echo "#define $loop ${loop}_${arch}" >>$SOURCE_DIR/qemu/$arch.h + done -echo "#endif" >> $SOURCE_DIR/qemu/$arch.h + echo "#endif" >>$SOURCE_DIR/qemu/$arch.h done diff --git a/uc.c b/uc.c index 57cb6e8c76..fd29765080 100644 --- a/uc.c +++ b/uc.c @@ -11,7 +11,6 @@ #include #endif -#include // nanosleep #include #include "uc_priv.h" From b4325f6a8a047c0fdfd557155e86a073acb1aef6 Mon Sep 17 00:00:00 2001 From: Zhang Date: Sun, 6 Apr 
2025 13:26:20 +0800 Subject: [PATCH 2/4] Squashed commit of the following: [qemu]It compiles! [qemu]In interpreter mode, disable allocation of _EXEC memory [qemu][uc]Define interpreter mode in build options [qemu]Copy tci from upstream qemu of corresponding version --- CMakeLists.txt | 16 + qemu/accel/tcg/translate-all.c | 6 + qemu/configure | 17 +- qemu/include/tcg/tcg.h | 1 + qemu/tcg/tcg.c | 6 + qemu/tcg/tci.c | 1275 ++++++++++++++++++++++++++++++++ qemu/tcg/tci/README | 130 ++++ qemu/tcg/tci/tcg-target.h | 213 ++++++ qemu/tcg/tci/tcg-target.inc.c | 896 ++++++++++++++++++++++ 9 files changed, 2559 insertions(+), 1 deletion(-) create mode 100644 qemu/tcg/tci.c create mode 100644 qemu/tcg/tci/README create mode 100644 qemu/tcg/tci/tcg-target.h create mode 100644 qemu/tcg/tci/tcg-target.inc.c diff --git a/CMakeLists.txt b/CMakeLists.txt index 81c56aa5e9..78db47b688 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -88,6 +88,7 @@ option(UNICORN_BUILD_TESTS "Build unicorn tests" ${PROJECT_IS_TOP_LEVEL}) option(UNICORN_INSTALL "Enable unicorn installation" ${PROJECT_IS_TOP_LEVEL}) set(UNICORN_ARCH "x86;arm;aarch64;riscv;mips;sparc;m68k;ppc;s390x;tricore" CACHE STRING "Enabled unicorn architectures") option(UNICORN_TRACER "Trace unicorn execution" OFF) +option(UNICORN_INTERPRETER "Use interpreter mode" OFF) foreach(ARCH_LOOP ${UNICORN_ARCH}) string(TOUPPER "${ARCH_LOOP}" ARCH_LOOP) @@ -277,6 +278,10 @@ else() endwhile(TRUE) endif() + if(UNICORN_INTERPRETER) + set(UNICORN_TARGET_ARCH "tci") + endif() + set(EXTRA_CFLAGS "--extra-cflags=") if(UNICORN_HAS_X86) set(EXTRA_CFLAGS "${EXTRA_CFLAGS}-DUNICORN_HAS_X86 ") @@ -362,10 +367,17 @@ else() set(TARGET_LIST "${TARGET_LIST} ") # GEN config-host.mak & target directories + set(UNICORN_EXECUTION_MODE "") + if(UNICORN_INTERPRETER) + set(UNICORN_EXECUTION_MODE "--enable-interpreter") + else() + set(UNICORN_EXECUTION_MODE "--disable-interpreter") + endif() execute_process(COMMAND sh ${CMAKE_CURRENT_SOURCE_DIR}/qemu/configure --cc=${CMAKE_C_COMPILER} ${EXTRA_CFLAGS} ${TARGET_LIST} + ${UNICORN_EXECUTION_MODE} WORKING_DIRECTORY ${CMAKE_BINARY_DIR} ) execute_process(COMMAND sh ${CMAKE_CURRENT_SOURCE_DIR}/qemu/scripts/create_config @@ -507,6 +519,10 @@ set(UNICORN_ARCH_COMMON qemu/softmmu/unicorn_vtlb.c ) +if(UNICORN_INTERPRETER) + list(APPEND UNICORN_ARCH_COMMON qemu/tcg/tci.c) +endif() + if(UNICORN_HAS_X86) add_library(x86_64-softmmu STATIC ${UNICORN_ARCH_COMMON} diff --git a/qemu/accel/tcg/translate-all.c b/qemu/accel/tcg/translate-all.c index d240f35c87..0524fefd30 100644 --- a/qemu/accel/tcg/translate-all.c +++ b/qemu/accel/tcg/translate-all.c @@ -1019,12 +1019,18 @@ void free_code_gen_buffer(struct uc_struct *uc) static inline void *alloc_code_gen_buffer(struct uc_struct *uc) { TCGContext *tcg_ctx = uc->tcg_ctx; +#if CONFIG_TCG_INTERPRETER + int prot = PROT_WRITE | PROT_READ; +#else int prot = PROT_WRITE | PROT_READ | PROT_EXEC; +#endif int flags = MAP_PRIVATE | MAP_ANONYMOUS; size_t size = tcg_ctx->code_gen_buffer_size; void *buf; +#if !CONFIG_TCG_INTERPRETER #ifdef USE_MAP_JIT flags |= MAP_JIT; +#endif #endif buf = mmap(NULL, size, prot, flags, -1, 0); if (buf == MAP_FAILED) { diff --git a/qemu/configure b/qemu/configure index 47d4a4c6ef..f52b5b9531 100755 --- a/qemu/configure +++ b/qemu/configure @@ -272,6 +272,7 @@ supported_cpu="no" supported_os="no" bogus_os="no" malloc_trim="" +interpreter="yes" # parse CC options first for opt do @@ -308,6 +309,10 @@ for opt do eval "cross_cc_${cc_arch}=\$optarg" cross_cc_vars="$cross_cc_vars 
cross_cc_${cc_arch}" ;; + --enable-interpreter) interpreter="yes" + ;; + --disable-interpreter) interpreter="no" + ;; esac done # OS specific @@ -689,6 +694,10 @@ for opt do ;; --disable-debug-info) ;; + --enable-interpreter) + ;; + --disable-interpreter) + ;; --cross-cc-*) ;; --cpu=*) @@ -922,6 +931,7 @@ disabled with --disable-FEATURE, default is enabled if available: jemalloc jemalloc support avx2 AVX2 optimization support avx512f AVX512F optimization support + interpreter Interpreter mode NOTE: The object files are built at the place where configure is launched EOF @@ -2137,7 +2147,7 @@ fi ########################################## # check for Apple Silicon JIT function -if [ "$darwin" = "yes" ] ; then +if [ "$darwin" = "yes" ] && [ "$interpreter" = "no" ] ; then cat > $TMPC << EOF #include int main() { pthread_jit_write_protect_np(0); return 0;} @@ -2313,6 +2323,7 @@ echo "tcmalloc support $tcmalloc" echo "jemalloc support $jemalloc" echo "avx2 optimization $avx2_opt" echo "avx512f optimization $avx512f_opt" +echo "interpreter $interpreter" if test "$supported_cpu" = "no"; then echo @@ -2557,6 +2568,10 @@ if test "$have_sprr_mrs" = "yes" ; then echo "HAVE_SPRR_MRS=y" >> $config_host_mak fi +if test "$interpreter" = "yes" ; then + echo "CONFIG_TCG_INTERPRETER=y" >> $config_host_mak +fi + # Hold two types of flag: # CONFIG_THREAD_SETNAME_BYTHREAD - we've got a way of setting the name on # a thread we have a handle to diff --git a/qemu/include/tcg/tcg.h b/qemu/include/tcg/tcg.h index 966103e25d..695609df0c 100644 --- a/qemu/include/tcg/tcg.h +++ b/qemu/include/tcg/tcg.h @@ -706,6 +706,7 @@ struct TCGContext { struct jit_code_entry *one_entry; /* qemu/tcg/tcg-common.c */ TCGOpDef *tcg_op_defs; + size_t tcg_op_defs_max; // Unicorn engine variables struct uc_struct *uc; diff --git a/qemu/tcg/tcg.c b/qemu/tcg/tcg.c index 3d23487176..0befcea492 100644 --- a/qemu/tcg/tcg.c +++ b/qemu/tcg/tcg.c @@ -62,6 +62,10 @@ #include +#if CONFIG_TCG_INTERPRETER +#include "tcg/tcg.h" +#endif + /* Forward declarations for functions declared in tcg-target.inc.c and used here. */ static void tcg_target_init(TCGContext *s); @@ -666,6 +670,7 @@ static const TCGOpDef tcg_op_defs_org[] = { #include "tcg/tcg-opc.h" #undef DEF }; +static const size_t tcg_op_defs_max_org = ARRAY_SIZE(tcg_op_defs_org); static void process_op_defs(TCGContext *s); static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type, @@ -734,6 +739,7 @@ void tcg_context_init(TCGContext *s) // copy original tcg_op_defs_org for private usage s->tcg_op_defs = g_malloc0(sizeof(tcg_op_defs_org)); memcpy(s->tcg_op_defs, tcg_op_defs_org, sizeof(tcg_op_defs_org)); + s->tcg_op_defs_max = tcg_op_defs_max_org; /* Count total number of arguments and allocate the corresponding space */ diff --git a/qemu/tcg/tci.c b/qemu/tcg/tci.c new file mode 100644 index 0000000000..46fe9ce63f --- /dev/null +++ b/qemu/tcg/tci.c @@ -0,0 +1,1275 @@ +/* + * Tiny Code Interpreter for QEMU + * + * Copyright (c) 2009, 2011, 2016 Stefan Weil + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include "qemu/osdep.h" + +/* Enable TCI assertions only when debugging TCG (and without NDEBUG defined). + * Without assertions, the interpreter runs much faster. */ +#if defined(CONFIG_DEBUG_TCG) +# define tci_assert(cond) assert(cond) +#else +# define tci_assert(cond) ((void)0) +#endif + +#include "qemu-common.h" +#include "tcg/tcg.h" /* MAX_OPC_PARAM_IARGS */ +#include "exec/cpu_ldst.h" +#include "tcg/tcg-op.h" + +/* Marker for missing code. */ +#define TODO() \ + do { \ + fprintf(stderr, "TODO %s:%u: %s()\n", \ + __FILE__, __LINE__, __func__); \ + tcg_abort(); \ + } while (0) + +#if MAX_OPC_PARAM_IARGS != 6 +# error Fix needed, number of supported input arguments changed! +#endif +#if TCG_TARGET_REG_BITS == 32 +typedef uint64_t (*helper_function)(tcg_target_ulong, tcg_target_ulong, + tcg_target_ulong, tcg_target_ulong, + tcg_target_ulong, tcg_target_ulong, + tcg_target_ulong, tcg_target_ulong, + tcg_target_ulong, tcg_target_ulong, + tcg_target_ulong, tcg_target_ulong); +#else +typedef uint64_t (*helper_function)(tcg_target_ulong, tcg_target_ulong, + tcg_target_ulong, tcg_target_ulong, + tcg_target_ulong, tcg_target_ulong); +#endif + +static tcg_target_ulong tci_read_reg(const tcg_target_ulong *regs, TCGReg index) +{ + tci_assert(index < TCG_TARGET_NB_REGS); + return regs[index]; +} + +#if TCG_TARGET_HAS_ext8s_i32 || TCG_TARGET_HAS_ext8s_i64 +static int8_t tci_read_reg8s(const tcg_target_ulong *regs, TCGReg index) +{ + return (int8_t)tci_read_reg(regs, index); +} +#endif + +#if TCG_TARGET_HAS_ext16s_i32 || TCG_TARGET_HAS_ext16s_i64 +static int16_t tci_read_reg16s(const tcg_target_ulong *regs, TCGReg index) +{ + return (int16_t)tci_read_reg(regs, index); +} +#endif + +#if TCG_TARGET_REG_BITS == 64 +static int32_t tci_read_reg32s(const tcg_target_ulong *regs, TCGReg index) +{ + return (int32_t)tci_read_reg(regs, index); +} +#endif + +static uint8_t tci_read_reg8(const tcg_target_ulong *regs, TCGReg index) +{ + return (uint8_t)tci_read_reg(regs, index); +} + +static uint16_t tci_read_reg16(const tcg_target_ulong *regs, TCGReg index) +{ + return (uint16_t)tci_read_reg(regs, index); +} + +static uint32_t tci_read_reg32(const tcg_target_ulong *regs, TCGReg index) +{ + return (uint32_t)tci_read_reg(regs, index); +} + +#if TCG_TARGET_REG_BITS == 64 +static uint64_t tci_read_reg64(const tcg_target_ulong *regs, TCGReg index) +{ + return tci_read_reg(regs, index); +} +#endif + +static void +tci_write_reg(tcg_target_ulong *regs, TCGReg index, tcg_target_ulong value) +{ + tci_assert(index < TCG_TARGET_NB_REGS); + tci_assert(index != TCG_AREG0); + tci_assert(index != TCG_REG_CALL_STACK); + regs[index] = value; +} + +#if TCG_TARGET_REG_BITS == 64 +static void +tci_write_reg32s(tcg_target_ulong *regs, TCGReg index, int32_t value) +{ + tci_write_reg(regs, index, value); +} +#endif + +static void tci_write_reg8(tcg_target_ulong *regs, TCGReg index, uint8_t value) +{ + tci_write_reg(regs, index, value); +} + +static void +tci_write_reg16(tcg_target_ulong *regs, TCGReg index, uint16_t value) +{ + tci_write_reg(regs, index, value); +} + +static void +tci_write_reg32(tcg_target_ulong *regs, TCGReg index, uint32_t value) +{ + tci_write_reg(regs, index, value); +} + +#if TCG_TARGET_REG_BITS == 32 +static void tci_write_reg64(tcg_target_ulong *regs, uint32_t high_index, + uint32_t low_index, uint64_t value) +{ + tci_write_reg(regs, low_index, value); + tci_write_reg(regs, high_index, 
value >> 32); +} +#elif TCG_TARGET_REG_BITS == 64 +static void +tci_write_reg64(tcg_target_ulong *regs, TCGReg index, uint64_t value) +{ + tci_write_reg(regs, index, value); +} +#endif + +#if TCG_TARGET_REG_BITS == 32 +/* Create a 64 bit value from two 32 bit values. */ +static uint64_t tci_uint64(uint32_t high, uint32_t low) +{ + return ((uint64_t)high << 32) + low; +} +#endif + +/* Read constant (native size) from bytecode. */ +static tcg_target_ulong tci_read_i(uint8_t **tb_ptr) +{ + tcg_target_ulong value = *(tcg_target_ulong *)(*tb_ptr); + *tb_ptr += sizeof(value); + return value; +} + +/* Read unsigned constant (32 bit) from bytecode. */ +static uint32_t tci_read_i32(uint8_t **tb_ptr) +{ + uint32_t value = *(uint32_t *)(*tb_ptr); + *tb_ptr += sizeof(value); + return value; +} + +/* Read signed constant (32 bit) from bytecode. */ +static int32_t tci_read_s32(uint8_t **tb_ptr) +{ + int32_t value = *(int32_t *)(*tb_ptr); + *tb_ptr += sizeof(value); + return value; +} + +#if TCG_TARGET_REG_BITS == 64 +/* Read constant (64 bit) from bytecode. */ +static uint64_t tci_read_i64(uint8_t **tb_ptr) +{ + uint64_t value = *(uint64_t *)(*tb_ptr); + *tb_ptr += sizeof(value); + return value; +} +#endif + +/* Read indexed register (native size) from bytecode. */ +static tcg_target_ulong +tci_read_r(const tcg_target_ulong *regs, uint8_t **tb_ptr) +{ + tcg_target_ulong value = tci_read_reg(regs, **tb_ptr); + *tb_ptr += 1; + return value; +} + +/* Read indexed register (8 bit) from bytecode. */ +static uint8_t tci_read_r8(const tcg_target_ulong *regs, uint8_t **tb_ptr) +{ + uint8_t value = tci_read_reg8(regs, **tb_ptr); + *tb_ptr += 1; + return value; +} + +#if TCG_TARGET_HAS_ext8s_i32 || TCG_TARGET_HAS_ext8s_i64 +/* Read indexed register (8 bit signed) from bytecode. */ +static int8_t tci_read_r8s(const tcg_target_ulong *regs, uint8_t **tb_ptr) +{ + int8_t value = tci_read_reg8s(regs, **tb_ptr); + *tb_ptr += 1; + return value; +} +#endif + +/* Read indexed register (16 bit) from bytecode. */ +static uint16_t tci_read_r16(const tcg_target_ulong *regs, uint8_t **tb_ptr) +{ + uint16_t value = tci_read_reg16(regs, **tb_ptr); + *tb_ptr += 1; + return value; +} + +#if TCG_TARGET_HAS_ext16s_i32 || TCG_TARGET_HAS_ext16s_i64 +/* Read indexed register (16 bit signed) from bytecode. */ +static int16_t tci_read_r16s(const tcg_target_ulong *regs, uint8_t **tb_ptr) +{ + int16_t value = tci_read_reg16s(regs, **tb_ptr); + *tb_ptr += 1; + return value; +} +#endif + +/* Read indexed register (32 bit) from bytecode. */ +static uint32_t tci_read_r32(const tcg_target_ulong *regs, uint8_t **tb_ptr) +{ + uint32_t value = tci_read_reg32(regs, **tb_ptr); + *tb_ptr += 1; + return value; +} + +#if TCG_TARGET_REG_BITS == 32 +/* Read two indexed registers (2 * 32 bit) from bytecode. */ +static uint64_t tci_read_r64(const tcg_target_ulong *regs, uint8_t **tb_ptr) +{ + uint32_t low = tci_read_r32(regs, tb_ptr); + return tci_uint64(tci_read_r32(regs, tb_ptr), low); +} +#elif TCG_TARGET_REG_BITS == 64 +/* Read indexed register (32 bit signed) from bytecode. */ +static int32_t tci_read_r32s(const tcg_target_ulong *regs, uint8_t **tb_ptr) +{ + int32_t value = tci_read_reg32s(regs, **tb_ptr); + *tb_ptr += 1; + return value; +} + +/* Read indexed register (64 bit) from bytecode. */ +static uint64_t tci_read_r64(const tcg_target_ulong *regs, uint8_t **tb_ptr) +{ + uint64_t value = tci_read_reg64(regs, **tb_ptr); + *tb_ptr += 1; + return value; +} +#endif + +/* Read indexed register(s) with target address from bytecode. 
*/ +static target_ulong +tci_read_ulong(const tcg_target_ulong *regs, uint8_t **tb_ptr) +{ + target_ulong taddr = tci_read_r(regs, tb_ptr); +#if TARGET_LONG_BITS > TCG_TARGET_REG_BITS + taddr += (uint64_t)tci_read_r(regs, tb_ptr) << 32; +#endif + return taddr; +} + +/* Read indexed register or constant (native size) from bytecode. */ +static tcg_target_ulong +tci_read_ri(const tcg_target_ulong *regs, uint8_t **tb_ptr) +{ + tcg_target_ulong value; + TCGReg r = **tb_ptr; + *tb_ptr += 1; + if (r == TCG_CONST) { + value = tci_read_i(tb_ptr); + } else { + value = tci_read_reg(regs, r); + } + return value; +} + +/* Read indexed register or constant (32 bit) from bytecode. */ +static uint32_t tci_read_ri32(const tcg_target_ulong *regs, uint8_t **tb_ptr) +{ + uint32_t value; + TCGReg r = **tb_ptr; + *tb_ptr += 1; + if (r == TCG_CONST) { + value = tci_read_i32(tb_ptr); + } else { + value = tci_read_reg32(regs, r); + } + return value; +} + +#if TCG_TARGET_REG_BITS == 32 +/* Read two indexed registers or constants (2 * 32 bit) from bytecode. */ +static uint64_t tci_read_ri64(const tcg_target_ulong *regs, uint8_t **tb_ptr) +{ + uint32_t low = tci_read_ri32(regs, tb_ptr); + return tci_uint64(tci_read_ri32(regs, tb_ptr), low); +} +#elif TCG_TARGET_REG_BITS == 64 +/* Read indexed register or constant (64 bit) from bytecode. */ +static uint64_t tci_read_ri64(const tcg_target_ulong *regs, uint8_t **tb_ptr) +{ + uint64_t value; + TCGReg r = **tb_ptr; + *tb_ptr += 1; + if (r == TCG_CONST) { + value = tci_read_i64(tb_ptr); + } else { + value = tci_read_reg64(regs, r); + } + return value; +} +#endif + +static tcg_target_ulong tci_read_label(uint8_t **tb_ptr) +{ + tcg_target_ulong label = tci_read_i(tb_ptr); + tci_assert(label != 0); + return label; +} + +static bool tci_compare32(uint32_t u0, uint32_t u1, TCGCond condition) +{ + bool result = false; + int32_t i0 = u0; + int32_t i1 = u1; + switch (condition) { + case TCG_COND_EQ: + result = (u0 == u1); + break; + case TCG_COND_NE: + result = (u0 != u1); + break; + case TCG_COND_LT: + result = (i0 < i1); + break; + case TCG_COND_GE: + result = (i0 >= i1); + break; + case TCG_COND_LE: + result = (i0 <= i1); + break; + case TCG_COND_GT: + result = (i0 > i1); + break; + case TCG_COND_LTU: + result = (u0 < u1); + break; + case TCG_COND_GEU: + result = (u0 >= u1); + break; + case TCG_COND_LEU: + result = (u0 <= u1); + break; + case TCG_COND_GTU: + result = (u0 > u1); + break; + default: + TODO(); + } + return result; +} + +static bool tci_compare64(uint64_t u0, uint64_t u1, TCGCond condition) +{ + bool result = false; + int64_t i0 = u0; + int64_t i1 = u1; + switch (condition) { + case TCG_COND_EQ: + result = (u0 == u1); + break; + case TCG_COND_NE: + result = (u0 != u1); + break; + case TCG_COND_LT: + result = (i0 < i1); + break; + case TCG_COND_GE: + result = (i0 >= i1); + break; + case TCG_COND_LE: + result = (i0 <= i1); + break; + case TCG_COND_GT: + result = (i0 > i1); + break; + case TCG_COND_LTU: + result = (u0 < u1); + break; + case TCG_COND_GEU: + result = (u0 >= u1); + break; + case TCG_COND_LEU: + result = (u0 <= u1); + break; + case TCG_COND_GTU: + result = (u0 > u1); + break; + default: + TODO(); + } + return result; +} + +#ifdef CONFIG_SOFTMMU +# define qemu_ld_ub \ + helper_ret_ldub_mmu(env, taddr, oi, (uintptr_t)tb_ptr) +# define qemu_ld_leuw \ + helper_le_lduw_mmu(env, taddr, oi, (uintptr_t)tb_ptr) +# define qemu_ld_leul \ + helper_le_ldul_mmu(env, taddr, oi, (uintptr_t)tb_ptr) +# define qemu_ld_leq \ + helper_le_ldq_mmu(env, taddr, oi, 
(uintptr_t)tb_ptr) +# define qemu_ld_beuw \ + helper_be_lduw_mmu(env, taddr, oi, (uintptr_t)tb_ptr) +# define qemu_ld_beul \ + helper_be_ldul_mmu(env, taddr, oi, (uintptr_t)tb_ptr) +# define qemu_ld_beq \ + helper_be_ldq_mmu(env, taddr, oi, (uintptr_t)tb_ptr) +# define qemu_st_b(X) \ + helper_ret_stb_mmu(env, taddr, X, oi, (uintptr_t)tb_ptr) +# define qemu_st_lew(X) \ + helper_le_stw_mmu(env, taddr, X, oi, (uintptr_t)tb_ptr) +# define qemu_st_lel(X) \ + helper_le_stl_mmu(env, taddr, X, oi, (uintptr_t)tb_ptr) +# define qemu_st_leq(X) \ + helper_le_stq_mmu(env, taddr, X, oi, (uintptr_t)tb_ptr) +# define qemu_st_bew(X) \ + helper_be_stw_mmu(env, taddr, X, oi, (uintptr_t)tb_ptr) +# define qemu_st_bel(X) \ + helper_be_stl_mmu(env, taddr, X, oi, (uintptr_t)tb_ptr) +# define qemu_st_beq(X) \ + helper_be_stq_mmu(env, taddr, X, oi, (uintptr_t)tb_ptr) +#else +# define qemu_ld_ub ldub_p(g2h(taddr)) +# define qemu_ld_leuw lduw_le_p(g2h(taddr)) +# define qemu_ld_leul (uint32_t)ldl_le_p(g2h(taddr)) +# define qemu_ld_leq ldq_le_p(g2h(taddr)) +# define qemu_ld_beuw lduw_be_p(g2h(taddr)) +# define qemu_ld_beul (uint32_t)ldl_be_p(g2h(taddr)) +# define qemu_ld_beq ldq_be_p(g2h(taddr)) +# define qemu_st_b(X) stb_p(g2h(taddr), X) +# define qemu_st_lew(X) stw_le_p(g2h(taddr), X) +# define qemu_st_lel(X) stl_le_p(g2h(taddr), X) +# define qemu_st_leq(X) stq_le_p(g2h(taddr), X) +# define qemu_st_bew(X) stw_be_p(g2h(taddr), X) +# define qemu_st_bel(X) stl_be_p(g2h(taddr), X) +# define qemu_st_beq(X) stq_be_p(g2h(taddr), X) +#endif + +/* Interpret pseudo code in tb. */ +uintptr_t tcg_qemu_tb_exec(CPUArchState *env, uint8_t *tb_ptr) +{ + tcg_target_ulong regs[TCG_TARGET_NB_REGS]; + long tcg_temps[CPU_TEMP_BUF_NLONGS]; + uintptr_t sp_value = (uintptr_t)(tcg_temps + CPU_TEMP_BUF_NLONGS); + uintptr_t ret = 0; + + regs[TCG_AREG0] = (tcg_target_ulong)env; + regs[TCG_REG_CALL_STACK] = sp_value; + tci_assert(tb_ptr); + + for (;;) { + TCGOpcode opc = tb_ptr[0]; +#if defined(CONFIG_DEBUG_TCG) && !defined(NDEBUG) + uint8_t op_size = tb_ptr[1]; + uint8_t *old_code_ptr = tb_ptr; +#endif + tcg_target_ulong t0; + tcg_target_ulong t1; + tcg_target_ulong t2; + tcg_target_ulong label; + TCGCond condition; + target_ulong taddr; + uint8_t tmp8; + uint16_t tmp16; + uint32_t tmp32; + uint64_t tmp64; +#if TCG_TARGET_REG_BITS == 32 + uint64_t v64; +#endif + TCGMemOpIdx oi; + +#if defined(GETPC) + tci_tb_ptr = (uintptr_t)tb_ptr; +#endif + + /* Skip opcode and size entry. 
*/ + tb_ptr += 2; + + switch (opc) { + case INDEX_op_call: + t0 = tci_read_ri(regs, &tb_ptr); +#if TCG_TARGET_REG_BITS == 32 + tmp64 = ((helper_function)t0)(tci_read_reg(regs, TCG_REG_R0), + tci_read_reg(regs, TCG_REG_R1), + tci_read_reg(regs, TCG_REG_R2), + tci_read_reg(regs, TCG_REG_R3), + tci_read_reg(regs, TCG_REG_R5), + tci_read_reg(regs, TCG_REG_R6), + tci_read_reg(regs, TCG_REG_R7), + tci_read_reg(regs, TCG_REG_R8), + tci_read_reg(regs, TCG_REG_R9), + tci_read_reg(regs, TCG_REG_R10), + tci_read_reg(regs, TCG_REG_R11), + tci_read_reg(regs, TCG_REG_R12)); + tci_write_reg(regs, TCG_REG_R0, tmp64); + tci_write_reg(regs, TCG_REG_R1, tmp64 >> 32); +#else + tmp64 = ((helper_function)t0)(tci_read_reg(regs, TCG_REG_R0), + tci_read_reg(regs, TCG_REG_R1), + tci_read_reg(regs, TCG_REG_R2), + tci_read_reg(regs, TCG_REG_R3), + tci_read_reg(regs, TCG_REG_R5), + tci_read_reg(regs, TCG_REG_R6)); + tci_write_reg(regs, TCG_REG_R0, tmp64); +#endif + break; + case INDEX_op_br: + label = tci_read_label(&tb_ptr); + tci_assert(tb_ptr == old_code_ptr + op_size); + tb_ptr = (uint8_t *)label; + continue; + case INDEX_op_setcond_i32: + t0 = *tb_ptr++; + t1 = tci_read_r32(regs, &tb_ptr); + t2 = tci_read_ri32(regs, &tb_ptr); + condition = *tb_ptr++; + tci_write_reg32(regs, t0, tci_compare32(t1, t2, condition)); + break; +#if TCG_TARGET_REG_BITS == 32 + case INDEX_op_setcond2_i32: + t0 = *tb_ptr++; + tmp64 = tci_read_r64(regs, &tb_ptr); + v64 = tci_read_ri64(regs, &tb_ptr); + condition = *tb_ptr++; + tci_write_reg32(regs, t0, tci_compare64(tmp64, v64, condition)); + break; +#elif TCG_TARGET_REG_BITS == 64 + case INDEX_op_setcond_i64: + t0 = *tb_ptr++; + t1 = tci_read_r64(regs, &tb_ptr); + t2 = tci_read_ri64(regs, &tb_ptr); + condition = *tb_ptr++; + tci_write_reg64(regs, t0, tci_compare64(t1, t2, condition)); + break; +#endif + case INDEX_op_mov_i32: + t0 = *tb_ptr++; + t1 = tci_read_r32(regs, &tb_ptr); + tci_write_reg32(regs, t0, t1); + break; + case INDEX_op_movi_i32: + t0 = *tb_ptr++; + t1 = tci_read_i32(&tb_ptr); + tci_write_reg32(regs, t0, t1); + break; + + /* Load/store operations (32 bit). */ + + case INDEX_op_ld8u_i32: + t0 = *tb_ptr++; + t1 = tci_read_r(regs, &tb_ptr); + t2 = tci_read_s32(&tb_ptr); + tci_write_reg8(regs, t0, *(uint8_t *)(t1 + t2)); + break; + case INDEX_op_ld8s_i32: + TODO(); + break; + case INDEX_op_ld16u_i32: + TODO(); + break; + case INDEX_op_ld16s_i32: + TODO(); + break; + case INDEX_op_ld_i32: + t0 = *tb_ptr++; + t1 = tci_read_r(regs, &tb_ptr); + t2 = tci_read_s32(&tb_ptr); + tci_write_reg32(regs, t0, *(uint32_t *)(t1 + t2)); + break; + case INDEX_op_st8_i32: + t0 = tci_read_r8(regs, &tb_ptr); + t1 = tci_read_r(regs, &tb_ptr); + t2 = tci_read_s32(&tb_ptr); + *(uint8_t *)(t1 + t2) = t0; + break; + case INDEX_op_st16_i32: + t0 = tci_read_r16(regs, &tb_ptr); + t1 = tci_read_r(regs, &tb_ptr); + t2 = tci_read_s32(&tb_ptr); + *(uint16_t *)(t1 + t2) = t0; + break; + case INDEX_op_st_i32: + t0 = tci_read_r32(regs, &tb_ptr); + t1 = tci_read_r(regs, &tb_ptr); + t2 = tci_read_s32(&tb_ptr); + tci_assert(t1 != sp_value || (int32_t)t2 < 0); + *(uint32_t *)(t1 + t2) = t0; + break; + + /* Arithmetic operations (32 bit). 
*/ + + case INDEX_op_add_i32: + t0 = *tb_ptr++; + t1 = tci_read_ri32(regs, &tb_ptr); + t2 = tci_read_ri32(regs, &tb_ptr); + tci_write_reg32(regs, t0, t1 + t2); + break; + case INDEX_op_sub_i32: + t0 = *tb_ptr++; + t1 = tci_read_ri32(regs, &tb_ptr); + t2 = tci_read_ri32(regs, &tb_ptr); + tci_write_reg32(regs, t0, t1 - t2); + break; + case INDEX_op_mul_i32: + t0 = *tb_ptr++; + t1 = tci_read_ri32(regs, &tb_ptr); + t2 = tci_read_ri32(regs, &tb_ptr); + tci_write_reg32(regs, t0, t1 * t2); + break; +#if TCG_TARGET_HAS_div_i32 + case INDEX_op_div_i32: + t0 = *tb_ptr++; + t1 = tci_read_ri32(regs, &tb_ptr); + t2 = tci_read_ri32(regs, &tb_ptr); + tci_write_reg32(regs, t0, (int32_t)t1 / (int32_t)t2); + break; + case INDEX_op_divu_i32: + t0 = *tb_ptr++; + t1 = tci_read_ri32(regs, &tb_ptr); + t2 = tci_read_ri32(regs, &tb_ptr); + tci_write_reg32(regs, t0, t1 / t2); + break; + case INDEX_op_rem_i32: + t0 = *tb_ptr++; + t1 = tci_read_ri32(regs, &tb_ptr); + t2 = tci_read_ri32(regs, &tb_ptr); + tci_write_reg32(regs, t0, (int32_t)t1 % (int32_t)t2); + break; + case INDEX_op_remu_i32: + t0 = *tb_ptr++; + t1 = tci_read_ri32(regs, &tb_ptr); + t2 = tci_read_ri32(regs, &tb_ptr); + tci_write_reg32(regs, t0, t1 % t2); + break; +#elif TCG_TARGET_HAS_div2_i32 + case INDEX_op_div2_i32: + case INDEX_op_divu2_i32: + TODO(); + break; +#endif + case INDEX_op_and_i32: + t0 = *tb_ptr++; + t1 = tci_read_ri32(regs, &tb_ptr); + t2 = tci_read_ri32(regs, &tb_ptr); + tci_write_reg32(regs, t0, t1 & t2); + break; + case INDEX_op_or_i32: + t0 = *tb_ptr++; + t1 = tci_read_ri32(regs, &tb_ptr); + t2 = tci_read_ri32(regs, &tb_ptr); + tci_write_reg32(regs, t0, t1 | t2); + break; + case INDEX_op_xor_i32: + t0 = *tb_ptr++; + t1 = tci_read_ri32(regs, &tb_ptr); + t2 = tci_read_ri32(regs, &tb_ptr); + tci_write_reg32(regs, t0, t1 ^ t2); + break; + + /* Shift/rotate operations (32 bit). 
*/ + + case INDEX_op_shl_i32: + t0 = *tb_ptr++; + t1 = tci_read_ri32(regs, &tb_ptr); + t2 = tci_read_ri32(regs, &tb_ptr); + tci_write_reg32(regs, t0, t1 << (t2 & 31)); + break; + case INDEX_op_shr_i32: + t0 = *tb_ptr++; + t1 = tci_read_ri32(regs, &tb_ptr); + t2 = tci_read_ri32(regs, &tb_ptr); + tci_write_reg32(regs, t0, t1 >> (t2 & 31)); + break; + case INDEX_op_sar_i32: + t0 = *tb_ptr++; + t1 = tci_read_ri32(regs, &tb_ptr); + t2 = tci_read_ri32(regs, &tb_ptr); + tci_write_reg32(regs, t0, ((int32_t)t1 >> (t2 & 31))); + break; +#if TCG_TARGET_HAS_rot_i32 + case INDEX_op_rotl_i32: + t0 = *tb_ptr++; + t1 = tci_read_ri32(regs, &tb_ptr); + t2 = tci_read_ri32(regs, &tb_ptr); + tci_write_reg32(regs, t0, rol32(t1, t2 & 31)); + break; + case INDEX_op_rotr_i32: + t0 = *tb_ptr++; + t1 = tci_read_ri32(regs, &tb_ptr); + t2 = tci_read_ri32(regs, &tb_ptr); + tci_write_reg32(regs, t0, ror32(t1, t2 & 31)); + break; +#endif +#if TCG_TARGET_HAS_deposit_i32 + case INDEX_op_deposit_i32: + t0 = *tb_ptr++; + t1 = tci_read_r32(regs, &tb_ptr); + t2 = tci_read_r32(regs, &tb_ptr); + tmp16 = *tb_ptr++; + tmp8 = *tb_ptr++; + tmp32 = (((1 << tmp8) - 1) << tmp16); + tci_write_reg32(regs, t0, (t1 & ~tmp32) | ((t2 << tmp16) & tmp32)); + break; +#endif + case INDEX_op_brcond_i32: + t0 = tci_read_r32(regs, &tb_ptr); + t1 = tci_read_ri32(regs, &tb_ptr); + condition = *tb_ptr++; + label = tci_read_label(&tb_ptr); + if (tci_compare32(t0, t1, condition)) { + tci_assert(tb_ptr == old_code_ptr + op_size); + tb_ptr = (uint8_t *)label; + continue; + } + break; +#if TCG_TARGET_REG_BITS == 32 + case INDEX_op_add2_i32: + t0 = *tb_ptr++; + t1 = *tb_ptr++; + tmp64 = tci_read_r64(regs, &tb_ptr); + tmp64 += tci_read_r64(regs, &tb_ptr); + tci_write_reg64(regs, t1, t0, tmp64); + break; + case INDEX_op_sub2_i32: + t0 = *tb_ptr++; + t1 = *tb_ptr++; + tmp64 = tci_read_r64(regs, &tb_ptr); + tmp64 -= tci_read_r64(regs, &tb_ptr); + tci_write_reg64(regs, t1, t0, tmp64); + break; + case INDEX_op_brcond2_i32: + tmp64 = tci_read_r64(regs, &tb_ptr); + v64 = tci_read_ri64(regs, &tb_ptr); + condition = *tb_ptr++; + label = tci_read_label(&tb_ptr); + if (tci_compare64(tmp64, v64, condition)) { + tci_assert(tb_ptr == old_code_ptr + op_size); + tb_ptr = (uint8_t *)label; + continue; + } + break; + case INDEX_op_mulu2_i32: + t0 = *tb_ptr++; + t1 = *tb_ptr++; + t2 = tci_read_r32(regs, &tb_ptr); + tmp64 = tci_read_r32(regs, &tb_ptr); + tci_write_reg64(regs, t1, t0, t2 * tmp64); + break; +#endif /* TCG_TARGET_REG_BITS == 32 */ +#if TCG_TARGET_HAS_ext8s_i32 + case INDEX_op_ext8s_i32: + t0 = *tb_ptr++; + t1 = tci_read_r8s(regs, &tb_ptr); + tci_write_reg32(regs, t0, t1); + break; +#endif +#if TCG_TARGET_HAS_ext16s_i32 + case INDEX_op_ext16s_i32: + t0 = *tb_ptr++; + t1 = tci_read_r16s(regs, &tb_ptr); + tci_write_reg32(regs, t0, t1); + break; +#endif +#if TCG_TARGET_HAS_ext8u_i32 + case INDEX_op_ext8u_i32: + t0 = *tb_ptr++; + t1 = tci_read_r8(regs, &tb_ptr); + tci_write_reg32(regs, t0, t1); + break; +#endif +#if TCG_TARGET_HAS_ext16u_i32 + case INDEX_op_ext16u_i32: + t0 = *tb_ptr++; + t1 = tci_read_r16(regs, &tb_ptr); + tci_write_reg32(regs, t0, t1); + break; +#endif +#if TCG_TARGET_HAS_bswap16_i32 + case INDEX_op_bswap16_i32: + t0 = *tb_ptr++; + t1 = tci_read_r16(regs, &tb_ptr); + tci_write_reg32(regs, t0, bswap16(t1)); + break; +#endif +#if TCG_TARGET_HAS_bswap32_i32 + case INDEX_op_bswap32_i32: + t0 = *tb_ptr++; + t1 = tci_read_r32(regs, &tb_ptr); + tci_write_reg32(regs, t0, bswap32(t1)); + break; +#endif +#if TCG_TARGET_HAS_not_i32 + case INDEX_op_not_i32: + t0 
= *tb_ptr++; + t1 = tci_read_r32(regs, &tb_ptr); + tci_write_reg32(regs, t0, ~t1); + break; +#endif +#if TCG_TARGET_HAS_neg_i32 + case INDEX_op_neg_i32: + t0 = *tb_ptr++; + t1 = tci_read_r32(regs, &tb_ptr); + tci_write_reg32(regs, t0, -t1); + break; +#endif +#if TCG_TARGET_REG_BITS == 64 + case INDEX_op_mov_i64: + t0 = *tb_ptr++; + t1 = tci_read_r64(regs, &tb_ptr); + tci_write_reg64(regs, t0, t1); + break; + case INDEX_op_movi_i64: + t0 = *tb_ptr++; + t1 = tci_read_i64(&tb_ptr); + tci_write_reg64(regs, t0, t1); + break; + + /* Load/store operations (64 bit). */ + + case INDEX_op_ld8u_i64: + t0 = *tb_ptr++; + t1 = tci_read_r(regs, &tb_ptr); + t2 = tci_read_s32(&tb_ptr); + tci_write_reg8(regs, t0, *(uint8_t *)(t1 + t2)); + break; + case INDEX_op_ld8s_i64: + TODO(); + break; + case INDEX_op_ld16u_i64: + t0 = *tb_ptr++; + t1 = tci_read_r(regs, &tb_ptr); + t2 = tci_read_s32(&tb_ptr); + tci_write_reg16(regs, t0, *(uint16_t *)(t1 + t2)); + break; + case INDEX_op_ld16s_i64: + TODO(); + break; + case INDEX_op_ld32u_i64: + t0 = *tb_ptr++; + t1 = tci_read_r(regs, &tb_ptr); + t2 = tci_read_s32(&tb_ptr); + tci_write_reg32(regs, t0, *(uint32_t *)(t1 + t2)); + break; + case INDEX_op_ld32s_i64: + t0 = *tb_ptr++; + t1 = tci_read_r(regs, &tb_ptr); + t2 = tci_read_s32(&tb_ptr); + tci_write_reg32s(regs, t0, *(int32_t *)(t1 + t2)); + break; + case INDEX_op_ld_i64: + t0 = *tb_ptr++; + t1 = tci_read_r(regs, &tb_ptr); + t2 = tci_read_s32(&tb_ptr); + tci_write_reg64(regs, t0, *(uint64_t *)(t1 + t2)); + break; + case INDEX_op_st8_i64: + t0 = tci_read_r8(regs, &tb_ptr); + t1 = tci_read_r(regs, &tb_ptr); + t2 = tci_read_s32(&tb_ptr); + *(uint8_t *)(t1 + t2) = t0; + break; + case INDEX_op_st16_i64: + t0 = tci_read_r16(regs, &tb_ptr); + t1 = tci_read_r(regs, &tb_ptr); + t2 = tci_read_s32(&tb_ptr); + *(uint16_t *)(t1 + t2) = t0; + break; + case INDEX_op_st32_i64: + t0 = tci_read_r32(regs, &tb_ptr); + t1 = tci_read_r(regs, &tb_ptr); + t2 = tci_read_s32(&tb_ptr); + *(uint32_t *)(t1 + t2) = t0; + break; + case INDEX_op_st_i64: + t0 = tci_read_r64(regs, &tb_ptr); + t1 = tci_read_r(regs, &tb_ptr); + t2 = tci_read_s32(&tb_ptr); + tci_assert(t1 != sp_value || (int32_t)t2 < 0); + *(uint64_t *)(t1 + t2) = t0; + break; + + /* Arithmetic operations (64 bit). */ + + case INDEX_op_add_i64: + t0 = *tb_ptr++; + t1 = tci_read_ri64(regs, &tb_ptr); + t2 = tci_read_ri64(regs, &tb_ptr); + tci_write_reg64(regs, t0, t1 + t2); + break; + case INDEX_op_sub_i64: + t0 = *tb_ptr++; + t1 = tci_read_ri64(regs, &tb_ptr); + t2 = tci_read_ri64(regs, &tb_ptr); + tci_write_reg64(regs, t0, t1 - t2); + break; + case INDEX_op_mul_i64: + t0 = *tb_ptr++; + t1 = tci_read_ri64(regs, &tb_ptr); + t2 = tci_read_ri64(regs, &tb_ptr); + tci_write_reg64(regs, t0, t1 * t2); + break; +#if TCG_TARGET_HAS_div_i64 + case INDEX_op_div_i64: + case INDEX_op_divu_i64: + case INDEX_op_rem_i64: + case INDEX_op_remu_i64: + TODO(); + break; +#elif TCG_TARGET_HAS_div2_i64 + case INDEX_op_div2_i64: + case INDEX_op_divu2_i64: + TODO(); + break; +#endif + case INDEX_op_and_i64: + t0 = *tb_ptr++; + t1 = tci_read_ri64(regs, &tb_ptr); + t2 = tci_read_ri64(regs, &tb_ptr); + tci_write_reg64(regs, t0, t1 & t2); + break; + case INDEX_op_or_i64: + t0 = *tb_ptr++; + t1 = tci_read_ri64(regs, &tb_ptr); + t2 = tci_read_ri64(regs, &tb_ptr); + tci_write_reg64(regs, t0, t1 | t2); + break; + case INDEX_op_xor_i64: + t0 = *tb_ptr++; + t1 = tci_read_ri64(regs, &tb_ptr); + t2 = tci_read_ri64(regs, &tb_ptr); + tci_write_reg64(regs, t0, t1 ^ t2); + break; + + /* Shift/rotate operations (64 bit). 
*/ + + case INDEX_op_shl_i64: + t0 = *tb_ptr++; + t1 = tci_read_ri64(regs, &tb_ptr); + t2 = tci_read_ri64(regs, &tb_ptr); + tci_write_reg64(regs, t0, t1 << (t2 & 63)); + break; + case INDEX_op_shr_i64: + t0 = *tb_ptr++; + t1 = tci_read_ri64(regs, &tb_ptr); + t2 = tci_read_ri64(regs, &tb_ptr); + tci_write_reg64(regs, t0, t1 >> (t2 & 63)); + break; + case INDEX_op_sar_i64: + t0 = *tb_ptr++; + t1 = tci_read_ri64(regs, &tb_ptr); + t2 = tci_read_ri64(regs, &tb_ptr); + tci_write_reg64(regs, t0, ((int64_t)t1 >> (t2 & 63))); + break; +#if TCG_TARGET_HAS_rot_i64 + case INDEX_op_rotl_i64: + t0 = *tb_ptr++; + t1 = tci_read_ri64(regs, &tb_ptr); + t2 = tci_read_ri64(regs, &tb_ptr); + tci_write_reg64(regs, t0, rol64(t1, t2 & 63)); + break; + case INDEX_op_rotr_i64: + t0 = *tb_ptr++; + t1 = tci_read_ri64(regs, &tb_ptr); + t2 = tci_read_ri64(regs, &tb_ptr); + tci_write_reg64(regs, t0, ror64(t1, t2 & 63)); + break; +#endif +#if TCG_TARGET_HAS_deposit_i64 + case INDEX_op_deposit_i64: + t0 = *tb_ptr++; + t1 = tci_read_r64(regs, &tb_ptr); + t2 = tci_read_r64(regs, &tb_ptr); + tmp16 = *tb_ptr++; + tmp8 = *tb_ptr++; + tmp64 = (((1ULL << tmp8) - 1) << tmp16); + tci_write_reg64(regs, t0, (t1 & ~tmp64) | ((t2 << tmp16) & tmp64)); + break; +#endif + case INDEX_op_brcond_i64: + t0 = tci_read_r64(regs, &tb_ptr); + t1 = tci_read_ri64(regs, &tb_ptr); + condition = *tb_ptr++; + label = tci_read_label(&tb_ptr); + if (tci_compare64(t0, t1, condition)) { + tci_assert(tb_ptr == old_code_ptr + op_size); + tb_ptr = (uint8_t *)label; + continue; + } + break; +#if TCG_TARGET_HAS_ext8u_i64 + case INDEX_op_ext8u_i64: + t0 = *tb_ptr++; + t1 = tci_read_r8(regs, &tb_ptr); + tci_write_reg64(regs, t0, t1); + break; +#endif +#if TCG_TARGET_HAS_ext8s_i64 + case INDEX_op_ext8s_i64: + t0 = *tb_ptr++; + t1 = tci_read_r8s(regs, &tb_ptr); + tci_write_reg64(regs, t0, t1); + break; +#endif +#if TCG_TARGET_HAS_ext16s_i64 + case INDEX_op_ext16s_i64: + t0 = *tb_ptr++; + t1 = tci_read_r16s(regs, &tb_ptr); + tci_write_reg64(regs, t0, t1); + break; +#endif +#if TCG_TARGET_HAS_ext16u_i64 + case INDEX_op_ext16u_i64: + t0 = *tb_ptr++; + t1 = tci_read_r16(regs, &tb_ptr); + tci_write_reg64(regs, t0, t1); + break; +#endif +#if TCG_TARGET_HAS_ext32s_i64 + case INDEX_op_ext32s_i64: +#endif + case INDEX_op_ext_i32_i64: + t0 = *tb_ptr++; + t1 = tci_read_r32s(regs, &tb_ptr); + tci_write_reg64(regs, t0, t1); + break; +#if TCG_TARGET_HAS_ext32u_i64 + case INDEX_op_ext32u_i64: +#endif + case INDEX_op_extu_i32_i64: + t0 = *tb_ptr++; + t1 = tci_read_r32(regs, &tb_ptr); + tci_write_reg64(regs, t0, t1); + break; +#if TCG_TARGET_HAS_bswap16_i64 + case INDEX_op_bswap16_i64: + t0 = *tb_ptr++; + t1 = tci_read_r16(regs, &tb_ptr); + tci_write_reg64(regs, t0, bswap16(t1)); + break; +#endif +#if TCG_TARGET_HAS_bswap32_i64 + case INDEX_op_bswap32_i64: + t0 = *tb_ptr++; + t1 = tci_read_r32(regs, &tb_ptr); + tci_write_reg64(regs, t0, bswap32(t1)); + break; +#endif +#if TCG_TARGET_HAS_bswap64_i64 + case INDEX_op_bswap64_i64: + t0 = *tb_ptr++; + t1 = tci_read_r64(regs, &tb_ptr); + tci_write_reg64(regs, t0, bswap64(t1)); + break; +#endif +#if TCG_TARGET_HAS_not_i64 + case INDEX_op_not_i64: + t0 = *tb_ptr++; + t1 = tci_read_r64(regs, &tb_ptr); + tci_write_reg64(regs, t0, ~t1); + break; +#endif +#if TCG_TARGET_HAS_neg_i64 + case INDEX_op_neg_i64: + t0 = *tb_ptr++; + t1 = tci_read_r64(regs, &tb_ptr); + tci_write_reg64(regs, t0, -t1); + break; +#endif +#endif /* TCG_TARGET_REG_BITS == 64 */ + + /* QEMU specific operations. 
*/ + + case INDEX_op_exit_tb: + ret = *(uint64_t *)tb_ptr; + goto exit; + break; + case INDEX_op_goto_tb: + /* Jump address is aligned */ + tb_ptr = QEMU_ALIGN_PTR_UP(tb_ptr, 4); + t0 = atomic_read((int32_t *)tb_ptr); + tb_ptr += sizeof(int32_t); + tci_assert(tb_ptr == old_code_ptr + op_size); + tb_ptr += (int32_t)t0; + continue; + case INDEX_op_qemu_ld_i32: + t0 = *tb_ptr++; + taddr = tci_read_ulong(regs, &tb_ptr); + oi = tci_read_i(&tb_ptr); + switch (get_memop(oi) & (MO_BSWAP | MO_SSIZE)) { + case MO_UB: + tmp32 = qemu_ld_ub; + break; + case MO_SB: + tmp32 = (int8_t)qemu_ld_ub; + break; + case MO_LEUW: + tmp32 = qemu_ld_leuw; + break; + case MO_LESW: + tmp32 = (int16_t)qemu_ld_leuw; + break; + case MO_LEUL: + tmp32 = qemu_ld_leul; + break; + case MO_BEUW: + tmp32 = qemu_ld_beuw; + break; + case MO_BESW: + tmp32 = (int16_t)qemu_ld_beuw; + break; + case MO_BEUL: + tmp32 = qemu_ld_beul; + break; + default: + tcg_abort(); + } + tci_write_reg(regs, t0, tmp32); + break; + case INDEX_op_qemu_ld_i64: + t0 = *tb_ptr++; + if (TCG_TARGET_REG_BITS == 32) { + t1 = *tb_ptr++; + } + taddr = tci_read_ulong(regs, &tb_ptr); + oi = tci_read_i(&tb_ptr); + switch (get_memop(oi) & (MO_BSWAP | MO_SSIZE)) { + case MO_UB: + tmp64 = qemu_ld_ub; + break; + case MO_SB: + tmp64 = (int8_t)qemu_ld_ub; + break; + case MO_LEUW: + tmp64 = qemu_ld_leuw; + break; + case MO_LESW: + tmp64 = (int16_t)qemu_ld_leuw; + break; + case MO_LEUL: + tmp64 = qemu_ld_leul; + break; + case MO_LESL: + tmp64 = (int32_t)qemu_ld_leul; + break; + case MO_LEQ: + tmp64 = qemu_ld_leq; + break; + case MO_BEUW: + tmp64 = qemu_ld_beuw; + break; + case MO_BESW: + tmp64 = (int16_t)qemu_ld_beuw; + break; + case MO_BEUL: + tmp64 = qemu_ld_beul; + break; + case MO_BESL: + tmp64 = (int32_t)qemu_ld_beul; + break; + case MO_BEQ: + tmp64 = qemu_ld_beq; + break; + default: + tcg_abort(); + } + tci_write_reg(regs, t0, tmp64); + if (TCG_TARGET_REG_BITS == 32) { + tci_write_reg(regs, t1, tmp64 >> 32); + } + break; + case INDEX_op_qemu_st_i32: + t0 = tci_read_r(regs, &tb_ptr); + taddr = tci_read_ulong(regs, &tb_ptr); + oi = tci_read_i(&tb_ptr); + switch (get_memop(oi) & (MO_BSWAP | MO_SIZE)) { + case MO_UB: + qemu_st_b(t0); + break; + case MO_LEUW: + qemu_st_lew(t0); + break; + case MO_LEUL: + qemu_st_lel(t0); + break; + case MO_BEUW: + qemu_st_bew(t0); + break; + case MO_BEUL: + qemu_st_bel(t0); + break; + default: + tcg_abort(); + } + break; + case INDEX_op_qemu_st_i64: + tmp64 = tci_read_r64(regs, &tb_ptr); + taddr = tci_read_ulong(regs, &tb_ptr); + oi = tci_read_i(&tb_ptr); + switch (get_memop(oi) & (MO_BSWAP | MO_SIZE)) { + case MO_UB: + qemu_st_b(tmp64); + break; + case MO_LEUW: + qemu_st_lew(tmp64); + break; + case MO_LEUL: + qemu_st_lel(tmp64); + break; + case MO_LEQ: + qemu_st_leq(tmp64); + break; + case MO_BEUW: + qemu_st_bew(tmp64); + break; + case MO_BEUL: + qemu_st_bel(tmp64); + break; + case MO_BEQ: + qemu_st_beq(tmp64); + break; + default: + tcg_abort(); + } + break; + case INDEX_op_mb: + /* Ensure ordering for all kinds */ + smp_mb(); + break; + default: + TODO(); + break; + } + tci_assert(tb_ptr == old_code_ptr + op_size); + } +exit: + return ret; +} diff --git a/qemu/tcg/tci/README b/qemu/tcg/tci/README new file mode 100644 index 0000000000..386c3c7507 --- /dev/null +++ b/qemu/tcg/tci/README @@ -0,0 +1,130 @@ +TCG Interpreter (TCI) - Copyright (c) 2011 Stefan Weil. + +This file is released under the BSD license. 
+
+1) Introduction
+
+TCG (Tiny Code Generator) is a code generator which translates
+code fragments ("basic blocks") from target code (any of the
+targets supported by QEMU) to a code representation which
+can be run on a host.
+
+QEMU can create native code for some hosts (arm, i386, ia64, ppc, ppc64,
+s390, sparc, x86_64). For others, unofficial host support was written.
+
+By adding a code generator for a virtual machine and using an
+interpreter for the generated bytecode, it is possible to
+support (almost) any host.
+
+This is what TCI (Tiny Code Interpreter) does.
+
+2) Implementation
+
+Like each TCG host frontend, TCI implements the code generator in
+tcg-target.inc.c, tcg-target.h. Both files are in directory tcg/tci.
+
+The additional file tcg/tci.c adds the interpreter.
+
+The bytecode consists of opcodes (same numeric values as those used by
+TCG), command length and arguments of variable size and number.
+
+3) Usage
+
+For hosts without native TCG, the interpreter TCI must be enabled by
+
+ configure --enable-tcg-interpreter
+
+If configure is called without --enable-tcg-interpreter, it will
+suggest using this option. Setting it automatically would need
+additional code in configure which must be fixed when new native TCG
+implementations are added.
+
+System emulation should work on any 32 or 64 bit host.
+User mode emulation might work. Maybe a new linker script (*.ld)
+is needed. Byte order might be wrong (on big endian hosts)
+and might need fixes in configure.
+
+For hosts with native TCG, the interpreter TCI can be enabled by
+
+ configure --enable-tcg-interpreter
+
+The only difference between running QEMU with TCI and running without TCI
+should be speed. Especially during development of TCI, it was very
+useful to compare runs with and without TCI. Create /tmp/qemu.log by
+
+ qemu-system-i386 -d in_asm,op_opt,cpu -D /tmp/qemu.log -singlestep
+
+once with the interpreter and once without it, and compare the resulting
+qemu.log files. This is also useful to see the effects of additional
+registers or additional opcodes (it is easy to modify the virtual machine).
+It can also be used to verify native TCGs.
+
+Hosts with native TCG can also enable TCI by claiming to be unsupported:
+
+ configure --cpu=unknown --enable-tcg-interpreter
+
+configure then no longer uses the native linker script (*.ld) for
+user mode emulation.
+
+
+4) Status
+
+TCI needs special handling for 32 and 64 bit hosts, 32 and 64 bit targets,
+and hosts and targets with the same or different endianness.
+
+ | host (le) host (be)
+ | 32 64 32 64
+------------+------------------------------------------------------------
+target (le) | s0, u0 s1, u1 s?, u? s?, u?
+32 bit |
+ |
+target (le) | sc, uc s1, u1 s?, u? s?, u?
+64 bit |
+ |
+target (be) | sc, u0 sc, uc s?, u? s?, u?
+32 bit |
+ |
+target (be) | sc, uc sc, uc s?, u? s?, u?
+64 bit |
+ |
+
+System emulation
+s? = untested
+sc = compiles
+s0 = bios works
+s1 = grub works
+s2 = Linux boots
+
+Linux user mode emulation
+u? = untested
+uc = compiles
+u0 = static hello works
+u1 = linux-user-test works
+
+5) Todo list
+
+* TCI is not widely tested. It was written and tested on an x86_64 host
+ running i386 and x86_64 system emulation and Linux user mode.
+ A cross compiled QEMU for i386 host also works with the same basic tests.
+ A cross compiled QEMU for mipsel host works, too. It is terribly slow
+ because I run it in a mips malta emulation, so it is an interpreted
+ emulation in an emulation.
+ A cross compiled QEMU for arm host works (tested with pc bios).
+ A cross compiled QEMU for ppc host works at least partially:
+ i386-linux-user/qemu-i386 can run a simple hello-world program
+ (tested in a ppc emulation).
+
+* Some TCG opcodes are missing in the code generator and/or
+ in the interpreter. These opcodes raise a runtime exception, so it is
+ possible to see where code must be added.
+
+* The pseudo code is not optimized and still ugly. For hosts with special
+ alignment requirements, it needs some fixes (maybe aligned bytecode
+ would also improve speed for hosts which support byte alignment).
+
+* A better disassembler for the pseudo code would be nice (a very primitive
+ disassembler is included in tcg-target.inc.c).
+
+* It might be useful to have a runtime option which selects the native TCG
+ or TCI, so QEMU would have to include two TCGs. Today, selecting TCI
+ is a configure option, so you need two compilations of QEMU.
diff --git a/qemu/tcg/tci/tcg-target.h b/qemu/tcg/tci/tcg-target.h
new file mode 100644
index 0000000000..8b90ab71cb
--- /dev/null
+++ b/qemu/tcg/tci/tcg-target.h
@@ -0,0 +1,213 @@
+/*
+ * Tiny Code Generator for QEMU
+ *
+ * Copyright (c) 2009, 2011 Stefan Weil
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+/*
+ * This code implements a TCG which does not generate machine code for some
+ * real target machine but which generates virtual machine code for an
+ * interpreter. Interpreted pseudo code is slow, but it works on any host.
+ *
+ * Some remarks might help in understanding the code:
+ *
+ * "target" or "TCG target" is the machine which runs the generated code.
+ * This is different from the usual meaning in QEMU, where "target" is the
+ * emulated machine. So normally the QEMU host is identical to the TCG target.
+ * Here the TCG target is a virtual machine, but this virtual machine must
+ * use the same word size as the real machine.
+ * Therefore, we need both 32 and 64 bit virtual machines (interpreters).
+ */
+
+#ifndef TCG_TARGET_H
+#define TCG_TARGET_H
+
+#define TCG_TARGET_INTERPRETER 1
+#define TCG_TARGET_INSN_UNIT_SIZE 1
+#define TCG_TARGET_TLB_DISPLACEMENT_BITS 32
+
+#if UINTPTR_MAX == UINT32_MAX
+# define TCG_TARGET_REG_BITS 32
+#elif UINTPTR_MAX == UINT64_MAX
+# define TCG_TARGET_REG_BITS 64
+#else
+# error Unknown pointer size for tci target
+#endif
+
+#ifdef CONFIG_DEBUG_TCG
+/* Enable debug output. */
+#define CONFIG_DEBUG_TCG_INTERPRETER
+#endif
+
+/* Optional instructions.
*/ + +#define TCG_TARGET_HAS_bswap16_i32 1 +#define TCG_TARGET_HAS_bswap32_i32 1 +#define TCG_TARGET_HAS_div_i32 1 +#define TCG_TARGET_HAS_rem_i32 1 +#define TCG_TARGET_HAS_ext8s_i32 1 +#define TCG_TARGET_HAS_ext16s_i32 1 +#define TCG_TARGET_HAS_ext8u_i32 1 +#define TCG_TARGET_HAS_ext16u_i32 1 +#define TCG_TARGET_HAS_andc_i32 0 +#define TCG_TARGET_HAS_deposit_i32 1 +#define TCG_TARGET_HAS_extract_i32 0 +#define TCG_TARGET_HAS_sextract_i32 0 +#define TCG_TARGET_HAS_extract2_i32 0 +#define TCG_TARGET_HAS_eqv_i32 0 +#define TCG_TARGET_HAS_nand_i32 0 +#define TCG_TARGET_HAS_nor_i32 0 +#define TCG_TARGET_HAS_clz_i32 0 +#define TCG_TARGET_HAS_ctz_i32 0 +#define TCG_TARGET_HAS_ctpop_i32 0 +#define TCG_TARGET_HAS_neg_i32 1 +#define TCG_TARGET_HAS_not_i32 1 +#define TCG_TARGET_HAS_orc_i32 0 +#define TCG_TARGET_HAS_rot_i32 1 +#define TCG_TARGET_HAS_movcond_i32 0 +#define TCG_TARGET_HAS_muls2_i32 0 +#define TCG_TARGET_HAS_muluh_i32 0 +#define TCG_TARGET_HAS_mulsh_i32 0 +#define TCG_TARGET_HAS_goto_ptr 0 +#define TCG_TARGET_HAS_direct_jump 1 + +#if TCG_TARGET_REG_BITS == 64 +#define TCG_TARGET_HAS_extrl_i64_i32 0 +#define TCG_TARGET_HAS_extrh_i64_i32 0 +#define TCG_TARGET_HAS_bswap16_i64 1 +#define TCG_TARGET_HAS_bswap32_i64 1 +#define TCG_TARGET_HAS_bswap64_i64 1 +#define TCG_TARGET_HAS_deposit_i64 1 +#define TCG_TARGET_HAS_extract_i64 0 +#define TCG_TARGET_HAS_sextract_i64 0 +#define TCG_TARGET_HAS_extract2_i64 0 +#define TCG_TARGET_HAS_div_i64 0 +#define TCG_TARGET_HAS_rem_i64 0 +#define TCG_TARGET_HAS_ext8s_i64 1 +#define TCG_TARGET_HAS_ext16s_i64 1 +#define TCG_TARGET_HAS_ext32s_i64 1 +#define TCG_TARGET_HAS_ext8u_i64 1 +#define TCG_TARGET_HAS_ext16u_i64 1 +#define TCG_TARGET_HAS_ext32u_i64 1 +#define TCG_TARGET_HAS_andc_i64 0 +#define TCG_TARGET_HAS_eqv_i64 0 +#define TCG_TARGET_HAS_nand_i64 0 +#define TCG_TARGET_HAS_nor_i64 0 +#define TCG_TARGET_HAS_clz_i64 0 +#define TCG_TARGET_HAS_ctz_i64 0 +#define TCG_TARGET_HAS_ctpop_i64 0 +#define TCG_TARGET_HAS_neg_i64 1 +#define TCG_TARGET_HAS_not_i64 1 +#define TCG_TARGET_HAS_orc_i64 0 +#define TCG_TARGET_HAS_rot_i64 1 +#define TCG_TARGET_HAS_movcond_i64 0 +#define TCG_TARGET_HAS_muls2_i64 0 +#define TCG_TARGET_HAS_add2_i32 0 +#define TCG_TARGET_HAS_sub2_i32 0 +#define TCG_TARGET_HAS_mulu2_i32 0 +#define TCG_TARGET_HAS_add2_i64 0 +#define TCG_TARGET_HAS_sub2_i64 0 +#define TCG_TARGET_HAS_mulu2_i64 0 +#define TCG_TARGET_HAS_muluh_i64 0 +#define TCG_TARGET_HAS_mulsh_i64 0 +#else +#define TCG_TARGET_HAS_mulu2_i32 1 +#endif /* TCG_TARGET_REG_BITS == 64 */ + +/* Number of registers available. + For 32 bit hosts, we need more than 8 registers (call arguments). */ +/* #define TCG_TARGET_NB_REGS 8 */ +#define TCG_TARGET_NB_REGS 16 +/* #define TCG_TARGET_NB_REGS 32 */ + +/* List of registers which are used by TCG. */ +typedef enum { + TCG_REG_R0 = 0, + TCG_REG_R1, + TCG_REG_R2, + TCG_REG_R3, + TCG_REG_R4, + TCG_REG_R5, + TCG_REG_R6, + TCG_REG_R7, +#if TCG_TARGET_NB_REGS >= 16 + TCG_REG_R8, + TCG_REG_R9, + TCG_REG_R10, + TCG_REG_R11, + TCG_REG_R12, + TCG_REG_R13, + TCG_REG_R14, + TCG_REG_R15, +#if TCG_TARGET_NB_REGS >= 32 + TCG_REG_R16, + TCG_REG_R17, + TCG_REG_R18, + TCG_REG_R19, + TCG_REG_R20, + TCG_REG_R21, + TCG_REG_R22, + TCG_REG_R23, + TCG_REG_R24, + TCG_REG_R25, + TCG_REG_R26, + TCG_REG_R27, + TCG_REG_R28, + TCG_REG_R29, + TCG_REG_R30, + TCG_REG_R31, +#endif +#endif + /* Special value UINT8_MAX is used by TCI to encode constant values. 
*/ + TCG_CONST = UINT8_MAX +} TCGReg; + +#define TCG_AREG0 (TCG_TARGET_NB_REGS - 2) + +/* Used for function call generation. */ +#define TCG_REG_CALL_STACK (TCG_TARGET_NB_REGS - 1) +#define TCG_TARGET_CALL_STACK_OFFSET 0 +#define TCG_TARGET_STACK_ALIGN 16 + +void tci_disas(uint8_t opc); + +#define HAVE_TCG_QEMU_TB_EXEC + +static inline void flush_icache_range(uintptr_t start, uintptr_t stop) +{ +} + +/* We could notice __i386__ or __s390x__ and reduce the barriers depending + on the host. But if you want performance, you use the normal backend. + We prefer consistency across hosts on this. */ +#define TCG_TARGET_DEFAULT_MO (0) + +#define TCG_TARGET_HAS_MEMORY_BSWAP 1 + +static inline void tb_target_set_jmp_target(uintptr_t tc_ptr, + uintptr_t jmp_addr, uintptr_t addr) +{ + /* patch the branch destination */ + atomic_set((int32_t *)jmp_addr, addr - (jmp_addr + 4)); + /* no need to flush icache explicitly */ +} + +#endif /* TCG_TARGET_H */ diff --git a/qemu/tcg/tci/tcg-target.inc.c b/qemu/tcg/tci/tcg-target.inc.c new file mode 100644 index 0000000000..ab3114532f --- /dev/null +++ b/qemu/tcg/tci/tcg-target.inc.c @@ -0,0 +1,896 @@ +/* + * Tiny Code Generator for QEMU + * + * Copyright (c) 2009, 2011 Stefan Weil + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +/* TODO list: + * - See TODO comments in code. + */ + +/* Marker for missing code. */ +#define TODO() \ + do { \ + fprintf(stderr, "TODO %s:%u: %s()\n", \ + __FILE__, __LINE__, __func__); \ + tcg_abort(); \ + } while (0) + +/* Bitfield n...m (in 32 bit value). */ +#define BITS(n, m) (((0xffffffffU << (31 - n)) >> (31 - n + m)) << m) + +/* Macros used in tcg_target_op_defs. */ +#define R "r" +#define RI "ri" +#if TCG_TARGET_REG_BITS == 32 +# define R64 "r", "r" +#else +# define R64 "r" +#endif +#if TARGET_LONG_BITS > TCG_TARGET_REG_BITS +# define L "L", "L" +# define S "S", "S" +#else +# define L "L" +# define S "S" +#endif + +/* TODO: documentation. 
*/ +static const TCGTargetOpDef tcg_target_op_defs[] = { + { INDEX_op_exit_tb, { NULL } }, + { INDEX_op_goto_tb, { NULL } }, + { INDEX_op_br, { NULL } }, + + { INDEX_op_ld8u_i32, { R, R } }, + { INDEX_op_ld8s_i32, { R, R } }, + { INDEX_op_ld16u_i32, { R, R } }, + { INDEX_op_ld16s_i32, { R, R } }, + { INDEX_op_ld_i32, { R, R } }, + { INDEX_op_st8_i32, { R, R } }, + { INDEX_op_st16_i32, { R, R } }, + { INDEX_op_st_i32, { R, R } }, + + { INDEX_op_add_i32, { R, RI, RI } }, + { INDEX_op_sub_i32, { R, RI, RI } }, + { INDEX_op_mul_i32, { R, RI, RI } }, +#if TCG_TARGET_HAS_div_i32 + { INDEX_op_div_i32, { R, R, R } }, + { INDEX_op_divu_i32, { R, R, R } }, + { INDEX_op_rem_i32, { R, R, R } }, + { INDEX_op_remu_i32, { R, R, R } }, +#elif TCG_TARGET_HAS_div2_i32 + { INDEX_op_div2_i32, { R, R, "0", "1", R } }, + { INDEX_op_divu2_i32, { R, R, "0", "1", R } }, +#endif + /* TODO: Does R, RI, RI result in faster code than R, R, RI? + If both operands are constants, we can optimize. */ + { INDEX_op_and_i32, { R, RI, RI } }, +#if TCG_TARGET_HAS_andc_i32 + { INDEX_op_andc_i32, { R, RI, RI } }, +#endif +#if TCG_TARGET_HAS_eqv_i32 + { INDEX_op_eqv_i32, { R, RI, RI } }, +#endif +#if TCG_TARGET_HAS_nand_i32 + { INDEX_op_nand_i32, { R, RI, RI } }, +#endif +#if TCG_TARGET_HAS_nor_i32 + { INDEX_op_nor_i32, { R, RI, RI } }, +#endif + { INDEX_op_or_i32, { R, RI, RI } }, +#if TCG_TARGET_HAS_orc_i32 + { INDEX_op_orc_i32, { R, RI, RI } }, +#endif + { INDEX_op_xor_i32, { R, RI, RI } }, + { INDEX_op_shl_i32, { R, RI, RI } }, + { INDEX_op_shr_i32, { R, RI, RI } }, + { INDEX_op_sar_i32, { R, RI, RI } }, +#if TCG_TARGET_HAS_rot_i32 + { INDEX_op_rotl_i32, { R, RI, RI } }, + { INDEX_op_rotr_i32, { R, RI, RI } }, +#endif +#if TCG_TARGET_HAS_deposit_i32 + { INDEX_op_deposit_i32, { R, "0", R } }, +#endif + + { INDEX_op_brcond_i32, { R, RI } }, + + { INDEX_op_setcond_i32, { R, R, RI } }, +#if TCG_TARGET_REG_BITS == 64 + { INDEX_op_setcond_i64, { R, R, RI } }, +#endif /* TCG_TARGET_REG_BITS == 64 */ + +#if TCG_TARGET_REG_BITS == 32 + /* TODO: Support R, R, R, R, RI, RI? Will it be faster? 
*/ + { INDEX_op_add2_i32, { R, R, R, R, R, R } }, + { INDEX_op_sub2_i32, { R, R, R, R, R, R } }, + { INDEX_op_brcond2_i32, { R, R, RI, RI } }, + { INDEX_op_mulu2_i32, { R, R, R, R } }, + { INDEX_op_setcond2_i32, { R, R, R, RI, RI } }, +#endif + +#if TCG_TARGET_HAS_not_i32 + { INDEX_op_not_i32, { R, R } }, +#endif +#if TCG_TARGET_HAS_neg_i32 + { INDEX_op_neg_i32, { R, R } }, +#endif + +#if TCG_TARGET_REG_BITS == 64 + { INDEX_op_ld8u_i64, { R, R } }, + { INDEX_op_ld8s_i64, { R, R } }, + { INDEX_op_ld16u_i64, { R, R } }, + { INDEX_op_ld16s_i64, { R, R } }, + { INDEX_op_ld32u_i64, { R, R } }, + { INDEX_op_ld32s_i64, { R, R } }, + { INDEX_op_ld_i64, { R, R } }, + + { INDEX_op_st8_i64, { R, R } }, + { INDEX_op_st16_i64, { R, R } }, + { INDEX_op_st32_i64, { R, R } }, + { INDEX_op_st_i64, { R, R } }, + + { INDEX_op_add_i64, { R, RI, RI } }, + { INDEX_op_sub_i64, { R, RI, RI } }, + { INDEX_op_mul_i64, { R, RI, RI } }, +#if TCG_TARGET_HAS_div_i64 + { INDEX_op_div_i64, { R, R, R } }, + { INDEX_op_divu_i64, { R, R, R } }, + { INDEX_op_rem_i64, { R, R, R } }, + { INDEX_op_remu_i64, { R, R, R } }, +#elif TCG_TARGET_HAS_div2_i64 + { INDEX_op_div2_i64, { R, R, "0", "1", R } }, + { INDEX_op_divu2_i64, { R, R, "0", "1", R } }, +#endif + { INDEX_op_and_i64, { R, RI, RI } }, +#if TCG_TARGET_HAS_andc_i64 + { INDEX_op_andc_i64, { R, RI, RI } }, +#endif +#if TCG_TARGET_HAS_eqv_i64 + { INDEX_op_eqv_i64, { R, RI, RI } }, +#endif +#if TCG_TARGET_HAS_nand_i64 + { INDEX_op_nand_i64, { R, RI, RI } }, +#endif +#if TCG_TARGET_HAS_nor_i64 + { INDEX_op_nor_i64, { R, RI, RI } }, +#endif + { INDEX_op_or_i64, { R, RI, RI } }, +#if TCG_TARGET_HAS_orc_i64 + { INDEX_op_orc_i64, { R, RI, RI } }, +#endif + { INDEX_op_xor_i64, { R, RI, RI } }, + { INDEX_op_shl_i64, { R, RI, RI } }, + { INDEX_op_shr_i64, { R, RI, RI } }, + { INDEX_op_sar_i64, { R, RI, RI } }, +#if TCG_TARGET_HAS_rot_i64 + { INDEX_op_rotl_i64, { R, RI, RI } }, + { INDEX_op_rotr_i64, { R, RI, RI } }, +#endif +#if TCG_TARGET_HAS_deposit_i64 + { INDEX_op_deposit_i64, { R, "0", R } }, +#endif + { INDEX_op_brcond_i64, { R, RI } }, + +#if TCG_TARGET_HAS_ext8s_i64 + { INDEX_op_ext8s_i64, { R, R } }, +#endif +#if TCG_TARGET_HAS_ext16s_i64 + { INDEX_op_ext16s_i64, { R, R } }, +#endif +#if TCG_TARGET_HAS_ext32s_i64 + { INDEX_op_ext32s_i64, { R, R } }, +#endif +#if TCG_TARGET_HAS_ext8u_i64 + { INDEX_op_ext8u_i64, { R, R } }, +#endif +#if TCG_TARGET_HAS_ext16u_i64 + { INDEX_op_ext16u_i64, { R, R } }, +#endif +#if TCG_TARGET_HAS_ext32u_i64 + { INDEX_op_ext32u_i64, { R, R } }, +#endif + { INDEX_op_ext_i32_i64, { R, R } }, + { INDEX_op_extu_i32_i64, { R, R } }, +#if TCG_TARGET_HAS_bswap16_i64 + { INDEX_op_bswap16_i64, { R, R } }, +#endif +#if TCG_TARGET_HAS_bswap32_i64 + { INDEX_op_bswap32_i64, { R, R } }, +#endif +#if TCG_TARGET_HAS_bswap64_i64 + { INDEX_op_bswap64_i64, { R, R } }, +#endif +#if TCG_TARGET_HAS_not_i64 + { INDEX_op_not_i64, { R, R } }, +#endif +#if TCG_TARGET_HAS_neg_i64 + { INDEX_op_neg_i64, { R, R } }, +#endif +#endif /* TCG_TARGET_REG_BITS == 64 */ + + { INDEX_op_qemu_ld_i32, { R, L } }, + { INDEX_op_qemu_ld_i64, { R64, L } }, + + { INDEX_op_qemu_st_i32, { R, S } }, + { INDEX_op_qemu_st_i64, { R64, S } }, + +#if TCG_TARGET_HAS_ext8s_i32 + { INDEX_op_ext8s_i32, { R, R } }, +#endif +#if TCG_TARGET_HAS_ext16s_i32 + { INDEX_op_ext16s_i32, { R, R } }, +#endif +#if TCG_TARGET_HAS_ext8u_i32 + { INDEX_op_ext8u_i32, { R, R } }, +#endif +#if TCG_TARGET_HAS_ext16u_i32 + { INDEX_op_ext16u_i32, { R, R } }, +#endif + +#if TCG_TARGET_HAS_bswap16_i32 + { INDEX_op_bswap16_i32, { 
R, R } }, +#endif +#if TCG_TARGET_HAS_bswap32_i32 + { INDEX_op_bswap32_i32, { R, R } }, +#endif + + { INDEX_op_mb, { } }, + { -1 }, +}; + +static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op) +{ + int i, n = ARRAY_SIZE(tcg_target_op_defs); + + for (i = 0; i < n; ++i) { + if (tcg_target_op_defs[i].op == op) { + return &tcg_target_op_defs[i]; + } + } + return NULL; +} + +static const int tcg_target_reg_alloc_order[] = { + TCG_REG_R0, + TCG_REG_R1, + TCG_REG_R2, + TCG_REG_R3, +#if 0 /* used for TCG_REG_CALL_STACK */ + TCG_REG_R4, +#endif + TCG_REG_R5, + TCG_REG_R6, + TCG_REG_R7, +#if TCG_TARGET_NB_REGS >= 16 + TCG_REG_R8, + TCG_REG_R9, + TCG_REG_R10, + TCG_REG_R11, + TCG_REG_R12, + TCG_REG_R13, + TCG_REG_R14, + TCG_REG_R15, +#endif +}; + +#if MAX_OPC_PARAM_IARGS != 6 +# error Fix needed, number of supported input arguments changed! +#endif + +static const int tcg_target_call_iarg_regs[] = { + TCG_REG_R0, + TCG_REG_R1, + TCG_REG_R2, + TCG_REG_R3, +#if 0 /* used for TCG_REG_CALL_STACK */ + TCG_REG_R4, +#endif + TCG_REG_R5, + TCG_REG_R6, +#if TCG_TARGET_REG_BITS == 32 + /* 32 bit hosts need 2 * MAX_OPC_PARAM_IARGS registers. */ + TCG_REG_R7, +#if TCG_TARGET_NB_REGS >= 16 + TCG_REG_R8, + TCG_REG_R9, + TCG_REG_R10, + TCG_REG_R11, + TCG_REG_R12, +#else +# error Too few input registers available +#endif +#endif +}; + +static const int tcg_target_call_oarg_regs[] = { + TCG_REG_R0, +#if TCG_TARGET_REG_BITS == 32 + TCG_REG_R1 +#endif +}; + +#ifdef CONFIG_DEBUG_TCG +static const char *const tcg_target_reg_names[TCG_TARGET_NB_REGS] = { + "r00", + "r01", + "r02", + "r03", + "r04", + "r05", + "r06", + "r07", +#if TCG_TARGET_NB_REGS >= 16 + "r08", + "r09", + "r10", + "r11", + "r12", + "r13", + "r14", + "r15", +#if TCG_TARGET_NB_REGS >= 32 + "r16", + "r17", + "r18", + "r19", + "r20", + "r21", + "r22", + "r23", + "r24", + "r25", + "r26", + "r27", + "r28", + "r29", + "r30", + "r31" +#endif +#endif +}; +#endif + +static bool patch_reloc(tcg_insn_unit *code_ptr, int type, + intptr_t value, intptr_t addend) +{ + /* tcg_out_reloc always uses the same type, addend. */ + tcg_debug_assert(type == sizeof(tcg_target_long)); + tcg_debug_assert(addend == 0); + tcg_debug_assert(value != 0); + if (TCG_TARGET_REG_BITS == 32) { + tcg_patch32(code_ptr, value); + } else { + tcg_patch64(code_ptr, value); + } + return true; +} + +/* Parse target specific constraints. */ +static const char *target_parse_constraint(TCGArgConstraint *ct, + const char *ct_str, TCGType type) +{ + switch (*ct_str++) { + case 'r': + case 'L': /* qemu_ld constraint */ + case 'S': /* qemu_st constraint */ + ct->ct |= TCG_CT_REG; + ct->u.regs = BIT(TCG_TARGET_NB_REGS) - 1; + break; + default: + return NULL; + } + return ct_str; +} + +#if defined(CONFIG_DEBUG_TCG_INTERPRETER) +/* Show current bytecode. Used by tcg interpreter. */ +void tci_disas(uint8_t opc) +{ + const TCGOpDef *def = &tcg_op_defs[opc]; + fprintf(stderr, "TCG %s %u, %u, %u\n", + def->name, def->nb_oargs, def->nb_iargs, def->nb_cargs); +} +#endif + +/* Write value (native size). */ +static void tcg_out_i(TCGContext *s, tcg_target_ulong v) +{ + if (TCG_TARGET_REG_BITS == 32) { + tcg_out32(s, v); + } else { + tcg_out64(s, v); + } +} + +/* Write opcode. */ +static void tcg_out_op_t(TCGContext *s, TCGOpcode op) +{ + tcg_out8(s, op); + tcg_out8(s, 0); +} + +/* Write register. */ +static void tcg_out_r(TCGContext *s, TCGArg t0) +{ + tcg_debug_assert(t0 < TCG_TARGET_NB_REGS); + tcg_out8(s, t0); +} + +/* Write register or constant (native size). 
*/ +static void tcg_out_ri(TCGContext *s, int const_arg, TCGArg arg) +{ + if (const_arg) { + tcg_debug_assert(const_arg == 1); + tcg_out8(s, TCG_CONST); + tcg_out_i(s, arg); + } else { + tcg_out_r(s, arg); + } +} + +/* Write register or constant (32 bit). */ +static void tcg_out_ri32(TCGContext *s, int const_arg, TCGArg arg) +{ + if (const_arg) { + tcg_debug_assert(const_arg == 1); + tcg_out8(s, TCG_CONST); + tcg_out32(s, arg); + } else { + tcg_out_r(s, arg); + } +} + +#if TCG_TARGET_REG_BITS == 64 +/* Write register or constant (64 bit). */ +static void tcg_out_ri64(TCGContext *s, int const_arg, TCGArg arg) +{ + if (const_arg) { + tcg_debug_assert(const_arg == 1); + tcg_out8(s, TCG_CONST); + tcg_out64(s, arg); + } else { + tcg_out_r(s, arg); + } +} +#endif + +/* Write label. */ +static void tci_out_label(TCGContext *s, TCGLabel *label) +{ + if (label->has_value) { + tcg_out_i(s, label->u.value); + tcg_debug_assert(label->u.value); + } else { + tcg_out_reloc(s, s->code_ptr, sizeof(tcg_target_ulong), label, 0); + s->code_ptr += sizeof(tcg_target_ulong); + } +} + +static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1, + intptr_t arg2) +{ + uint8_t *old_code_ptr = s->code_ptr; + if (type == TCG_TYPE_I32) { + tcg_out_op_t(s, INDEX_op_ld_i32); + tcg_out_r(s, ret); + tcg_out_r(s, arg1); + tcg_out32(s, arg2); + } else { + tcg_debug_assert(type == TCG_TYPE_I64); +#if TCG_TARGET_REG_BITS == 64 + tcg_out_op_t(s, INDEX_op_ld_i64); + tcg_out_r(s, ret); + tcg_out_r(s, arg1); + tcg_debug_assert(arg2 == (int32_t)arg2); + tcg_out32(s, arg2); +#else + TODO(); +#endif + } + old_code_ptr[1] = s->code_ptr - old_code_ptr; +} + +static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg) +{ + uint8_t *old_code_ptr = s->code_ptr; + tcg_debug_assert(ret != arg); +#if TCG_TARGET_REG_BITS == 32 + tcg_out_op_t(s, INDEX_op_mov_i32); +#else + tcg_out_op_t(s, INDEX_op_mov_i64); +#endif + tcg_out_r(s, ret); + tcg_out_r(s, arg); + old_code_ptr[1] = s->code_ptr - old_code_ptr; + return true; +} + +static void tcg_out_movi(TCGContext *s, TCGType type, + TCGReg t0, tcg_target_long arg) +{ + uint8_t *old_code_ptr = s->code_ptr; + uint32_t arg32 = arg; + if (type == TCG_TYPE_I32 || arg == arg32) { + tcg_out_op_t(s, INDEX_op_movi_i32); + tcg_out_r(s, t0); + tcg_out32(s, arg32); + } else { + tcg_debug_assert(type == TCG_TYPE_I64); +#if TCG_TARGET_REG_BITS == 64 + tcg_out_op_t(s, INDEX_op_movi_i64); + tcg_out_r(s, t0); + tcg_out64(s, arg); +#else + TODO(); +#endif + } + old_code_ptr[1] = s->code_ptr - old_code_ptr; +} + +static inline void tcg_out_call(TCGContext *s, tcg_insn_unit *arg) +{ + uint8_t *old_code_ptr = s->code_ptr; + tcg_out_op_t(s, INDEX_op_call); + tcg_out_ri(s, 1, (uintptr_t)arg); + old_code_ptr[1] = s->code_ptr - old_code_ptr; +} + +static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args, + const int *const_args) +{ + uint8_t *old_code_ptr = s->code_ptr; + + tcg_out_op_t(s, opc); + + switch (opc) { + case INDEX_op_exit_tb: + tcg_out64(s, args[0]); + break; + case INDEX_op_goto_tb: + if (s->tb_jmp_insn_offset) { + /* Direct jump method. */ + /* Align for atomic patching and thread safety */ + s->code_ptr = QEMU_ALIGN_PTR_UP(s->code_ptr, 4); + s->tb_jmp_insn_offset[args[0]] = tcg_current_code_size(s); + tcg_out32(s, 0); + } else { + /* Indirect jump method. 
*/ + TODO(); + } + set_jmp_reset_offset(s, args[0]); + break; + case INDEX_op_br: + tci_out_label(s, arg_label(args[0])); + break; + case INDEX_op_setcond_i32: + tcg_out_r(s, args[0]); + tcg_out_r(s, args[1]); + tcg_out_ri32(s, const_args[2], args[2]); + tcg_out8(s, args[3]); /* condition */ + break; +#if TCG_TARGET_REG_BITS == 32 + case INDEX_op_setcond2_i32: + /* setcond2_i32 cond, t0, t1_low, t1_high, t2_low, t2_high */ + tcg_out_r(s, args[0]); + tcg_out_r(s, args[1]); + tcg_out_r(s, args[2]); + tcg_out_ri32(s, const_args[3], args[3]); + tcg_out_ri32(s, const_args[4], args[4]); + tcg_out8(s, args[5]); /* condition */ + break; +#elif TCG_TARGET_REG_BITS == 64 + case INDEX_op_setcond_i64: + tcg_out_r(s, args[0]); + tcg_out_r(s, args[1]); + tcg_out_ri64(s, const_args[2], args[2]); + tcg_out8(s, args[3]); /* condition */ + break; +#endif + case INDEX_op_ld8u_i32: + case INDEX_op_ld8s_i32: + case INDEX_op_ld16u_i32: + case INDEX_op_ld16s_i32: + case INDEX_op_ld_i32: + case INDEX_op_st8_i32: + case INDEX_op_st16_i32: + case INDEX_op_st_i32: + case INDEX_op_ld8u_i64: + case INDEX_op_ld8s_i64: + case INDEX_op_ld16u_i64: + case INDEX_op_ld16s_i64: + case INDEX_op_ld32u_i64: + case INDEX_op_ld32s_i64: + case INDEX_op_ld_i64: + case INDEX_op_st8_i64: + case INDEX_op_st16_i64: + case INDEX_op_st32_i64: + case INDEX_op_st_i64: + tcg_out_r(s, args[0]); + tcg_out_r(s, args[1]); + tcg_debug_assert(args[2] == (int32_t)args[2]); + tcg_out32(s, args[2]); + break; + case INDEX_op_add_i32: + case INDEX_op_sub_i32: + case INDEX_op_mul_i32: + case INDEX_op_and_i32: + case INDEX_op_andc_i32: /* Optional (TCG_TARGET_HAS_andc_i32). */ + case INDEX_op_eqv_i32: /* Optional (TCG_TARGET_HAS_eqv_i32). */ + case INDEX_op_nand_i32: /* Optional (TCG_TARGET_HAS_nand_i32). */ + case INDEX_op_nor_i32: /* Optional (TCG_TARGET_HAS_nor_i32). */ + case INDEX_op_or_i32: + case INDEX_op_orc_i32: /* Optional (TCG_TARGET_HAS_orc_i32). */ + case INDEX_op_xor_i32: + case INDEX_op_shl_i32: + case INDEX_op_shr_i32: + case INDEX_op_sar_i32: + case INDEX_op_rotl_i32: /* Optional (TCG_TARGET_HAS_rot_i32). */ + case INDEX_op_rotr_i32: /* Optional (TCG_TARGET_HAS_rot_i32). */ + tcg_out_r(s, args[0]); + tcg_out_ri32(s, const_args[1], args[1]); + tcg_out_ri32(s, const_args[2], args[2]); + break; + case INDEX_op_deposit_i32: /* Optional (TCG_TARGET_HAS_deposit_i32). */ + tcg_out_r(s, args[0]); + tcg_out_r(s, args[1]); + tcg_out_r(s, args[2]); + tcg_debug_assert(args[3] <= UINT8_MAX); + tcg_out8(s, args[3]); + tcg_debug_assert(args[4] <= UINT8_MAX); + tcg_out8(s, args[4]); + break; + +#if TCG_TARGET_REG_BITS == 64 + case INDEX_op_add_i64: + case INDEX_op_sub_i64: + case INDEX_op_mul_i64: + case INDEX_op_and_i64: + case INDEX_op_andc_i64: /* Optional (TCG_TARGET_HAS_andc_i64). */ + case INDEX_op_eqv_i64: /* Optional (TCG_TARGET_HAS_eqv_i64). */ + case INDEX_op_nand_i64: /* Optional (TCG_TARGET_HAS_nand_i64). */ + case INDEX_op_nor_i64: /* Optional (TCG_TARGET_HAS_nor_i64). */ + case INDEX_op_or_i64: + case INDEX_op_orc_i64: /* Optional (TCG_TARGET_HAS_orc_i64). */ + case INDEX_op_xor_i64: + case INDEX_op_shl_i64: + case INDEX_op_shr_i64: + case INDEX_op_sar_i64: + case INDEX_op_rotl_i64: /* Optional (TCG_TARGET_HAS_rot_i64). */ + case INDEX_op_rotr_i64: /* Optional (TCG_TARGET_HAS_rot_i64). */ + tcg_out_r(s, args[0]); + tcg_out_ri64(s, const_args[1], args[1]); + tcg_out_ri64(s, const_args[2], args[2]); + break; + case INDEX_op_deposit_i64: /* Optional (TCG_TARGET_HAS_deposit_i64). 
*/ + tcg_out_r(s, args[0]); + tcg_out_r(s, args[1]); + tcg_out_r(s, args[2]); + tcg_debug_assert(args[3] <= UINT8_MAX); + tcg_out8(s, args[3]); + tcg_debug_assert(args[4] <= UINT8_MAX); + tcg_out8(s, args[4]); + break; + case INDEX_op_div_i64: /* Optional (TCG_TARGET_HAS_div_i64). */ + case INDEX_op_divu_i64: /* Optional (TCG_TARGET_HAS_div_i64). */ + case INDEX_op_rem_i64: /* Optional (TCG_TARGET_HAS_div_i64). */ + case INDEX_op_remu_i64: /* Optional (TCG_TARGET_HAS_div_i64). */ + TODO(); + break; + case INDEX_op_div2_i64: /* Optional (TCG_TARGET_HAS_div2_i64). */ + case INDEX_op_divu2_i64: /* Optional (TCG_TARGET_HAS_div2_i64). */ + TODO(); + break; + case INDEX_op_brcond_i64: + tcg_out_r(s, args[0]); + tcg_out_ri64(s, const_args[1], args[1]); + tcg_out8(s, args[2]); /* condition */ + tci_out_label(s, arg_label(args[3])); + break; + case INDEX_op_bswap16_i64: /* Optional (TCG_TARGET_HAS_bswap16_i64). */ + case INDEX_op_bswap32_i64: /* Optional (TCG_TARGET_HAS_bswap32_i64). */ + case INDEX_op_bswap64_i64: /* Optional (TCG_TARGET_HAS_bswap64_i64). */ + case INDEX_op_not_i64: /* Optional (TCG_TARGET_HAS_not_i64). */ + case INDEX_op_neg_i64: /* Optional (TCG_TARGET_HAS_neg_i64). */ + case INDEX_op_ext8s_i64: /* Optional (TCG_TARGET_HAS_ext8s_i64). */ + case INDEX_op_ext8u_i64: /* Optional (TCG_TARGET_HAS_ext8u_i64). */ + case INDEX_op_ext16s_i64: /* Optional (TCG_TARGET_HAS_ext16s_i64). */ + case INDEX_op_ext16u_i64: /* Optional (TCG_TARGET_HAS_ext16u_i64). */ + case INDEX_op_ext32s_i64: /* Optional (TCG_TARGET_HAS_ext32s_i64). */ + case INDEX_op_ext32u_i64: /* Optional (TCG_TARGET_HAS_ext32u_i64). */ + case INDEX_op_ext_i32_i64: + case INDEX_op_extu_i32_i64: +#endif /* TCG_TARGET_REG_BITS == 64 */ + case INDEX_op_neg_i32: /* Optional (TCG_TARGET_HAS_neg_i32). */ + case INDEX_op_not_i32: /* Optional (TCG_TARGET_HAS_not_i32). */ + case INDEX_op_ext8s_i32: /* Optional (TCG_TARGET_HAS_ext8s_i32). */ + case INDEX_op_ext16s_i32: /* Optional (TCG_TARGET_HAS_ext16s_i32). */ + case INDEX_op_ext8u_i32: /* Optional (TCG_TARGET_HAS_ext8u_i32). */ + case INDEX_op_ext16u_i32: /* Optional (TCG_TARGET_HAS_ext16u_i32). */ + case INDEX_op_bswap16_i32: /* Optional (TCG_TARGET_HAS_bswap16_i32). */ + case INDEX_op_bswap32_i32: /* Optional (TCG_TARGET_HAS_bswap32_i32). */ + tcg_out_r(s, args[0]); + tcg_out_r(s, args[1]); + break; + case INDEX_op_div_i32: /* Optional (TCG_TARGET_HAS_div_i32). */ + case INDEX_op_divu_i32: /* Optional (TCG_TARGET_HAS_div_i32). */ + case INDEX_op_rem_i32: /* Optional (TCG_TARGET_HAS_div_i32). */ + case INDEX_op_remu_i32: /* Optional (TCG_TARGET_HAS_div_i32). */ + tcg_out_r(s, args[0]); + tcg_out_ri32(s, const_args[1], args[1]); + tcg_out_ri32(s, const_args[2], args[2]); + break; + case INDEX_op_div2_i32: /* Optional (TCG_TARGET_HAS_div2_i32). */ + case INDEX_op_divu2_i32: /* Optional (TCG_TARGET_HAS_div2_i32). 
*/ + TODO(); + break; +#if TCG_TARGET_REG_BITS == 32 + case INDEX_op_add2_i32: + case INDEX_op_sub2_i32: + tcg_out_r(s, args[0]); + tcg_out_r(s, args[1]); + tcg_out_r(s, args[2]); + tcg_out_r(s, args[3]); + tcg_out_r(s, args[4]); + tcg_out_r(s, args[5]); + break; + case INDEX_op_brcond2_i32: + tcg_out_r(s, args[0]); + tcg_out_r(s, args[1]); + tcg_out_ri32(s, const_args[2], args[2]); + tcg_out_ri32(s, const_args[3], args[3]); + tcg_out8(s, args[4]); /* condition */ + tci_out_label(s, arg_label(args[5])); + break; + case INDEX_op_mulu2_i32: + tcg_out_r(s, args[0]); + tcg_out_r(s, args[1]); + tcg_out_r(s, args[2]); + tcg_out_r(s, args[3]); + break; +#endif + case INDEX_op_brcond_i32: + tcg_out_r(s, args[0]); + tcg_out_ri32(s, const_args[1], args[1]); + tcg_out8(s, args[2]); /* condition */ + tci_out_label(s, arg_label(args[3])); + break; + case INDEX_op_qemu_ld_i32: + tcg_out_r(s, *args++); + tcg_out_r(s, *args++); + if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) { + tcg_out_r(s, *args++); + } + tcg_out_i(s, *args++); + break; + case INDEX_op_qemu_ld_i64: + tcg_out_r(s, *args++); + if (TCG_TARGET_REG_BITS == 32) { + tcg_out_r(s, *args++); + } + tcg_out_r(s, *args++); + if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) { + tcg_out_r(s, *args++); + } + tcg_out_i(s, *args++); + break; + case INDEX_op_qemu_st_i32: + tcg_out_r(s, *args++); + tcg_out_r(s, *args++); + if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) { + tcg_out_r(s, *args++); + } + tcg_out_i(s, *args++); + break; + case INDEX_op_qemu_st_i64: + tcg_out_r(s, *args++); + if (TCG_TARGET_REG_BITS == 32) { + tcg_out_r(s, *args++); + } + tcg_out_r(s, *args++); + if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) { + tcg_out_r(s, *args++); + } + tcg_out_i(s, *args++); + break; + case INDEX_op_mb: + break; + case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */ + case INDEX_op_mov_i64: + case INDEX_op_movi_i32: /* Always emitted via tcg_out_movi. */ + case INDEX_op_movi_i64: + case INDEX_op_call: /* Always emitted via tcg_out_call. */ + default: + tcg_abort(); + } + old_code_ptr[1] = s->code_ptr - old_code_ptr; +} + +static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1, + intptr_t arg2) +{ + uint8_t *old_code_ptr = s->code_ptr; + if (type == TCG_TYPE_I32) { + tcg_out_op_t(s, INDEX_op_st_i32); + tcg_out_r(s, arg); + tcg_out_r(s, arg1); + tcg_out32(s, arg2); + } else { + tcg_debug_assert(type == TCG_TYPE_I64); +#if TCG_TARGET_REG_BITS == 64 + tcg_out_op_t(s, INDEX_op_st_i64); + tcg_out_r(s, arg); + tcg_out_r(s, arg1); + tcg_out32(s, arg2); +#else + TODO(); +#endif + } + old_code_ptr[1] = s->code_ptr - old_code_ptr; +} + +static inline bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val, + TCGReg base, intptr_t ofs) +{ + return false; +} + +/* Test if a constant matches the constraint. */ +static int tcg_target_const_match(tcg_target_long val, TCGType type, + const TCGArgConstraint *arg_ct) +{ + /* No need to return 0 or 1, 0 or != 0 is good enough. */ + return arg_ct->ct & TCG_CT_CONST; +} + +static void tcg_target_init(TCGContext *s) +{ +#if defined(CONFIG_DEBUG_TCG_INTERPRETER) + const char *envval = getenv("DEBUG_TCG"); + if (envval) { + qemu_set_log(strtol(envval, NULL, 0)); + } +#endif + + /* The current code uses uint8_t for tcg operations. */ + tcg_debug_assert(s->tcg_op_defs_max <= UINT8_MAX); + + /* Registers available for 32 bit operations. */ + s->tcg_target_available_regs[TCG_TYPE_I32] = BIT(TCG_TARGET_NB_REGS) - 1; + /* Registers available for 64 bit operations. 
*/ + s->tcg_target_available_regs[TCG_TYPE_I64] = BIT(TCG_TARGET_NB_REGS) - 1; + /* TODO: Which registers should be set here? */ + s->tcg_target_call_clobber_regs = BIT(TCG_TARGET_NB_REGS) - 1; + + s->reserved_regs = 0; + tcg_regset_set_reg(s->reserved_regs, TCG_REG_CALL_STACK); + + /* We use negative offsets from "sp" so that we can distinguish + stores that might pretend to be call arguments. */ + tcg_set_frame(s, TCG_REG_CALL_STACK, + -CPU_TEMP_BUF_NLONGS * sizeof(long), + CPU_TEMP_BUF_NLONGS * sizeof(long)); +} + +/* Generate global QEMU prologue and epilogue code. */ +static inline void tcg_target_qemu_prologue(TCGContext *s) +{ +} From cb5aab0feff6cba55b6845d7d49347d2b6ea65a2 Mon Sep 17 00:00:00 2001 From: Amaan Qureshi Date: Sun, 6 Apr 2025 02:33:36 -0400 Subject: [PATCH 3/4] squash avr & rh850 (#2146) * Squashed commit of #2021 * Squashed commit of #1918 --------- Co-authored-by: Glenn Baker Co-authored-by: Damien Cauquil --- CMakeLists.txt | 104 +- CREDITS.TXT | 1 + Cargo.toml | 4 +- README.md | 2 +- bindings/const_generator.py | 16 +- bindings/dotnet/UnicornEngine/Const/AVR.fs | 155 + bindings/dotnet/UnicornEngine/Const/Common.fs | 5 +- bindings/dotnet/UnicornEngine/Const/Rh850.fs | 99 + bindings/go/unicorn/avr_const.go | 150 + bindings/go/unicorn/rh850_const.go | 94 + bindings/go/unicorn/unicorn_const.go | 7 +- .../java/src/main/java/unicorn/AVRConst.java | 153 + .../src/main/java/unicorn/Rh850Const.java | 97 + .../src/main/java/unicorn/UnicornConst.java | 5 +- bindings/pascal/unicorn/AVRConst.pas | 155 + bindings/pascal/unicorn/Rh850Const.pas | 99 + bindings/pascal/unicorn/UnicornConst.pas | 7 +- bindings/python/sample_rh850.py | 70 + bindings/python/unicorn/__init__.py | 2 +- bindings/python/unicorn/avr_const.py | 147 + bindings/python/unicorn/rh850_const.py | 91 + bindings/python/unicorn/unicorn_const.py | 5 +- .../lib/unicorn_engine/avr_const.rb | 150 + .../lib/unicorn_engine/rh850_const.rb | 94 + .../lib/unicorn_engine/unicorn_const.rb | 7 +- bindings/rust/build.rs | 6 + bindings/rust/src/avr.rs | 211 + bindings/rust/src/lib.rs | 16 + bindings/rust/src/rh850.rs | 119 + bindings/rust/src/unicorn_const.rs | 6 +- bindings/zig/unicorn/AVR_const.zig | 151 + bindings/zig/unicorn/rh850_const.zig | 95 + bindings/zig/unicorn/unicorn_const.zig | 5 +- build.zig | 1 + format.sh | 0 include/uc_priv.h | 1 + include/unicorn/avr.h | 189 + include/unicorn/rh850.h | 111 + include/unicorn/unicorn.h | 7 + msvc/avr-softmmu/config-target.h | 5 + msvc/rh850-softmmu/config-target.h | 6 + qemu/MAINTAINERS | 9 + qemu/avr.h | 1297 ++++ qemu/configure | 21 +- qemu/include/tcg/tcg.h | 19 +- qemu/rh850.h | 1294 ++++ qemu/target/avr/cpu-param.h | 36 + qemu/target/avr/cpu-qom.h | 56 + qemu/target/avr/cpu.c | 459 ++ qemu/target/avr/cpu.h | 274 + qemu/target/avr/decode-insn.c.inc | 1097 ++++ qemu/target/avr/gdbstub.c | 84 + qemu/target/avr/helper.c | 373 ++ qemu/target/avr/helper.h | 37 + qemu/target/avr/insn.decode | 187 + qemu/target/avr/machine.c | 119 + qemu/target/avr/translate.c | 3270 +++++++++++ qemu/target/avr/unicorn.c | 280 + qemu/target/avr/unicorn.h | 21 + qemu/target/avr/unicorn_helper.h | 165 + qemu/target/rh850/Makefile.objs | 1 + qemu/target/rh850/cpu-param.h | 11 + qemu/target/rh850/cpu.c | 473 ++ qemu/target/rh850/cpu.h | 276 + qemu/target/rh850/cpu_bits.h | 431 ++ qemu/target/rh850/cpu_user.h | 13 + qemu/target/rh850/fpu_helper.c | 823 +++ qemu/target/rh850/fpu_translate.c | 1557 +++++ qemu/target/rh850/fpu_translate.h | 41 + qemu/target/rh850/gdbstub.c | 169 + qemu/target/rh850/helper.c 
| 539 ++ qemu/target/rh850/helper.h | 157 + qemu/target/rh850/instmap.h | 624 ++ qemu/target/rh850/op_helper.c | 89 + qemu/target/rh850/pmp.c | 379 ++ qemu/target/rh850/pmp.h | 64 + qemu/target/rh850/register_indices.h | 63 + qemu/target/rh850/translate.c | 5190 +++++++++++++++++ qemu/target/rh850/translate.h | 35 + qemu/target/rh850/unicorn.c | 140 + qemu/target/rh850/unicorn.h | 16 + samples/Makefile | 3 + samples/sample_avr.c | 131 + samples/sample_rh850.c | 118 + symbols.sh | 22 +- tests/unit/test_avr.c | 268 + tests/unit/test_rh850.c | 40 + uc.c | 56 + 88 files changed, 23454 insertions(+), 21 deletions(-) create mode 100644 bindings/dotnet/UnicornEngine/Const/AVR.fs create mode 100644 bindings/dotnet/UnicornEngine/Const/Rh850.fs create mode 100644 bindings/go/unicorn/avr_const.go create mode 100644 bindings/go/unicorn/rh850_const.go create mode 100644 bindings/java/src/main/java/unicorn/AVRConst.java create mode 100644 bindings/java/src/main/java/unicorn/Rh850Const.java create mode 100644 bindings/pascal/unicorn/AVRConst.pas create mode 100644 bindings/pascal/unicorn/Rh850Const.pas create mode 100644 bindings/python/sample_rh850.py create mode 100644 bindings/python/unicorn/avr_const.py create mode 100644 bindings/python/unicorn/rh850_const.py create mode 100644 bindings/ruby/unicorn_gem/lib/unicorn_engine/avr_const.rb create mode 100644 bindings/ruby/unicorn_gem/lib/unicorn_engine/rh850_const.rb create mode 100644 bindings/rust/src/avr.rs create mode 100644 bindings/rust/src/rh850.rs create mode 100644 bindings/zig/unicorn/AVR_const.zig create mode 100644 bindings/zig/unicorn/rh850_const.zig mode change 100644 => 100755 format.sh create mode 100644 include/unicorn/avr.h create mode 100644 include/unicorn/rh850.h create mode 100644 msvc/avr-softmmu/config-target.h create mode 100644 msvc/rh850-softmmu/config-target.h create mode 100644 qemu/avr.h create mode 100644 qemu/rh850.h create mode 100644 qemu/target/avr/cpu-param.h create mode 100644 qemu/target/avr/cpu-qom.h create mode 100644 qemu/target/avr/cpu.c create mode 100644 qemu/target/avr/cpu.h create mode 100644 qemu/target/avr/decode-insn.c.inc create mode 100644 qemu/target/avr/gdbstub.c create mode 100644 qemu/target/avr/helper.c create mode 100644 qemu/target/avr/helper.h create mode 100644 qemu/target/avr/insn.decode create mode 100644 qemu/target/avr/machine.c create mode 100644 qemu/target/avr/translate.c create mode 100644 qemu/target/avr/unicorn.c create mode 100644 qemu/target/avr/unicorn.h create mode 100644 qemu/target/avr/unicorn_helper.h create mode 100644 qemu/target/rh850/Makefile.objs create mode 100644 qemu/target/rh850/cpu-param.h create mode 100644 qemu/target/rh850/cpu.c create mode 100644 qemu/target/rh850/cpu.h create mode 100644 qemu/target/rh850/cpu_bits.h create mode 100644 qemu/target/rh850/cpu_user.h create mode 100644 qemu/target/rh850/fpu_helper.c create mode 100644 qemu/target/rh850/fpu_translate.c create mode 100644 qemu/target/rh850/fpu_translate.h create mode 100644 qemu/target/rh850/gdbstub.c create mode 100644 qemu/target/rh850/helper.c create mode 100644 qemu/target/rh850/helper.h create mode 100644 qemu/target/rh850/instmap.h create mode 100644 qemu/target/rh850/op_helper.c create mode 100644 qemu/target/rh850/pmp.c create mode 100644 qemu/target/rh850/pmp.h create mode 100644 qemu/target/rh850/register_indices.h create mode 100644 qemu/target/rh850/translate.c create mode 100644 qemu/target/rh850/translate.h create mode 100644 qemu/target/rh850/unicorn.c create mode 100644 
qemu/target/rh850/unicorn.h create mode 100644 samples/sample_avr.c create mode 100644 samples/sample_rh850.c create mode 100644 tests/unit/test_avr.c create mode 100644 tests/unit/test_rh850.c diff --git a/CMakeLists.txt b/CMakeLists.txt index 78db47b688..6a266c4d76 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -86,7 +86,7 @@ option(UNICORN_FUZZ "Enable fuzzing" OFF) option(UNICORN_LOGGING "Enable logging" OFF) option(UNICORN_BUILD_TESTS "Build unicorn tests" ${PROJECT_IS_TOP_LEVEL}) option(UNICORN_INSTALL "Enable unicorn installation" ${PROJECT_IS_TOP_LEVEL}) -set(UNICORN_ARCH "x86;arm;aarch64;riscv;mips;sparc;m68k;ppc;s390x;tricore" CACHE STRING "Enabled unicorn architectures") +set(UNICORN_ARCH "x86;arm;aarch64;riscv;mips;sparc;m68k;ppc;rh850;s390x;tricore;avr" CACHE STRING "Enabled unicorn architectures") option(UNICORN_TRACER "Trace unicorn execution" OFF) option(UNICORN_INTERPRETER "Use interpreter mode" OFF) @@ -274,6 +274,11 @@ else() set(UNICORN_TARGET_ARCH "tricore") break() endif() + string(FIND ${UC_COMPILER_MACRO} "__AVR__" UC_RET) + if (${UC_RET} GREATER_EQUAL "0") + set(UNICORN_TARGET_ARCH "avr") + break() + endif() message(FATAL_ERROR "Unknown host compiler: ${CMAKE_C_COMPILER}.") endwhile(TRUE) endif() @@ -304,6 +309,9 @@ else() if(UNICORN_HAS_PPC) set(EXTRA_CFLAGS "${EXTRA_CFLAGS}-DUNICORN_HAS_PPC ") endif() + if(UNICORN_HAS_RH850) + set(EXTRA_CFLAGS "${EXTRA_CFLAGS}-DUNICORN_HAS_RH850 ") + endif() if(UNICORN_HAS_RISCV) set(EXTRA_CFLAGS "${EXTRA_CFLAGS}-DUNICORN_HAS_RISCV ") endif() @@ -313,6 +321,9 @@ else() if (UNICORN_HAS_TRICORE) set (EXTRA_CFLAGS "${EXTRA_CFLAGS}-DUNICORN_HAS_TRICORE ") endif() + if (UNICORN_HAS_AVR) + set (EXTRA_CFLAGS "${EXTRA_CFLAGS}-DUNICORN_HAS_AVR ") + endif() set(EXTRA_CFLAGS "${EXTRA_CFLAGS}-fPIC") if(ANDROID_ABI) @@ -355,6 +366,9 @@ else() if(UNICORN_HAS_PPC) set(TARGET_LIST "${TARGET_LIST}ppc-softmmu, ppc64-softmmu, ") endif() + if(UNICORN_HAS_RH850) + set(TARGET_LIST "${TARGET_LIST}rh850-softmmu, ") + endif() if(UNICORN_HAS_RISCV) set(TARGET_LIST "${TARGET_LIST}riscv32-softmmu, riscv64-softmmu, ") endif() @@ -364,6 +378,9 @@ else() if (UNICORN_HAS_TRICORE) set (TARGET_LIST "${TARGET_LIST}tricore-softmmu, ") endif() + if (UNICORN_HAS_AVR) + set (TARGET_LIST "${TARGET_LIST}avr-softmmu, ") + endif() set(TARGET_LIST "${TARGET_LIST} ") # GEN config-host.mak & target directories @@ -446,6 +463,12 @@ else() OUTPUT_FILE ${CMAKE_BINARY_DIR}/ppc64-softmmu/config-target.h ) endif() + if(UNICORN_HAS_RH850) + execute_process(COMMAND sh ${CMAKE_CURRENT_SOURCE_DIR}/qemu/scripts/create_config + INPUT_FILE ${CMAKE_BINARY_DIR}/rh850-softmmu/config-target.mak + OUTPUT_FILE ${CMAKE_BINARY_DIR}/rh850-softmmu/config-target.h + ) + endif() if(UNICORN_HAS_RISCV) execute_process(COMMAND sh ${CMAKE_CURRENT_SOURCE_DIR}/qemu/scripts/create_config INPUT_FILE ${CMAKE_BINARY_DIR}/riscv32-softmmu/config-target.mak @@ -468,6 +491,12 @@ else() OUTPUT_FILE ${CMAKE_BINARY_DIR}/tricore-softmmu/config-target.h ) endif() + if (UNICORN_HAS_AVR) + execute_process(COMMAND sh ${CMAKE_CURRENT_SOURCE_DIR}/qemu/scripts/create_config + INPUT_FILE ${CMAKE_BINARY_DIR}/avr-softmmu/config-target.mak + OUTPUT_FILE ${CMAKE_BINARY_DIR}/avr-softmmu/config-target.h + ) + endif() add_compile_options( ${UNICORN_CFLAGS} -I${CMAKE_CURRENT_SOURCE_DIR}/qemu/tcg/${UNICORN_TARGET_ARCH} @@ -1174,6 +1203,65 @@ endif() endif() +if (UNICORN_HAS_AVR) +add_library(avr-softmmu STATIC + ${UNICORN_ARCH_COMMON} + + qemu/target/avr/cpu.c + qemu/target/avr/helper.c + qemu/target/avr/translate.c + 
qemu/target/avr/unicorn.c +) + +if(MSVC) + target_compile_options(avr-softmmu PRIVATE + -DNEED_CPU_H + /FIavr.h + /I${CMAKE_CURRENT_SOURCE_DIR}/msvc/avr-softmmu + /I${CMAKE_CURRENT_SOURCE_DIR}/qemu/target/avr + ) +else() + target_compile_options(avr-softmmu PRIVATE + -DNEED_CPU_H + -include avr.h + -I${CMAKE_BINARY_DIR}/avr-softmmu + -I${CMAKE_CURRENT_SOURCE_DIR}/qemu/target/avr + ) +endif() +endif() + + +if (UNICORN_HAS_RH850) +add_library(rh850-softmmu STATIC + ${UNICORN_ARCH_COMMON} + + qemu/target/rh850/cpu.c + qemu/target/rh850/fpu_helper.c + qemu/target/rh850/helper.c + qemu/target/rh850/op_helper.c + qemu/target/rh850/translate.c + qemu/target/rh850/fpu_translate.c + qemu/target/rh850/unicorn.c +) + + +if(MSVC) + target_compile_options(rh850-softmmu PRIVATE + -DNEED_CPU_H + /FIrh850.h + /I${CMAKE_CURRENT_SOURCE_DIR}/msvc/rh850-softmmu + /I${CMAKE_CURRENT_SOURCE_DIR}/qemu/target/rh850 + ) +else() + target_compile_options(rh850-softmmu PRIVATE + -DNEED_CPU_H + -include rh850.h + -I${CMAKE_BINARY_DIR}/rh850-softmmu + -I${CMAKE_CURRENT_SOURCE_DIR}/qemu/target/rh850 + ) +endif() +endif() + set(UNICORN_SRCS uc.c @@ -1326,6 +1414,13 @@ if(UNICORN_HAS_PPC) target_link_libraries(ppc64-softmmu PRIVATE unicorn-common) set(UNICORN_TEST_FILE ${UNICORN_TEST_FILE} test_ppc) endif() +if(UNICORN_HAS_RH850) + set(UNICORN_COMPILE_OPTIONS ${UNICORN_COMPILE_OPTIONS} -DUNICORN_HAS_RH850) + set(UNICORN_LINK_LIBRARIES ${UNICORN_LINK_LIBRARIES} rh850-softmmu rh850-softmmu) + set(UNICORN_SAMPLE_FILE ${UNICORN_SAMPLE_FILE} sample_rh850) + target_link_libraries(rh850-softmmu PRIVATE unicorn-common) + set(UNICORN_TEST_FILE ${UNICORN_TEST_FILE} test_rh850) +endif() if(UNICORN_HAS_RISCV) set(UNICORN_COMPILE_OPTIONS ${UNICORN_COMPILE_OPTIONS} -DUNICORN_HAS_RISCV) set(UNICORN_LINK_LIBRARIES ${UNICORN_LINK_LIBRARIES} riscv32-softmmu riscv64-softmmu) @@ -1348,6 +1443,13 @@ if (UNICORN_HAS_TRICORE) target_link_libraries(tricore-softmmu unicorn-common) set(UNICORN_TEST_FILE ${UNICORN_TEST_FILE} test_tricore) endif() +if (UNICORN_HAS_AVR) + set(UNICORN_COMPILE_OPTIONS ${UNICORN_COMPILE_OPTIONS} -DUNICORN_HAS_AVR) + set(UNICORN_LINK_LIBRARIES ${UNICORN_LINK_LIBRARIES} avr-softmmu) + set(UNICORN_SAMPLE_FILE ${UNICORN_SAMPLE_FILE} sample_avr) + target_link_libraries(avr-softmmu unicorn-common) + set(UNICORN_TEST_FILE ${UNICORN_TEST_FILE} test_avr) +endif() # Extra tests set(UNICORN_TEST_FILE ${UNICORN_TEST_FILE} test_mem) diff --git a/CREDITS.TXT b/CREDITS.TXT index ee443858b7..f2dc6118a0 100644 --- a/CREDITS.TXT +++ b/CREDITS.TXT @@ -81,3 +81,4 @@ Ziqiao Kong (lazymio): uc_context_free() API and various bug fix & improvement. 
Sven Almgren (blindmatrix): bug fix Chenxu Wu (kabeor): Documentation Philipp Takacs: virtual tlb, memory snapshots +Glenn Baker: AVR architecture support diff --git a/Cargo.toml b/Cargo.toml index 2136e417a6..46800fbc63 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -42,7 +42,7 @@ pkg-config = { version = "0.3" } [features] default = ["arch_all"] dynamic_linkage = [] -arch_all = ["arch_x86", "arch_arm", "arch_aarch64", "arch_riscv", "arch_mips", "arch_sparc", "arch_m68k", "arch_ppc", "arch_s390x", "arch_tricore"] +arch_all = ["arch_x86", "arch_arm", "arch_aarch64", "arch_riscv", "arch_mips", "arch_sparc", "arch_m68k", "arch_ppc", "arch_rh850", "arch_s390x", "arch_tricore", "arch_avr"] arch_x86 = [] arch_arm = [] # NOTE: unicorn-c only separates on top-level arch name, @@ -55,3 +55,5 @@ arch_m68k = [] arch_ppc = [] arch_s390x = [] arch_tricore = [] +arch_avr = [] +arch_rh850 = [] diff --git a/README.md b/README.md index 293d05db01..cc07f1d4ce 100644 --- a/README.md +++ b/README.md @@ -13,7 +13,7 @@ Unicorn is a lightweight, multi-platform, multi-architecture CPU emulator framew Unicorn offers some unparalleled features: -- Multi-architecture: ARM, ARM64 (ARMv8), M68K, MIPS, PowerPC, RISCV, SPARC, S390X, TriCore and X86 (16, 32, 64-bit) +- Multi-architecture: ARM, ARM64 (ARMv8), AVR, M68K, MIPS, PowerPC, RISCV, SPARC, S390X, TriCore and X86 (16, 32, 64-bit) - Clean/simple/lightweight/intuitive architecture-neutral API - Implemented in pure C language, with bindings for Crystal, Clojure, Visual Basic, Perl, Rust, Ruby, Python, Java, .NET, Go, Delphi/Free Pascal, Haskell, Pharo, Lua and Zig. - Native support for Windows & *nix (with Mac OSX, Linux, Android, *BSD & Solaris confirmed) diff --git a/bindings/const_generator.py b/bindings/const_generator.py index 982b48f4cb..64e39356cc 100644 --- a/bindings/const_generator.py +++ b/bindings/const_generator.py @@ -6,7 +6,7 @@ INCL_DIR = os.path.join('..', 'include', 'unicorn') -include = [ 'arm.h', 'arm64.h', 'mips.h', 'x86.h', 'sparc.h', 'm68k.h', 'ppc.h', 'riscv.h', 's390x.h', 'tricore.h', 'unicorn.h' ] +include = [ 'arm.h', 'arm64.h', 'avr.h', 'mips.h', 'x86.h', 'sparc.h', 'm68k.h', 'ppc.h', 'rh850.h', 'riscv.h', 's390x.h', 'tricore.h', 'unicorn.h' ] template = { 'python': { @@ -17,11 +17,13 @@ # prefixes for constant filenames of all archs - case sensitive 'arm.h': 'arm', 'arm64.h': 'arm64', + 'avr.h': 'avr', 'mips.h': 'mips', 'x86.h': 'x86', 'sparc.h': 'sparc', 'm68k.h': 'm68k', 'ppc.h': 'ppc', + 'rh850.h': 'rh850', 'riscv.h': 'riscv', 's390x.h' : 's390x', 'tricore.h' : 'tricore', @@ -37,12 +39,14 @@ # prefixes for constant filenames of all archs - case sensitive 'arm.h': 'arm', 'arm64.h': 'arm64', + 'avr.h': 'avr', 'mips.h': 'mips', 'x86.h': 'x86', 'sparc.h': 'sparc', 'm68k.h': 'm68k', 'ppc.h': 'ppc', 'riscv.h': 'riscv', + 'rh850.h': 'rh850', 's390x.h' : 's390x', 'tricore.h' : 'tricore', 'unicorn.h': 'unicorn', @@ -57,11 +61,13 @@ # prefixes for constant filenames of all archs - case sensitive 'arm.h': 'arm', 'arm64.h': 'arm64', + 'avr.h': 'avr', 'mips.h': 'mips', 'x86.h': 'x86', 'sparc.h': 'sparc', 'm68k.h': 'm68k', 'ppc.h': 'ppc', + 'rh850.h': 'rh850', 'riscv.h': 'riscv', 's390x.h' : 's390x', 'tricore.h' : 'tricore', @@ -77,11 +83,13 @@ # prefixes for constant filenames of all archs - case sensitive 'arm.h': 'Arm', 'arm64.h': 'Arm64', + 'avr.h': 'AVR', 'mips.h': 'Mips', 'x86.h': 'X86', 'sparc.h': 'Sparc', 'm68k.h': 'M68k', 'ppc.h': 'Ppc', + 'rh850.h': 'Rh850', 'riscv.h': 'Riscv', 's390x.h' : 'S390x', 'tricore.h' : 'TriCore', @@ -97,11 +105,13 
@@ # prefixes for constant filenames of all archs - case sensitive 'arm.h': 'Arm', 'arm64.h': 'Arm64', + 'avr.h': 'AVR', 'mips.h': 'Mips', 'x86.h': 'X86', 'sparc.h': 'Sparc', 'm68k.h': 'M68k', 'ppc.h': 'Ppc', + 'rh850.h': 'Rh850', 'riscv.h': 'Riscv', 's390x.h' : 'S390x', 'tricore.h' : 'TriCore', @@ -117,11 +127,13 @@ # prefixes for constant filenames of all archs - case sensitive 'arm.h': 'Arm', 'arm64.h': 'Arm64', + 'avr.h': 'AVR', 'mips.h': 'Mips', 'x86.h': 'X86', 'sparc.h': 'Sparc', 'm68k.h': 'M68k', 'ppc.h': 'Ppc', + 'rh850.h': 'Rh850', 'riscv.h': 'Riscv', 's390x.h' : 'S390x', 'tricore.h' : 'TriCore', @@ -137,12 +149,14 @@ # prefixes for constant filenames of all archs - case sensitive 'arm.h': 'arm', 'arm64.h': 'arm64', + 'avr.h': 'AVR', 'mips.h': 'mips', 'x86.h': 'x86', 'sparc.h': 'sparc', 'm68k.h': 'm68k', 'ppc.h': 'ppc', 'riscv.h': 'riscv', + 'rh850.h': 'rh850', 's390x.h' : 's390x', 'tricore.h' : 'tricore', 'unicorn.h': 'unicorn', diff --git a/bindings/dotnet/UnicornEngine/Const/AVR.fs b/bindings/dotnet/UnicornEngine/Const/AVR.fs new file mode 100644 index 0000000000..d7613dac85 --- /dev/null +++ b/bindings/dotnet/UnicornEngine/Const/AVR.fs @@ -0,0 +1,155 @@ +// For Unicorn Engine. AUTO-GENERATED FILE, DO NOT EDIT + +namespace UnicornEngine.Const + +open System + +[] +module AVR = + + // AVR architectures + let UC_AVR_ARCH_AVR1 = 10 + let UC_AVR_ARCH_AVR2 = 20 + let UC_AVR_ARCH_AVR25 = 25 + let UC_AVR_ARCH_AVR3 = 30 + let UC_AVR_ARCH_AVR4 = 40 + let UC_AVR_ARCH_AVR5 = 50 + let UC_AVR_ARCH_AVR51 = 51 + let UC_AVR_ARCH_AVR6 = 60 + let UC_CPU_AVR_ARCH = 1000 + + // AVR CPU + let UC_CPU_AVR_ATMEGA16 = 50016 + let UC_CPU_AVR_ATMEGA32 = 50032 + let UC_CPU_AVR_ATMEGA64 = 50064 + let UC_CPU_AVR_ATMEGA128 = 51128 + let UC_CPU_AVR_ATMEGA128RFR2 = 51129 + let UC_CPU_AVR_ATMEGA1280 = 51130 + let UC_CPU_AVR_ATMEGA256 = 60256 + let UC_CPU_AVR_ATMEGA256RFR2 = 60257 + let UC_CPU_AVR_ATMEGA2560 = 60258 + + // AVR memory + let UC_AVR_MEM_FLASH = 134217728 + + // AVR registers + + let UC_AVR_REG_INVALID = 0 + let UC_AVR_REG_R0 = 1 + let UC_AVR_REG_R1 = 2 + let UC_AVR_REG_R2 = 3 + let UC_AVR_REG_R3 = 4 + let UC_AVR_REG_R4 = 5 + let UC_AVR_REG_R5 = 6 + let UC_AVR_REG_R6 = 7 + let UC_AVR_REG_R7 = 8 + let UC_AVR_REG_R8 = 9 + let UC_AVR_REG_R9 = 10 + let UC_AVR_REG_R10 = 11 + let UC_AVR_REG_R11 = 12 + let UC_AVR_REG_R12 = 13 + let UC_AVR_REG_R13 = 14 + let UC_AVR_REG_R14 = 15 + let UC_AVR_REG_R15 = 16 + let UC_AVR_REG_R16 = 17 + let UC_AVR_REG_R17 = 18 + let UC_AVR_REG_R18 = 19 + let UC_AVR_REG_R19 = 20 + let UC_AVR_REG_R20 = 21 + let UC_AVR_REG_R21 = 22 + let UC_AVR_REG_R22 = 23 + let UC_AVR_REG_R23 = 24 + let UC_AVR_REG_R24 = 25 + let UC_AVR_REG_R25 = 26 + let UC_AVR_REG_R26 = 27 + let UC_AVR_REG_R27 = 28 + let UC_AVR_REG_R28 = 29 + let UC_AVR_REG_R29 = 30 + let UC_AVR_REG_R30 = 31 + let UC_AVR_REG_R31 = 32 + let UC_AVR_REG_PC = 33 + let UC_AVR_REG_SP = 34 + let UC_AVR_REG_RAMPD = 57 + let UC_AVR_REG_RAMPX = 58 + let UC_AVR_REG_RAMPY = 59 + let UC_AVR_REG_RAMPZ = 60 + let UC_AVR_REG_EIND = 61 + let UC_AVR_REG_SPL = 62 + let UC_AVR_REG_SPH = 63 + let UC_AVR_REG_SREG = 64 + + // 16-bit coalesced registers + let UC_AVR_REG_R0W = 65 + let UC_AVR_REG_R1W = 66 + let UC_AVR_REG_R2W = 67 + let UC_AVR_REG_R3W = 68 + let UC_AVR_REG_R4W = 69 + let UC_AVR_REG_R5W = 70 + let UC_AVR_REG_R6W = 71 + let UC_AVR_REG_R7W = 72 + let UC_AVR_REG_R8W = 73 + let UC_AVR_REG_R9W = 74 + let UC_AVR_REG_R10W = 75 + let UC_AVR_REG_R11W = 76 + let UC_AVR_REG_R12W = 77 + let UC_AVR_REG_R13W = 78 + let UC_AVR_REG_R14W = 79 + let 
UC_AVR_REG_R15W = 80 + let UC_AVR_REG_R16W = 81 + let UC_AVR_REG_R17W = 82 + let UC_AVR_REG_R18W = 83 + let UC_AVR_REG_R19W = 84 + let UC_AVR_REG_R20W = 85 + let UC_AVR_REG_R21W = 86 + let UC_AVR_REG_R22W = 87 + let UC_AVR_REG_R23W = 88 + let UC_AVR_REG_R24W = 89 + let UC_AVR_REG_R25W = 90 + let UC_AVR_REG_R26W = 91 + let UC_AVR_REG_R27W = 92 + let UC_AVR_REG_R28W = 93 + let UC_AVR_REG_R29W = 94 + let UC_AVR_REG_R30W = 95 + + // 32-bit coalesced registers + let UC_AVR_REG_R0D = 97 + let UC_AVR_REG_R1D = 98 + let UC_AVR_REG_R2D = 99 + let UC_AVR_REG_R3D = 100 + let UC_AVR_REG_R4D = 101 + let UC_AVR_REG_R5D = 102 + let UC_AVR_REG_R6D = 103 + let UC_AVR_REG_R7D = 104 + let UC_AVR_REG_R8D = 105 + let UC_AVR_REG_R9D = 106 + let UC_AVR_REG_R10D = 107 + let UC_AVR_REG_R11D = 108 + let UC_AVR_REG_R12D = 109 + let UC_AVR_REG_R13D = 110 + let UC_AVR_REG_R14D = 111 + let UC_AVR_REG_R15D = 112 + let UC_AVR_REG_R16D = 113 + let UC_AVR_REG_R17D = 114 + let UC_AVR_REG_R18D = 115 + let UC_AVR_REG_R19D = 116 + let UC_AVR_REG_R20D = 117 + let UC_AVR_REG_R21D = 118 + let UC_AVR_REG_R22D = 119 + let UC_AVR_REG_R23D = 120 + let UC_AVR_REG_R24D = 121 + let UC_AVR_REG_R25D = 122 + let UC_AVR_REG_R26D = 123 + let UC_AVR_REG_R27D = 124 + let UC_AVR_REG_R28D = 125 + + // Alias registers + let UC_AVR_REG_Xhi = 28 + let UC_AVR_REG_Xlo = 27 + let UC_AVR_REG_Yhi = 30 + let UC_AVR_REG_Ylo = 29 + let UC_AVR_REG_Zhi = 32 + let UC_AVR_REG_Zlo = 31 + let UC_AVR_REG_X = 91 + let UC_AVR_REG_Y = 93 + let UC_AVR_REG_Z = 95 + diff --git a/bindings/dotnet/UnicornEngine/Const/Common.fs b/bindings/dotnet/UnicornEngine/Const/Common.fs index addce7b7c6..cdd6ce767a 100644 --- a/bindings/dotnet/UnicornEngine/Const/Common.fs +++ b/bindings/dotnet/UnicornEngine/Const/Common.fs @@ -26,7 +26,9 @@ module Common = let UC_ARCH_RISCV = 8 let UC_ARCH_S390X = 9 let UC_ARCH_TRICORE = 10 - let UC_ARCH_MAX = 11 + let UC_ARCH_AVR = 11 + let UC_ARCH_RH850 = 12 + let UC_ARCH_MAX = 13 let UC_MODE_LITTLE_ENDIAN = 0 let UC_MODE_BIG_ENDIAN = 1073741824 @@ -53,6 +55,7 @@ module Common = let UC_MODE_SPARC32 = 4 let UC_MODE_SPARC64 = 8 let UC_MODE_V9 = 16 + let UC_MODE_RH850 = 4 let UC_MODE_RISCV32 = 4 let UC_MODE_RISCV64 = 8 diff --git a/bindings/dotnet/UnicornEngine/Const/Rh850.fs b/bindings/dotnet/UnicornEngine/Const/Rh850.fs new file mode 100644 index 0000000000..073f10416f --- /dev/null +++ b/bindings/dotnet/UnicornEngine/Const/Rh850.fs @@ -0,0 +1,99 @@ +// For Unicorn Engine. 
AUTO-GENERATED FILE, DO NOT EDIT + +namespace UnicornManaged.Const + +open System + +[] +module Rh850 = + let UC_RH850_SYSREG_SELID0 = 32 + let UC_RH850_SYSREG_SELID1 = 64 + let UC_RH850_SYSREG_SELID2 = 96 + let UC_RH850_SYSREG_SELID3 = 128 + let UC_RH850_SYSREG_SELID4 = 160 + let UC_RH850_SYSREG_SELID5 = 192 + let UC_RH850_SYSREG_SELID6 = 224 + let UC_RH850_SYSREG_SELID7 = 256 + + // RH850 global purpose registers + + let UC_RH850_REG_R0 = 0 + let UC_RH850_REG_R1 = 1 + let UC_RH850_REG_R2 = 2 + let UC_RH850_REG_R3 = 3 + let UC_RH850_REG_R4 = 4 + let UC_RH850_REG_R5 = 5 + let UC_RH850_REG_R6 = 6 + let UC_RH850_REG_R7 = 7 + let UC_RH850_REG_R8 = 8 + let UC_RH850_REG_R9 = 9 + let UC_RH850_REG_R10 = 10 + let UC_RH850_REG_R11 = 11 + let UC_RH850_REG_R12 = 12 + let UC_RH850_REG_R13 = 13 + let UC_RH850_REG_R14 = 14 + let UC_RH850_REG_R15 = 15 + let UC_RH850_REG_R16 = 16 + let UC_RH850_REG_R17 = 17 + let UC_RH850_REG_R18 = 18 + let UC_RH850_REG_R19 = 19 + let UC_RH850_REG_R20 = 20 + let UC_RH850_REG_R21 = 21 + let UC_RH850_REG_R22 = 22 + let UC_RH850_REG_R23 = 23 + let UC_RH850_REG_R24 = 24 + let UC_RH850_REG_R25 = 25 + let UC_RH850_REG_R26 = 26 + let UC_RH850_REG_R27 = 27 + let UC_RH850_REG_R28 = 28 + let UC_RH850_REG_R29 = 29 + let UC_RH850_REG_R30 = 30 + let UC_RH850_REG_R31 = 31 + + // RH850 system registers, selection ID 0 + let UC_RH850_REG_EIPC = 32 + let UC_RH850_REG_EIPSW = 33 + let UC_RH850_REG_FEPC = 34 + let UC_RH850_REG_FEPSW = 35 + let UC_RH850_REG_ECR = 36 + let UC_RH850_REG_PSW = 37 + let UC_RH850_REG_FPSR = 38 + let UC_RH850_REG_FPEPC = 39 + let UC_RH850_REG_FPST = 40 + let UC_RH850_REG_FPCC = 41 + let UC_RH850_REG_FPCFG = 42 + let UC_RH850_REG_FPEC = 43 + let UC_RH850_REG_EIIC = 45 + let UC_RH850_REG_FEIC = 46 + let UC_RH850_REG_CTPC = 48 + let UC_RH850_REG_CTPSW = 49 + let UC_RH850_REG_CTBP = 52 + let UC_RH850_REG_EIWR = 60 + let UC_RH850_REG_FEWR = 61 + let UC_RH850_REG_BSEL = 63 + + // RH850 system regusters, selection ID 1 + let UC_RH850_REG_MCFG0 = 64 + let UC_RH850_REG_RBASE = 65 + let UC_RH850_REG_EBASE = 66 + let UC_RH850_REG_INTBP = 67 + let UC_RH850_REG_MCTL = 68 + let UC_RH850_REG_PID = 69 + let UC_RH850_REG_SCCFG = 75 + let UC_RH850_REG_SCBP = 76 + + // RH850 system registers, selection ID 2 + let UC_RH850_REG_HTCFG0 = 96 + let UC_RH850_REG_MEA = 102 + let UC_RH850_REG_ASID = 103 + let UC_RH850_REG_MEI = 104 + let UC_RH850_REG_PC = 288 + let UC_RH850_REG_ENDING = 289 + + // RH8509 Registers aliases. + + let UC_RH850_REG_ZERO = 0 + let UC_RH850_REG_SP = 3 + let UC_RH850_REG_EP = 30 + let UC_RH850_REG_LP = 31 + diff --git a/bindings/go/unicorn/avr_const.go b/bindings/go/unicorn/avr_const.go new file mode 100644 index 0000000000..985bf7d009 --- /dev/null +++ b/bindings/go/unicorn/avr_const.go @@ -0,0 +1,150 @@ +package unicorn +// For Unicorn Engine. 
AUTO-GENERATED FILE, DO NOT EDIT [avr_const.go] +const ( + +// AVR architectures + AVR_ARCH_AVR1 = 10 + AVR_ARCH_AVR2 = 20 + AVR_ARCH_AVR25 = 25 + AVR_ARCH_AVR3 = 30 + AVR_ARCH_AVR4 = 40 + AVR_ARCH_AVR5 = 50 + AVR_ARCH_AVR51 = 51 + AVR_ARCH_AVR6 = 60 + CPU_AVR_ARCH = 1000 + +// AVR CPU + CPU_AVR_ATMEGA16 = 50016 + CPU_AVR_ATMEGA32 = 50032 + CPU_AVR_ATMEGA64 = 50064 + CPU_AVR_ATMEGA128 = 51128 + CPU_AVR_ATMEGA128RFR2 = 51129 + CPU_AVR_ATMEGA1280 = 51130 + CPU_AVR_ATMEGA256 = 60256 + CPU_AVR_ATMEGA256RFR2 = 60257 + CPU_AVR_ATMEGA2560 = 60258 + +// AVR memory + AVR_MEM_FLASH = 134217728 + +// AVR registers + + AVR_REG_INVALID = 0 + AVR_REG_R0 = 1 + AVR_REG_R1 = 2 + AVR_REG_R2 = 3 + AVR_REG_R3 = 4 + AVR_REG_R4 = 5 + AVR_REG_R5 = 6 + AVR_REG_R6 = 7 + AVR_REG_R7 = 8 + AVR_REG_R8 = 9 + AVR_REG_R9 = 10 + AVR_REG_R10 = 11 + AVR_REG_R11 = 12 + AVR_REG_R12 = 13 + AVR_REG_R13 = 14 + AVR_REG_R14 = 15 + AVR_REG_R15 = 16 + AVR_REG_R16 = 17 + AVR_REG_R17 = 18 + AVR_REG_R18 = 19 + AVR_REG_R19 = 20 + AVR_REG_R20 = 21 + AVR_REG_R21 = 22 + AVR_REG_R22 = 23 + AVR_REG_R23 = 24 + AVR_REG_R24 = 25 + AVR_REG_R25 = 26 + AVR_REG_R26 = 27 + AVR_REG_R27 = 28 + AVR_REG_R28 = 29 + AVR_REG_R29 = 30 + AVR_REG_R30 = 31 + AVR_REG_R31 = 32 + AVR_REG_PC = 33 + AVR_REG_SP = 34 + AVR_REG_RAMPD = 57 + AVR_REG_RAMPX = 58 + AVR_REG_RAMPY = 59 + AVR_REG_RAMPZ = 60 + AVR_REG_EIND = 61 + AVR_REG_SPL = 62 + AVR_REG_SPH = 63 + AVR_REG_SREG = 64 + +// 16-bit coalesced registers + AVR_REG_R0W = 65 + AVR_REG_R1W = 66 + AVR_REG_R2W = 67 + AVR_REG_R3W = 68 + AVR_REG_R4W = 69 + AVR_REG_R5W = 70 + AVR_REG_R6W = 71 + AVR_REG_R7W = 72 + AVR_REG_R8W = 73 + AVR_REG_R9W = 74 + AVR_REG_R10W = 75 + AVR_REG_R11W = 76 + AVR_REG_R12W = 77 + AVR_REG_R13W = 78 + AVR_REG_R14W = 79 + AVR_REG_R15W = 80 + AVR_REG_R16W = 81 + AVR_REG_R17W = 82 + AVR_REG_R18W = 83 + AVR_REG_R19W = 84 + AVR_REG_R20W = 85 + AVR_REG_R21W = 86 + AVR_REG_R22W = 87 + AVR_REG_R23W = 88 + AVR_REG_R24W = 89 + AVR_REG_R25W = 90 + AVR_REG_R26W = 91 + AVR_REG_R27W = 92 + AVR_REG_R28W = 93 + AVR_REG_R29W = 94 + AVR_REG_R30W = 95 + +// 32-bit coalesced registers + AVR_REG_R0D = 97 + AVR_REG_R1D = 98 + AVR_REG_R2D = 99 + AVR_REG_R3D = 100 + AVR_REG_R4D = 101 + AVR_REG_R5D = 102 + AVR_REG_R6D = 103 + AVR_REG_R7D = 104 + AVR_REG_R8D = 105 + AVR_REG_R9D = 106 + AVR_REG_R10D = 107 + AVR_REG_R11D = 108 + AVR_REG_R12D = 109 + AVR_REG_R13D = 110 + AVR_REG_R14D = 111 + AVR_REG_R15D = 112 + AVR_REG_R16D = 113 + AVR_REG_R17D = 114 + AVR_REG_R18D = 115 + AVR_REG_R19D = 116 + AVR_REG_R20D = 117 + AVR_REG_R21D = 118 + AVR_REG_R22D = 119 + AVR_REG_R23D = 120 + AVR_REG_R24D = 121 + AVR_REG_R25D = 122 + AVR_REG_R26D = 123 + AVR_REG_R27D = 124 + AVR_REG_R28D = 125 + +// Alias registers + AVR_REG_Xhi = 28 + AVR_REG_Xlo = 27 + AVR_REG_Yhi = 30 + AVR_REG_Ylo = 29 + AVR_REG_Zhi = 32 + AVR_REG_Zlo = 31 + AVR_REG_X = 91 + AVR_REG_Y = 93 + AVR_REG_Z = 95 +) \ No newline at end of file diff --git a/bindings/go/unicorn/rh850_const.go b/bindings/go/unicorn/rh850_const.go new file mode 100644 index 0000000000..72ad301628 --- /dev/null +++ b/bindings/go/unicorn/rh850_const.go @@ -0,0 +1,94 @@ +package unicorn +// For Unicorn Engine. 
AUTO-GENERATED FILE, DO NOT EDIT [rh850_const.go] +const ( + RH850_SYSREG_SELID0 = 32 + RH850_SYSREG_SELID1 = 64 + RH850_SYSREG_SELID2 = 96 + RH850_SYSREG_SELID3 = 128 + RH850_SYSREG_SELID4 = 160 + RH850_SYSREG_SELID5 = 192 + RH850_SYSREG_SELID6 = 224 + RH850_SYSREG_SELID7 = 256 + +// RH850 global purpose registers + + RH850_REG_R0 = 0 + RH850_REG_R1 = 1 + RH850_REG_R2 = 2 + RH850_REG_R3 = 3 + RH850_REG_R4 = 4 + RH850_REG_R5 = 5 + RH850_REG_R6 = 6 + RH850_REG_R7 = 7 + RH850_REG_R8 = 8 + RH850_REG_R9 = 9 + RH850_REG_R10 = 10 + RH850_REG_R11 = 11 + RH850_REG_R12 = 12 + RH850_REG_R13 = 13 + RH850_REG_R14 = 14 + RH850_REG_R15 = 15 + RH850_REG_R16 = 16 + RH850_REG_R17 = 17 + RH850_REG_R18 = 18 + RH850_REG_R19 = 19 + RH850_REG_R20 = 20 + RH850_REG_R21 = 21 + RH850_REG_R22 = 22 + RH850_REG_R23 = 23 + RH850_REG_R24 = 24 + RH850_REG_R25 = 25 + RH850_REG_R26 = 26 + RH850_REG_R27 = 27 + RH850_REG_R28 = 28 + RH850_REG_R29 = 29 + RH850_REG_R30 = 30 + RH850_REG_R31 = 31 + +// RH850 system registers, selection ID 0 + RH850_REG_EIPC = 32 + RH850_REG_EIPSW = 33 + RH850_REG_FEPC = 34 + RH850_REG_FEPSW = 35 + RH850_REG_ECR = 36 + RH850_REG_PSW = 37 + RH850_REG_FPSR = 38 + RH850_REG_FPEPC = 39 + RH850_REG_FPST = 40 + RH850_REG_FPCC = 41 + RH850_REG_FPCFG = 42 + RH850_REG_FPEC = 43 + RH850_REG_EIIC = 45 + RH850_REG_FEIC = 46 + RH850_REG_CTPC = 48 + RH850_REG_CTPSW = 49 + RH850_REG_CTBP = 52 + RH850_REG_EIWR = 60 + RH850_REG_FEWR = 61 + RH850_REG_BSEL = 63 + +// RH850 system regusters, selection ID 1 + RH850_REG_MCFG0 = 64 + RH850_REG_RBASE = 65 + RH850_REG_EBASE = 66 + RH850_REG_INTBP = 67 + RH850_REG_MCTL = 68 + RH850_REG_PID = 69 + RH850_REG_SCCFG = 75 + RH850_REG_SCBP = 76 + +// RH850 system registers, selection ID 2 + RH850_REG_HTCFG0 = 96 + RH850_REG_MEA = 102 + RH850_REG_ASID = 103 + RH850_REG_MEI = 104 + RH850_REG_PC = 288 + RH850_REG_ENDING = 289 + +// RH8509 Registers aliases. + + RH850_REG_ZERO = 0 + RH850_REG_SP = 3 + RH850_REG_EP = 30 + RH850_REG_LP = 31 +) \ No newline at end of file diff --git a/bindings/go/unicorn/unicorn_const.go b/bindings/go/unicorn/unicorn_const.go index f005f652dd..06e445b7b6 100644 --- a/bindings/go/unicorn/unicorn_const.go +++ b/bindings/go/unicorn/unicorn_const.go @@ -21,7 +21,9 @@ const ( ARCH_RISCV = 8 ARCH_S390X = 9 ARCH_TRICORE = 10 - ARCH_MAX = 11 + ARCH_AVR = 11 + ARCH_RH850 = 12 + ARCH_MAX = 13 MODE_LITTLE_ENDIAN = 0 MODE_BIG_ENDIAN = 1073741824 @@ -48,6 +50,7 @@ const ( MODE_SPARC32 = 4 MODE_SPARC64 = 8 MODE_V9 = 16 + MODE_RH850 = 4 MODE_RISCV32 = 4 MODE_RISCV64 = 8 @@ -149,4 +152,4 @@ const ( PROT_ALL = 7 CTL_CONTEXT_CPU = 1 CTL_CONTEXT_MEMORY = 2 -) \ No newline at end of file +) diff --git a/bindings/java/src/main/java/unicorn/AVRConst.java b/bindings/java/src/main/java/unicorn/AVRConst.java new file mode 100644 index 0000000000..066fd97774 --- /dev/null +++ b/bindings/java/src/main/java/unicorn/AVRConst.java @@ -0,0 +1,153 @@ +// For Unicorn Engine. 
AUTO-GENERATED FILE, DO NOT EDIT + +package unicorn; + +public interface AVRConst { + + // AVR architectures + public static final int UC_AVR_ARCH_AVR1 = 10; + public static final int UC_AVR_ARCH_AVR2 = 20; + public static final int UC_AVR_ARCH_AVR25 = 25; + public static final int UC_AVR_ARCH_AVR3 = 30; + public static final int UC_AVR_ARCH_AVR4 = 40; + public static final int UC_AVR_ARCH_AVR5 = 50; + public static final int UC_AVR_ARCH_AVR51 = 51; + public static final int UC_AVR_ARCH_AVR6 = 60; + public static final int UC_CPU_AVR_ARCH = 1000; + + // AVR CPU + public static final int UC_CPU_AVR_ATMEGA16 = 50016; + public static final int UC_CPU_AVR_ATMEGA32 = 50032; + public static final int UC_CPU_AVR_ATMEGA64 = 50064; + public static final int UC_CPU_AVR_ATMEGA128 = 51128; + public static final int UC_CPU_AVR_ATMEGA128RFR2 = 51129; + public static final int UC_CPU_AVR_ATMEGA1280 = 51130; + public static final int UC_CPU_AVR_ATMEGA256 = 60256; + public static final int UC_CPU_AVR_ATMEGA256RFR2 = 60257; + public static final int UC_CPU_AVR_ATMEGA2560 = 60258; + + // AVR memory + public static final int UC_AVR_MEM_FLASH = 134217728; + + // AVR registers + + public static final int UC_AVR_REG_INVALID = 0; + public static final int UC_AVR_REG_R0 = 1; + public static final int UC_AVR_REG_R1 = 2; + public static final int UC_AVR_REG_R2 = 3; + public static final int UC_AVR_REG_R3 = 4; + public static final int UC_AVR_REG_R4 = 5; + public static final int UC_AVR_REG_R5 = 6; + public static final int UC_AVR_REG_R6 = 7; + public static final int UC_AVR_REG_R7 = 8; + public static final int UC_AVR_REG_R8 = 9; + public static final int UC_AVR_REG_R9 = 10; + public static final int UC_AVR_REG_R10 = 11; + public static final int UC_AVR_REG_R11 = 12; + public static final int UC_AVR_REG_R12 = 13; + public static final int UC_AVR_REG_R13 = 14; + public static final int UC_AVR_REG_R14 = 15; + public static final int UC_AVR_REG_R15 = 16; + public static final int UC_AVR_REG_R16 = 17; + public static final int UC_AVR_REG_R17 = 18; + public static final int UC_AVR_REG_R18 = 19; + public static final int UC_AVR_REG_R19 = 20; + public static final int UC_AVR_REG_R20 = 21; + public static final int UC_AVR_REG_R21 = 22; + public static final int UC_AVR_REG_R22 = 23; + public static final int UC_AVR_REG_R23 = 24; + public static final int UC_AVR_REG_R24 = 25; + public static final int UC_AVR_REG_R25 = 26; + public static final int UC_AVR_REG_R26 = 27; + public static final int UC_AVR_REG_R27 = 28; + public static final int UC_AVR_REG_R28 = 29; + public static final int UC_AVR_REG_R29 = 30; + public static final int UC_AVR_REG_R30 = 31; + public static final int UC_AVR_REG_R31 = 32; + public static final int UC_AVR_REG_PC = 33; + public static final int UC_AVR_REG_SP = 34; + public static final int UC_AVR_REG_RAMPD = 57; + public static final int UC_AVR_REG_RAMPX = 58; + public static final int UC_AVR_REG_RAMPY = 59; + public static final int UC_AVR_REG_RAMPZ = 60; + public static final int UC_AVR_REG_EIND = 61; + public static final int UC_AVR_REG_SPL = 62; + public static final int UC_AVR_REG_SPH = 63; + public static final int UC_AVR_REG_SREG = 64; + + // 16-bit coalesced registers + public static final int UC_AVR_REG_R0W = 65; + public static final int UC_AVR_REG_R1W = 66; + public static final int UC_AVR_REG_R2W = 67; + public static final int UC_AVR_REG_R3W = 68; + public static final int UC_AVR_REG_R4W = 69; + public static final int UC_AVR_REG_R5W = 70; + public static final int UC_AVR_REG_R6W = 71; + 
public static final int UC_AVR_REG_R7W = 72; + public static final int UC_AVR_REG_R8W = 73; + public static final int UC_AVR_REG_R9W = 74; + public static final int UC_AVR_REG_R10W = 75; + public static final int UC_AVR_REG_R11W = 76; + public static final int UC_AVR_REG_R12W = 77; + public static final int UC_AVR_REG_R13W = 78; + public static final int UC_AVR_REG_R14W = 79; + public static final int UC_AVR_REG_R15W = 80; + public static final int UC_AVR_REG_R16W = 81; + public static final int UC_AVR_REG_R17W = 82; + public static final int UC_AVR_REG_R18W = 83; + public static final int UC_AVR_REG_R19W = 84; + public static final int UC_AVR_REG_R20W = 85; + public static final int UC_AVR_REG_R21W = 86; + public static final int UC_AVR_REG_R22W = 87; + public static final int UC_AVR_REG_R23W = 88; + public static final int UC_AVR_REG_R24W = 89; + public static final int UC_AVR_REG_R25W = 90; + public static final int UC_AVR_REG_R26W = 91; + public static final int UC_AVR_REG_R27W = 92; + public static final int UC_AVR_REG_R28W = 93; + public static final int UC_AVR_REG_R29W = 94; + public static final int UC_AVR_REG_R30W = 95; + + // 32-bit coalesced registers + public static final int UC_AVR_REG_R0D = 97; + public static final int UC_AVR_REG_R1D = 98; + public static final int UC_AVR_REG_R2D = 99; + public static final int UC_AVR_REG_R3D = 100; + public static final int UC_AVR_REG_R4D = 101; + public static final int UC_AVR_REG_R5D = 102; + public static final int UC_AVR_REG_R6D = 103; + public static final int UC_AVR_REG_R7D = 104; + public static final int UC_AVR_REG_R8D = 105; + public static final int UC_AVR_REG_R9D = 106; + public static final int UC_AVR_REG_R10D = 107; + public static final int UC_AVR_REG_R11D = 108; + public static final int UC_AVR_REG_R12D = 109; + public static final int UC_AVR_REG_R13D = 110; + public static final int UC_AVR_REG_R14D = 111; + public static final int UC_AVR_REG_R15D = 112; + public static final int UC_AVR_REG_R16D = 113; + public static final int UC_AVR_REG_R17D = 114; + public static final int UC_AVR_REG_R18D = 115; + public static final int UC_AVR_REG_R19D = 116; + public static final int UC_AVR_REG_R20D = 117; + public static final int UC_AVR_REG_R21D = 118; + public static final int UC_AVR_REG_R22D = 119; + public static final int UC_AVR_REG_R23D = 120; + public static final int UC_AVR_REG_R24D = 121; + public static final int UC_AVR_REG_R25D = 122; + public static final int UC_AVR_REG_R26D = 123; + public static final int UC_AVR_REG_R27D = 124; + public static final int UC_AVR_REG_R28D = 125; + + // Alias registers + public static final int UC_AVR_REG_Xhi = 28; + public static final int UC_AVR_REG_Xlo = 27; + public static final int UC_AVR_REG_Yhi = 30; + public static final int UC_AVR_REG_Ylo = 29; + public static final int UC_AVR_REG_Zhi = 32; + public static final int UC_AVR_REG_Zlo = 31; + public static final int UC_AVR_REG_X = 91; + public static final int UC_AVR_REG_Y = 93; + public static final int UC_AVR_REG_Z = 95; + +} diff --git a/bindings/java/src/main/java/unicorn/Rh850Const.java b/bindings/java/src/main/java/unicorn/Rh850Const.java new file mode 100644 index 0000000000..098eea41b5 --- /dev/null +++ b/bindings/java/src/main/java/unicorn/Rh850Const.java @@ -0,0 +1,97 @@ +// For Unicorn Engine. 
AUTO-GENERATED FILE, DO NOT EDIT + +package unicorn; + +public interface Rh850Const { + public static final int UC_RH850_SYSREG_SELID0 = 32; + public static final int UC_RH850_SYSREG_SELID1 = 64; + public static final int UC_RH850_SYSREG_SELID2 = 96; + public static final int UC_RH850_SYSREG_SELID3 = 128; + public static final int UC_RH850_SYSREG_SELID4 = 160; + public static final int UC_RH850_SYSREG_SELID5 = 192; + public static final int UC_RH850_SYSREG_SELID6 = 224; + public static final int UC_RH850_SYSREG_SELID7 = 256; + +// RH850 global purpose registers + + public static final int UC_RH850_REG_R0 = 0; + public static final int UC_RH850_REG_R1 = 1; + public static final int UC_RH850_REG_R2 = 2; + public static final int UC_RH850_REG_R3 = 3; + public static final int UC_RH850_REG_R4 = 4; + public static final int UC_RH850_REG_R5 = 5; + public static final int UC_RH850_REG_R6 = 6; + public static final int UC_RH850_REG_R7 = 7; + public static final int UC_RH850_REG_R8 = 8; + public static final int UC_RH850_REG_R9 = 9; + public static final int UC_RH850_REG_R10 = 10; + public static final int UC_RH850_REG_R11 = 11; + public static final int UC_RH850_REG_R12 = 12; + public static final int UC_RH850_REG_R13 = 13; + public static final int UC_RH850_REG_R14 = 14; + public static final int UC_RH850_REG_R15 = 15; + public static final int UC_RH850_REG_R16 = 16; + public static final int UC_RH850_REG_R17 = 17; + public static final int UC_RH850_REG_R18 = 18; + public static final int UC_RH850_REG_R19 = 19; + public static final int UC_RH850_REG_R20 = 20; + public static final int UC_RH850_REG_R21 = 21; + public static final int UC_RH850_REG_R22 = 22; + public static final int UC_RH850_REG_R23 = 23; + public static final int UC_RH850_REG_R24 = 24; + public static final int UC_RH850_REG_R25 = 25; + public static final int UC_RH850_REG_R26 = 26; + public static final int UC_RH850_REG_R27 = 27; + public static final int UC_RH850_REG_R28 = 28; + public static final int UC_RH850_REG_R29 = 29; + public static final int UC_RH850_REG_R30 = 30; + public static final int UC_RH850_REG_R31 = 31; + +// RH850 system registers, selection ID 0 + public static final int UC_RH850_REG_EIPC = 32; + public static final int UC_RH850_REG_EIPSW = 33; + public static final int UC_RH850_REG_FEPC = 34; + public static final int UC_RH850_REG_FEPSW = 35; + public static final int UC_RH850_REG_ECR = 36; + public static final int UC_RH850_REG_PSW = 37; + public static final int UC_RH850_REG_FPSR = 38; + public static final int UC_RH850_REG_FPEPC = 39; + public static final int UC_RH850_REG_FPST = 40; + public static final int UC_RH850_REG_FPCC = 41; + public static final int UC_RH850_REG_FPCFG = 42; + public static final int UC_RH850_REG_FPEC = 43; + public static final int UC_RH850_REG_EIIC = 45; + public static final int UC_RH850_REG_FEIC = 46; + public static final int UC_RH850_REG_CTPC = 48; + public static final int UC_RH850_REG_CTPSW = 49; + public static final int UC_RH850_REG_CTBP = 52; + public static final int UC_RH850_REG_EIWR = 60; + public static final int UC_RH850_REG_FEWR = 61; + public static final int UC_RH850_REG_BSEL = 63; + +// RH850 system regusters, selection ID 1 + public static final int UC_RH850_REG_MCFG0 = 64; + public static final int UC_RH850_REG_RBASE = 65; + public static final int UC_RH850_REG_EBASE = 66; + public static final int UC_RH850_REG_INTBP = 67; + public static final int UC_RH850_REG_MCTL = 68; + public static final int UC_RH850_REG_PID = 69; + public static final int 
UC_RH850_REG_SCCFG = 75; + public static final int UC_RH850_REG_SCBP = 76; + +// RH850 system registers, selection ID 2 + public static final int UC_RH850_REG_HTCFG0 = 96; + public static final int UC_RH850_REG_MEA = 102; + public static final int UC_RH850_REG_ASID = 103; + public static final int UC_RH850_REG_MEI = 104; + public static final int UC_RH850_REG_PC = 288; + public static final int UC_RH850_REG_ENDING = 289; + +// RH8509 Registers aliases. + + public static final int UC_RH850_REG_ZERO = 0; + public static final int UC_RH850_REG_SP = 3; + public static final int UC_RH850_REG_EP = 30; + public static final int UC_RH850_REG_LP = 31; + +} diff --git a/bindings/java/src/main/java/unicorn/UnicornConst.java b/bindings/java/src/main/java/unicorn/UnicornConst.java index 2fca78c336..fc06f9b5c2 100644 --- a/bindings/java/src/main/java/unicorn/UnicornConst.java +++ b/bindings/java/src/main/java/unicorn/UnicornConst.java @@ -23,7 +23,9 @@ public interface UnicornConst { public static final int UC_ARCH_RISCV = 8; public static final int UC_ARCH_S390X = 9; public static final int UC_ARCH_TRICORE = 10; - public static final int UC_ARCH_MAX = 11; + public static final int UC_ARCH_AVR = 11; + public static final int UC_ARCH_RH850 = 12; + public static final int UC_ARCH_MAX = 13; public static final int UC_MODE_LITTLE_ENDIAN = 0; public static final int UC_MODE_BIG_ENDIAN = 1073741824; @@ -50,6 +52,7 @@ public interface UnicornConst { public static final int UC_MODE_SPARC32 = 4; public static final int UC_MODE_SPARC64 = 8; public static final int UC_MODE_V9 = 16; + public static final int UC_MODE_RH850 = 4; public static final int UC_MODE_RISCV32 = 4; public static final int UC_MODE_RISCV64 = 8; diff --git a/bindings/pascal/unicorn/AVRConst.pas b/bindings/pascal/unicorn/AVRConst.pas new file mode 100644 index 0000000000..c607d54ac9 --- /dev/null +++ b/bindings/pascal/unicorn/AVRConst.pas @@ -0,0 +1,155 @@ +// For Unicorn Engine. 
AUTO-GENERATED FILE, DO NOT EDIT + +unit AVRConst; + +interface + +const +// AVR architectures + UC_AVR_ARCH_AVR1 = 10; + UC_AVR_ARCH_AVR2 = 20; + UC_AVR_ARCH_AVR25 = 25; + UC_AVR_ARCH_AVR3 = 30; + UC_AVR_ARCH_AVR4 = 40; + UC_AVR_ARCH_AVR5 = 50; + UC_AVR_ARCH_AVR51 = 51; + UC_AVR_ARCH_AVR6 = 60; + UC_CPU_AVR_ARCH = 1000; + +// AVR CPU + UC_CPU_AVR_ATMEGA16 = 50016; + UC_CPU_AVR_ATMEGA32 = 50032; + UC_CPU_AVR_ATMEGA64 = 50064; + UC_CPU_AVR_ATMEGA128 = 51128; + UC_CPU_AVR_ATMEGA128RFR2 = 51129; + UC_CPU_AVR_ATMEGA1280 = 51130; + UC_CPU_AVR_ATMEGA256 = 60256; + UC_CPU_AVR_ATMEGA256RFR2 = 60257; + UC_CPU_AVR_ATMEGA2560 = 60258; + +// AVR memory + UC_AVR_MEM_FLASH = 134217728; + +// AVR registers + + UC_AVR_REG_INVALID = 0; + UC_AVR_REG_R0 = 1; + UC_AVR_REG_R1 = 2; + UC_AVR_REG_R2 = 3; + UC_AVR_REG_R3 = 4; + UC_AVR_REG_R4 = 5; + UC_AVR_REG_R5 = 6; + UC_AVR_REG_R6 = 7; + UC_AVR_REG_R7 = 8; + UC_AVR_REG_R8 = 9; + UC_AVR_REG_R9 = 10; + UC_AVR_REG_R10 = 11; + UC_AVR_REG_R11 = 12; + UC_AVR_REG_R12 = 13; + UC_AVR_REG_R13 = 14; + UC_AVR_REG_R14 = 15; + UC_AVR_REG_R15 = 16; + UC_AVR_REG_R16 = 17; + UC_AVR_REG_R17 = 18; + UC_AVR_REG_R18 = 19; + UC_AVR_REG_R19 = 20; + UC_AVR_REG_R20 = 21; + UC_AVR_REG_R21 = 22; + UC_AVR_REG_R22 = 23; + UC_AVR_REG_R23 = 24; + UC_AVR_REG_R24 = 25; + UC_AVR_REG_R25 = 26; + UC_AVR_REG_R26 = 27; + UC_AVR_REG_R27 = 28; + UC_AVR_REG_R28 = 29; + UC_AVR_REG_R29 = 30; + UC_AVR_REG_R30 = 31; + UC_AVR_REG_R31 = 32; + UC_AVR_REG_PC = 33; + UC_AVR_REG_SP = 34; + UC_AVR_REG_RAMPD = 57; + UC_AVR_REG_RAMPX = 58; + UC_AVR_REG_RAMPY = 59; + UC_AVR_REG_RAMPZ = 60; + UC_AVR_REG_EIND = 61; + UC_AVR_REG_SPL = 62; + UC_AVR_REG_SPH = 63; + UC_AVR_REG_SREG = 64; + +// 16-bit coalesced registers + UC_AVR_REG_R0W = 65; + UC_AVR_REG_R1W = 66; + UC_AVR_REG_R2W = 67; + UC_AVR_REG_R3W = 68; + UC_AVR_REG_R4W = 69; + UC_AVR_REG_R5W = 70; + UC_AVR_REG_R6W = 71; + UC_AVR_REG_R7W = 72; + UC_AVR_REG_R8W = 73; + UC_AVR_REG_R9W = 74; + UC_AVR_REG_R10W = 75; + UC_AVR_REG_R11W = 76; + UC_AVR_REG_R12W = 77; + UC_AVR_REG_R13W = 78; + UC_AVR_REG_R14W = 79; + UC_AVR_REG_R15W = 80; + UC_AVR_REG_R16W = 81; + UC_AVR_REG_R17W = 82; + UC_AVR_REG_R18W = 83; + UC_AVR_REG_R19W = 84; + UC_AVR_REG_R20W = 85; + UC_AVR_REG_R21W = 86; + UC_AVR_REG_R22W = 87; + UC_AVR_REG_R23W = 88; + UC_AVR_REG_R24W = 89; + UC_AVR_REG_R25W = 90; + UC_AVR_REG_R26W = 91; + UC_AVR_REG_R27W = 92; + UC_AVR_REG_R28W = 93; + UC_AVR_REG_R29W = 94; + UC_AVR_REG_R30W = 95; + +// 32-bit coalesced registers + UC_AVR_REG_R0D = 97; + UC_AVR_REG_R1D = 98; + UC_AVR_REG_R2D = 99; + UC_AVR_REG_R3D = 100; + UC_AVR_REG_R4D = 101; + UC_AVR_REG_R5D = 102; + UC_AVR_REG_R6D = 103; + UC_AVR_REG_R7D = 104; + UC_AVR_REG_R8D = 105; + UC_AVR_REG_R9D = 106; + UC_AVR_REG_R10D = 107; + UC_AVR_REG_R11D = 108; + UC_AVR_REG_R12D = 109; + UC_AVR_REG_R13D = 110; + UC_AVR_REG_R14D = 111; + UC_AVR_REG_R15D = 112; + UC_AVR_REG_R16D = 113; + UC_AVR_REG_R17D = 114; + UC_AVR_REG_R18D = 115; + UC_AVR_REG_R19D = 116; + UC_AVR_REG_R20D = 117; + UC_AVR_REG_R21D = 118; + UC_AVR_REG_R22D = 119; + UC_AVR_REG_R23D = 120; + UC_AVR_REG_R24D = 121; + UC_AVR_REG_R25D = 122; + UC_AVR_REG_R26D = 123; + UC_AVR_REG_R27D = 124; + UC_AVR_REG_R28D = 125; + +// Alias registers + UC_AVR_REG_Xhi = 28; + UC_AVR_REG_Xlo = 27; + UC_AVR_REG_Yhi = 30; + UC_AVR_REG_Ylo = 29; + UC_AVR_REG_Zhi = 32; + UC_AVR_REG_Zlo = 31; + UC_AVR_REG_X = 91; + UC_AVR_REG_Y = 93; + UC_AVR_REG_Z = 95; + +implementation +end. 
\ No newline at end of file diff --git a/bindings/pascal/unicorn/Rh850Const.pas b/bindings/pascal/unicorn/Rh850Const.pas new file mode 100644 index 0000000000..c405b3c464 --- /dev/null +++ b/bindings/pascal/unicorn/Rh850Const.pas @@ -0,0 +1,99 @@ +// For Unicorn Engine. AUTO-GENERATED FILE, DO NOT EDIT + +unit Rh850Const; + +interface + +const UC_RH850_SYSREG_SELID0 = 32; + UC_RH850_SYSREG_SELID1 = 64; + UC_RH850_SYSREG_SELID2 = 96; + UC_RH850_SYSREG_SELID3 = 128; + UC_RH850_SYSREG_SELID4 = 160; + UC_RH850_SYSREG_SELID5 = 192; + UC_RH850_SYSREG_SELID6 = 224; + UC_RH850_SYSREG_SELID7 = 256; + +// RH850 global purpose registers + + UC_RH850_REG_R0 = 0; + UC_RH850_REG_R1 = 1; + UC_RH850_REG_R2 = 2; + UC_RH850_REG_R3 = 3; + UC_RH850_REG_R4 = 4; + UC_RH850_REG_R5 = 5; + UC_RH850_REG_R6 = 6; + UC_RH850_REG_R7 = 7; + UC_RH850_REG_R8 = 8; + UC_RH850_REG_R9 = 9; + UC_RH850_REG_R10 = 10; + UC_RH850_REG_R11 = 11; + UC_RH850_REG_R12 = 12; + UC_RH850_REG_R13 = 13; + UC_RH850_REG_R14 = 14; + UC_RH850_REG_R15 = 15; + UC_RH850_REG_R16 = 16; + UC_RH850_REG_R17 = 17; + UC_RH850_REG_R18 = 18; + UC_RH850_REG_R19 = 19; + UC_RH850_REG_R20 = 20; + UC_RH850_REG_R21 = 21; + UC_RH850_REG_R22 = 22; + UC_RH850_REG_R23 = 23; + UC_RH850_REG_R24 = 24; + UC_RH850_REG_R25 = 25; + UC_RH850_REG_R26 = 26; + UC_RH850_REG_R27 = 27; + UC_RH850_REG_R28 = 28; + UC_RH850_REG_R29 = 29; + UC_RH850_REG_R30 = 30; + UC_RH850_REG_R31 = 31; + +// RH850 system registers, selection ID 0 + UC_RH850_REG_EIPC = 32; + UC_RH850_REG_EIPSW = 33; + UC_RH850_REG_FEPC = 34; + UC_RH850_REG_FEPSW = 35; + UC_RH850_REG_ECR = 36; + UC_RH850_REG_PSW = 37; + UC_RH850_REG_FPSR = 38; + UC_RH850_REG_FPEPC = 39; + UC_RH850_REG_FPST = 40; + UC_RH850_REG_FPCC = 41; + UC_RH850_REG_FPCFG = 42; + UC_RH850_REG_FPEC = 43; + UC_RH850_REG_EIIC = 45; + UC_RH850_REG_FEIC = 46; + UC_RH850_REG_CTPC = 48; + UC_RH850_REG_CTPSW = 49; + UC_RH850_REG_CTBP = 52; + UC_RH850_REG_EIWR = 60; + UC_RH850_REG_FEWR = 61; + UC_RH850_REG_BSEL = 63; + +// RH850 system regusters, selection ID 1 + UC_RH850_REG_MCFG0 = 64; + UC_RH850_REG_RBASE = 65; + UC_RH850_REG_EBASE = 66; + UC_RH850_REG_INTBP = 67; + UC_RH850_REG_MCTL = 68; + UC_RH850_REG_PID = 69; + UC_RH850_REG_SCCFG = 75; + UC_RH850_REG_SCBP = 76; + +// RH850 system registers, selection ID 2 + UC_RH850_REG_HTCFG0 = 96; + UC_RH850_REG_MEA = 102; + UC_RH850_REG_ASID = 103; + UC_RH850_REG_MEI = 104; + UC_RH850_REG_PC = 288; + UC_RH850_REG_ENDING = 289; + +// RH8509 Registers aliases. + + UC_RH850_REG_ZERO = 0; + UC_RH850_REG_SP = 3; + UC_RH850_REG_EP = 30; + UC_RH850_REG_LP = 31; + +implementation +end. \ No newline at end of file diff --git a/bindings/pascal/unicorn/UnicornConst.pas b/bindings/pascal/unicorn/UnicornConst.pas index 0716ec3e4e..ce19c20386 100644 --- a/bindings/pascal/unicorn/UnicornConst.pas +++ b/bindings/pascal/unicorn/UnicornConst.pas @@ -24,7 +24,9 @@ interface UC_ARCH_RISCV = 8; UC_ARCH_S390X = 9; UC_ARCH_TRICORE = 10; - UC_ARCH_MAX = 11; + UC_ARCH_AVR = 11; + UC_ARCH_RH850 = 12; + UC_ARCH_MAX = 13; UC_MODE_LITTLE_ENDIAN = 0; UC_MODE_BIG_ENDIAN = 1073741824; @@ -51,6 +53,7 @@ interface UC_MODE_SPARC32 = 4; UC_MODE_SPARC64 = 8; UC_MODE_V9 = 16; + UC_MODE_RH850 = 4; UC_MODE_RISCV32 = 4; UC_MODE_RISCV64 = 8; @@ -154,4 +157,4 @@ interface UC_CTL_CONTEXT_MEMORY = 2; implementation -end. \ No newline at end of file +end. 
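The Python sample that follows exercises the new RH850 backend; for the AVR side, a comparable minimal sketch is shown here (illustrative only, not part of the patch). It assumes the Python binding exposes ctl_set_cpu_model() for selecting one of the UC_CPU_AVR_* models added above, that guest code is mapped at the UC_AVR_MEM_FLASH base, and that the byte string encodes two AVR nop instructions (opcode 0x0000):

    #!/usr/bin/env python
    # Minimal AVR sketch for the new UC_ARCH_AVR support (assumptions noted above).
    from unicorn import *
    from unicorn.avr_const import *

    AVR_NOP_CODE = b"\x00\x00" * 2          # two 16-bit nop opcodes

    def test_avr():
        try:
            mu = Uc(UC_ARCH_AVR, 0)
            mu.ctl_set_cpu_model(UC_CPU_AVR_ATMEGA128)   # AVR5.1-class core

            # map one flash page and place the code at the flash base
            mu.mem_map(UC_AVR_MEM_FLASH, 4 * 1024)
            mu.mem_write(UC_AVR_MEM_FLASH, AVR_NOP_CODE)

            # pre-load a general purpose register, then run both instructions
            mu.reg_write(UC_AVR_REG_R16, 0x2a)
            mu.emu_start(UC_AVR_MEM_FLASH, UC_AVR_MEM_FLASH + len(AVR_NOP_CODE))

            print(">>> R16 = 0x%x" % mu.reg_read(UC_AVR_REG_R16))
            print(">>> PC  = 0x%x" % mu.reg_read(UC_AVR_REG_PC))
        except UcError as e:
            print("ERROR: %s" % e)

    if __name__ == '__main__':
        test_avr()

The same register-read/write and emu_start pattern applies to the RH850 constants introduced in this patch; the in-tree sample_rh850.py below is the authoritative version.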
diff --git a/bindings/python/sample_rh850.py b/bindings/python/sample_rh850.py new file mode 100644 index 0000000000..294d76ff2b --- /dev/null +++ b/bindings/python/sample_rh850.py @@ -0,0 +1,70 @@ +#!/usr/bin/env python +# Sample code for RH850 of Unicorn. Damien Cauquil +# + +from __future__ import print_function +from unicorn import * +from unicorn.rh850_const import * + + +''' + 0 01 0e 06 addi 6, r1, r1 + 4 00 c1 11 add r1, r2 +''' +RH850_CODE = b"\x01\x0e\x06\x00\xc1\x11" + +# memory address where emulation starts +ADDRESS = 0x10000 + + +# callback for tracing basic blocks +def hook_block(uc, address, size, user_data): + print(">>> Tracing basic block at 0x%x, block size = 0x%x" %(address, size)) + + +# callback for tracing instructions +def hook_code(uc, address, size, user_data): + print(">>> Tracing instruction at 0x%x, instruction size = 0x%x" %(address, size)) + + +# Test RH850 +def test_rh850(): + print("Emulate RH850 code") + try: + # Initialize emulator in RISCV32 mode + mu = Uc(UC_ARCH_RH850, 0) + + # map 2MB memory for this emulation + mu.mem_map(ADDRESS, 2 * 1024 * 1024) + + # write machine code to be emulated to memory + mu.mem_write(ADDRESS, RH850_CODE) + + # initialize machine registers + mu.reg_write(UC_RH850_REG_R1, 0x1234) + mu.reg_write(UC_RH850_REG_R2, 0x7890) + + # tracing all basic blocks with customized callback + mu.hook_add(UC_HOOK_BLOCK, hook_block) + + # tracing all instructions with customized callback + mu.hook_add(UC_HOOK_CODE, hook_code) + + # emulate machine code in infinite time + mu.emu_start(ADDRESS, ADDRESS + len(RH850_CODE)) + + # now print out some registers + print(">>> Emulation done. Below is the CPU context") + + r1 = mu.reg_read(UC_RH850_REG_R1) + r2 = mu.reg_read(UC_RH850_REG_R2) + print(">>> R1 = 0x%x" % r1) + print(">>> R2 = 0x%x" % r2) + + except UcError as e: + print("ERROR: %s" % e) + + +if __name__ == '__main__': + test_rh850() + diff --git a/bindings/python/unicorn/__init__.py b/bindings/python/unicorn/__init__.py index a93e12a05c..04894a8e2b 100644 --- a/bindings/python/unicorn/__init__.py +++ b/bindings/python/unicorn/__init__.py @@ -1,4 +1,4 @@ # Forwarding defs for compatibility -from . import arm_const, arm64_const, mips_const, sparc_const, m68k_const, x86_const, riscv_const, s390x_const, tricore_const +from . import arm_const, arm64_const, avr_const, mips_const, sparc_const, m68k_const, x86_const, riscv_const, s390x_const, tricore_const from .unicorn_const import * from .unicorn import Uc, ucsubclass, uc_version, uc_arch_supported, version_bind, debug, UcError, __version__ diff --git a/bindings/python/unicorn/avr_const.py b/bindings/python/unicorn/avr_const.py new file mode 100644 index 0000000000..1bf80a3fa5 --- /dev/null +++ b/bindings/python/unicorn/avr_const.py @@ -0,0 +1,147 @@ +# For Unicorn Engine. 
AUTO-GENERATED FILE, DO NOT EDIT [avr_const.py] + +# AVR architectures +UC_AVR_ARCH_AVR1 = 10 +UC_AVR_ARCH_AVR2 = 20 +UC_AVR_ARCH_AVR25 = 25 +UC_AVR_ARCH_AVR3 = 30 +UC_AVR_ARCH_AVR4 = 40 +UC_AVR_ARCH_AVR5 = 50 +UC_AVR_ARCH_AVR51 = 51 +UC_AVR_ARCH_AVR6 = 60 +UC_CPU_AVR_ARCH = 1000 + +# AVR CPU +UC_CPU_AVR_ATMEGA16 = 50016 +UC_CPU_AVR_ATMEGA32 = 50032 +UC_CPU_AVR_ATMEGA64 = 50064 +UC_CPU_AVR_ATMEGA128 = 51128 +UC_CPU_AVR_ATMEGA128RFR2 = 51129 +UC_CPU_AVR_ATMEGA1280 = 51130 +UC_CPU_AVR_ATMEGA256 = 60256 +UC_CPU_AVR_ATMEGA256RFR2 = 60257 +UC_CPU_AVR_ATMEGA2560 = 60258 + +# AVR memory +UC_AVR_MEM_FLASH = 134217728 + +# AVR registers + +UC_AVR_REG_INVALID = 0 +UC_AVR_REG_R0 = 1 +UC_AVR_REG_R1 = 2 +UC_AVR_REG_R2 = 3 +UC_AVR_REG_R3 = 4 +UC_AVR_REG_R4 = 5 +UC_AVR_REG_R5 = 6 +UC_AVR_REG_R6 = 7 +UC_AVR_REG_R7 = 8 +UC_AVR_REG_R8 = 9 +UC_AVR_REG_R9 = 10 +UC_AVR_REG_R10 = 11 +UC_AVR_REG_R11 = 12 +UC_AVR_REG_R12 = 13 +UC_AVR_REG_R13 = 14 +UC_AVR_REG_R14 = 15 +UC_AVR_REG_R15 = 16 +UC_AVR_REG_R16 = 17 +UC_AVR_REG_R17 = 18 +UC_AVR_REG_R18 = 19 +UC_AVR_REG_R19 = 20 +UC_AVR_REG_R20 = 21 +UC_AVR_REG_R21 = 22 +UC_AVR_REG_R22 = 23 +UC_AVR_REG_R23 = 24 +UC_AVR_REG_R24 = 25 +UC_AVR_REG_R25 = 26 +UC_AVR_REG_R26 = 27 +UC_AVR_REG_R27 = 28 +UC_AVR_REG_R28 = 29 +UC_AVR_REG_R29 = 30 +UC_AVR_REG_R30 = 31 +UC_AVR_REG_R31 = 32 +UC_AVR_REG_PC = 33 +UC_AVR_REG_SP = 34 +UC_AVR_REG_RAMPD = 57 +UC_AVR_REG_RAMPX = 58 +UC_AVR_REG_RAMPY = 59 +UC_AVR_REG_RAMPZ = 60 +UC_AVR_REG_EIND = 61 +UC_AVR_REG_SPL = 62 +UC_AVR_REG_SPH = 63 +UC_AVR_REG_SREG = 64 + +# 16-bit coalesced registers +UC_AVR_REG_R0W = 65 +UC_AVR_REG_R1W = 66 +UC_AVR_REG_R2W = 67 +UC_AVR_REG_R3W = 68 +UC_AVR_REG_R4W = 69 +UC_AVR_REG_R5W = 70 +UC_AVR_REG_R6W = 71 +UC_AVR_REG_R7W = 72 +UC_AVR_REG_R8W = 73 +UC_AVR_REG_R9W = 74 +UC_AVR_REG_R10W = 75 +UC_AVR_REG_R11W = 76 +UC_AVR_REG_R12W = 77 +UC_AVR_REG_R13W = 78 +UC_AVR_REG_R14W = 79 +UC_AVR_REG_R15W = 80 +UC_AVR_REG_R16W = 81 +UC_AVR_REG_R17W = 82 +UC_AVR_REG_R18W = 83 +UC_AVR_REG_R19W = 84 +UC_AVR_REG_R20W = 85 +UC_AVR_REG_R21W = 86 +UC_AVR_REG_R22W = 87 +UC_AVR_REG_R23W = 88 +UC_AVR_REG_R24W = 89 +UC_AVR_REG_R25W = 90 +UC_AVR_REG_R26W = 91 +UC_AVR_REG_R27W = 92 +UC_AVR_REG_R28W = 93 +UC_AVR_REG_R29W = 94 +UC_AVR_REG_R30W = 95 + +# 32-bit coalesced registers +UC_AVR_REG_R0D = 97 +UC_AVR_REG_R1D = 98 +UC_AVR_REG_R2D = 99 +UC_AVR_REG_R3D = 100 +UC_AVR_REG_R4D = 101 +UC_AVR_REG_R5D = 102 +UC_AVR_REG_R6D = 103 +UC_AVR_REG_R7D = 104 +UC_AVR_REG_R8D = 105 +UC_AVR_REG_R9D = 106 +UC_AVR_REG_R10D = 107 +UC_AVR_REG_R11D = 108 +UC_AVR_REG_R12D = 109 +UC_AVR_REG_R13D = 110 +UC_AVR_REG_R14D = 111 +UC_AVR_REG_R15D = 112 +UC_AVR_REG_R16D = 113 +UC_AVR_REG_R17D = 114 +UC_AVR_REG_R18D = 115 +UC_AVR_REG_R19D = 116 +UC_AVR_REG_R20D = 117 +UC_AVR_REG_R21D = 118 +UC_AVR_REG_R22D = 119 +UC_AVR_REG_R23D = 120 +UC_AVR_REG_R24D = 121 +UC_AVR_REG_R25D = 122 +UC_AVR_REG_R26D = 123 +UC_AVR_REG_R27D = 124 +UC_AVR_REG_R28D = 125 + +# Alias registers +UC_AVR_REG_Xhi = 28 +UC_AVR_REG_Xlo = 27 +UC_AVR_REG_Yhi = 30 +UC_AVR_REG_Ylo = 29 +UC_AVR_REG_Zhi = 32 +UC_AVR_REG_Zlo = 31 +UC_AVR_REG_X = 91 +UC_AVR_REG_Y = 93 +UC_AVR_REG_Z = 95 diff --git a/bindings/python/unicorn/rh850_const.py b/bindings/python/unicorn/rh850_const.py new file mode 100644 index 0000000000..6985d85e4f --- /dev/null +++ b/bindings/python/unicorn/rh850_const.py @@ -0,0 +1,91 @@ +# For Unicorn Engine. 
AUTO-GENERATED FILE, DO NOT EDIT [rh850_const.py] +UC_RH850_SYSREG_SELID0 = 32 +UC_RH850_SYSREG_SELID1 = 64 +UC_RH850_SYSREG_SELID2 = 96 +UC_RH850_SYSREG_SELID3 = 128 +UC_RH850_SYSREG_SELID4 = 160 +UC_RH850_SYSREG_SELID5 = 192 +UC_RH850_SYSREG_SELID6 = 224 +UC_RH850_SYSREG_SELID7 = 256 + +# RH850 global purpose registers + +UC_RH850_REG_R0 = 0 +UC_RH850_REG_R1 = 1 +UC_RH850_REG_R2 = 2 +UC_RH850_REG_R3 = 3 +UC_RH850_REG_R4 = 4 +UC_RH850_REG_R5 = 5 +UC_RH850_REG_R6 = 6 +UC_RH850_REG_R7 = 7 +UC_RH850_REG_R8 = 8 +UC_RH850_REG_R9 = 9 +UC_RH850_REG_R10 = 10 +UC_RH850_REG_R11 = 11 +UC_RH850_REG_R12 = 12 +UC_RH850_REG_R13 = 13 +UC_RH850_REG_R14 = 14 +UC_RH850_REG_R15 = 15 +UC_RH850_REG_R16 = 16 +UC_RH850_REG_R17 = 17 +UC_RH850_REG_R18 = 18 +UC_RH850_REG_R19 = 19 +UC_RH850_REG_R20 = 20 +UC_RH850_REG_R21 = 21 +UC_RH850_REG_R22 = 22 +UC_RH850_REG_R23 = 23 +UC_RH850_REG_R24 = 24 +UC_RH850_REG_R25 = 25 +UC_RH850_REG_R26 = 26 +UC_RH850_REG_R27 = 27 +UC_RH850_REG_R28 = 28 +UC_RH850_REG_R29 = 29 +UC_RH850_REG_R30 = 30 +UC_RH850_REG_R31 = 31 + +# RH850 system registers, selection ID 0 +UC_RH850_REG_EIPC = 32 +UC_RH850_REG_EIPSW = 33 +UC_RH850_REG_FEPC = 34 +UC_RH850_REG_FEPSW = 35 +UC_RH850_REG_ECR = 36 +UC_RH850_REG_PSW = 37 +UC_RH850_REG_FPSR = 38 +UC_RH850_REG_FPEPC = 39 +UC_RH850_REG_FPST = 40 +UC_RH850_REG_FPCC = 41 +UC_RH850_REG_FPCFG = 42 +UC_RH850_REG_FPEC = 43 +UC_RH850_REG_EIIC = 45 +UC_RH850_REG_FEIC = 46 +UC_RH850_REG_CTPC = 48 +UC_RH850_REG_CTPSW = 49 +UC_RH850_REG_CTBP = 52 +UC_RH850_REG_EIWR = 60 +UC_RH850_REG_FEWR = 61 +UC_RH850_REG_BSEL = 63 + +# RH850 system regusters, selection ID 1 +UC_RH850_REG_MCFG0 = 64 +UC_RH850_REG_RBASE = 65 +UC_RH850_REG_EBASE = 66 +UC_RH850_REG_INTBP = 67 +UC_RH850_REG_MCTL = 68 +UC_RH850_REG_PID = 69 +UC_RH850_REG_SCCFG = 75 +UC_RH850_REG_SCBP = 76 + +# RH850 system registers, selection ID 2 +UC_RH850_REG_HTCFG0 = 96 +UC_RH850_REG_MEA = 102 +UC_RH850_REG_ASID = 103 +UC_RH850_REG_MEI = 104 +UC_RH850_REG_PC = 288 +UC_RH850_REG_ENDING = 289 + +# RH8509 Registers aliases. + +UC_RH850_REG_ZERO = 0 +UC_RH850_REG_SP = 3 +UC_RH850_REG_EP = 30 +UC_RH850_REG_LP = 31 diff --git a/bindings/python/unicorn/unicorn_const.py b/bindings/python/unicorn/unicorn_const.py index 8fab22415b..d3d72aa3a2 100644 --- a/bindings/python/unicorn/unicorn_const.py +++ b/bindings/python/unicorn/unicorn_const.py @@ -19,7 +19,9 @@ UC_ARCH_RISCV = 8 UC_ARCH_S390X = 9 UC_ARCH_TRICORE = 10 -UC_ARCH_MAX = 11 +UC_ARCH_AVR = 11 +UC_ARCH_RH850 = 12 +UC_ARCH_MAX = 13 UC_MODE_LITTLE_ENDIAN = 0 UC_MODE_BIG_ENDIAN = 1073741824 @@ -46,6 +48,7 @@ UC_MODE_SPARC32 = 4 UC_MODE_SPARC64 = 8 UC_MODE_V9 = 16 +UC_MODE_RH850 = 4 UC_MODE_RISCV32 = 4 UC_MODE_RISCV64 = 8 diff --git a/bindings/ruby/unicorn_gem/lib/unicorn_engine/avr_const.rb b/bindings/ruby/unicorn_gem/lib/unicorn_engine/avr_const.rb new file mode 100644 index 0000000000..126ebd0c8f --- /dev/null +++ b/bindings/ruby/unicorn_gem/lib/unicorn_engine/avr_const.rb @@ -0,0 +1,150 @@ +# For Unicorn Engine. 
AUTO-GENERATED FILE, DO NOT EDIT [avr_const.rb] + +module UnicornEngine + +# AVR architectures + UC_AVR_ARCH_AVR1 = 10 + UC_AVR_ARCH_AVR2 = 20 + UC_AVR_ARCH_AVR25 = 25 + UC_AVR_ARCH_AVR3 = 30 + UC_AVR_ARCH_AVR4 = 40 + UC_AVR_ARCH_AVR5 = 50 + UC_AVR_ARCH_AVR51 = 51 + UC_AVR_ARCH_AVR6 = 60 + UC_CPU_AVR_ARCH = 1000 + +# AVR CPU + UC_CPU_AVR_ATMEGA16 = 50016 + UC_CPU_AVR_ATMEGA32 = 50032 + UC_CPU_AVR_ATMEGA64 = 50064 + UC_CPU_AVR_ATMEGA128 = 51128 + UC_CPU_AVR_ATMEGA128RFR2 = 51129 + UC_CPU_AVR_ATMEGA1280 = 51130 + UC_CPU_AVR_ATMEGA256 = 60256 + UC_CPU_AVR_ATMEGA256RFR2 = 60257 + UC_CPU_AVR_ATMEGA2560 = 60258 + +# AVR memory + UC_AVR_MEM_FLASH = 134217728 + +# AVR registers + + UC_AVR_REG_INVALID = 0 + UC_AVR_REG_R0 = 1 + UC_AVR_REG_R1 = 2 + UC_AVR_REG_R2 = 3 + UC_AVR_REG_R3 = 4 + UC_AVR_REG_R4 = 5 + UC_AVR_REG_R5 = 6 + UC_AVR_REG_R6 = 7 + UC_AVR_REG_R7 = 8 + UC_AVR_REG_R8 = 9 + UC_AVR_REG_R9 = 10 + UC_AVR_REG_R10 = 11 + UC_AVR_REG_R11 = 12 + UC_AVR_REG_R12 = 13 + UC_AVR_REG_R13 = 14 + UC_AVR_REG_R14 = 15 + UC_AVR_REG_R15 = 16 + UC_AVR_REG_R16 = 17 + UC_AVR_REG_R17 = 18 + UC_AVR_REG_R18 = 19 + UC_AVR_REG_R19 = 20 + UC_AVR_REG_R20 = 21 + UC_AVR_REG_R21 = 22 + UC_AVR_REG_R22 = 23 + UC_AVR_REG_R23 = 24 + UC_AVR_REG_R24 = 25 + UC_AVR_REG_R25 = 26 + UC_AVR_REG_R26 = 27 + UC_AVR_REG_R27 = 28 + UC_AVR_REG_R28 = 29 + UC_AVR_REG_R29 = 30 + UC_AVR_REG_R30 = 31 + UC_AVR_REG_R31 = 32 + UC_AVR_REG_PC = 33 + UC_AVR_REG_SP = 34 + UC_AVR_REG_RAMPD = 57 + UC_AVR_REG_RAMPX = 58 + UC_AVR_REG_RAMPY = 59 + UC_AVR_REG_RAMPZ = 60 + UC_AVR_REG_EIND = 61 + UC_AVR_REG_SPL = 62 + UC_AVR_REG_SPH = 63 + UC_AVR_REG_SREG = 64 + +# 16-bit coalesced registers + UC_AVR_REG_R0W = 65 + UC_AVR_REG_R1W = 66 + UC_AVR_REG_R2W = 67 + UC_AVR_REG_R3W = 68 + UC_AVR_REG_R4W = 69 + UC_AVR_REG_R5W = 70 + UC_AVR_REG_R6W = 71 + UC_AVR_REG_R7W = 72 + UC_AVR_REG_R8W = 73 + UC_AVR_REG_R9W = 74 + UC_AVR_REG_R10W = 75 + UC_AVR_REG_R11W = 76 + UC_AVR_REG_R12W = 77 + UC_AVR_REG_R13W = 78 + UC_AVR_REG_R14W = 79 + UC_AVR_REG_R15W = 80 + UC_AVR_REG_R16W = 81 + UC_AVR_REG_R17W = 82 + UC_AVR_REG_R18W = 83 + UC_AVR_REG_R19W = 84 + UC_AVR_REG_R20W = 85 + UC_AVR_REG_R21W = 86 + UC_AVR_REG_R22W = 87 + UC_AVR_REG_R23W = 88 + UC_AVR_REG_R24W = 89 + UC_AVR_REG_R25W = 90 + UC_AVR_REG_R26W = 91 + UC_AVR_REG_R27W = 92 + UC_AVR_REG_R28W = 93 + UC_AVR_REG_R29W = 94 + UC_AVR_REG_R30W = 95 + +# 32-bit coalesced registers + UC_AVR_REG_R0D = 97 + UC_AVR_REG_R1D = 98 + UC_AVR_REG_R2D = 99 + UC_AVR_REG_R3D = 100 + UC_AVR_REG_R4D = 101 + UC_AVR_REG_R5D = 102 + UC_AVR_REG_R6D = 103 + UC_AVR_REG_R7D = 104 + UC_AVR_REG_R8D = 105 + UC_AVR_REG_R9D = 106 + UC_AVR_REG_R10D = 107 + UC_AVR_REG_R11D = 108 + UC_AVR_REG_R12D = 109 + UC_AVR_REG_R13D = 110 + UC_AVR_REG_R14D = 111 + UC_AVR_REG_R15D = 112 + UC_AVR_REG_R16D = 113 + UC_AVR_REG_R17D = 114 + UC_AVR_REG_R18D = 115 + UC_AVR_REG_R19D = 116 + UC_AVR_REG_R20D = 117 + UC_AVR_REG_R21D = 118 + UC_AVR_REG_R22D = 119 + UC_AVR_REG_R23D = 120 + UC_AVR_REG_R24D = 121 + UC_AVR_REG_R25D = 122 + UC_AVR_REG_R26D = 123 + UC_AVR_REG_R27D = 124 + UC_AVR_REG_R28D = 125 + +# Alias registers + UC_AVR_REG_Xhi = 28 + UC_AVR_REG_Xlo = 27 + UC_AVR_REG_Yhi = 30 + UC_AVR_REG_Ylo = 29 + UC_AVR_REG_Zhi = 32 + UC_AVR_REG_Zlo = 31 + UC_AVR_REG_X = 91 + UC_AVR_REG_Y = 93 + UC_AVR_REG_Z = 95 +end \ No newline at end of file diff --git a/bindings/ruby/unicorn_gem/lib/unicorn_engine/rh850_const.rb b/bindings/ruby/unicorn_gem/lib/unicorn_engine/rh850_const.rb new file mode 100644 index 0000000000..40629b9883 --- /dev/null +++ 
b/bindings/ruby/unicorn_gem/lib/unicorn_engine/rh850_const.rb @@ -0,0 +1,94 @@ +# For Unicorn Engine. AUTO-GENERATED FILE, DO NOT EDIT [rh850_const.rb] + +module UnicornEngine + UC_RH850_SYSREG_SELID0 = 32 + UC_RH850_SYSREG_SELID1 = 64 + UC_RH850_SYSREG_SELID2 = 96 + UC_RH850_SYSREG_SELID3 = 128 + UC_RH850_SYSREG_SELID4 = 160 + UC_RH850_SYSREG_SELID5 = 192 + UC_RH850_SYSREG_SELID6 = 224 + UC_RH850_SYSREG_SELID7 = 256 + +# RH850 global purpose registers + + UC_RH850_REG_R0 = 0 + UC_RH850_REG_R1 = 1 + UC_RH850_REG_R2 = 2 + UC_RH850_REG_R3 = 3 + UC_RH850_REG_R4 = 4 + UC_RH850_REG_R5 = 5 + UC_RH850_REG_R6 = 6 + UC_RH850_REG_R7 = 7 + UC_RH850_REG_R8 = 8 + UC_RH850_REG_R9 = 9 + UC_RH850_REG_R10 = 10 + UC_RH850_REG_R11 = 11 + UC_RH850_REG_R12 = 12 + UC_RH850_REG_R13 = 13 + UC_RH850_REG_R14 = 14 + UC_RH850_REG_R15 = 15 + UC_RH850_REG_R16 = 16 + UC_RH850_REG_R17 = 17 + UC_RH850_REG_R18 = 18 + UC_RH850_REG_R19 = 19 + UC_RH850_REG_R20 = 20 + UC_RH850_REG_R21 = 21 + UC_RH850_REG_R22 = 22 + UC_RH850_REG_R23 = 23 + UC_RH850_REG_R24 = 24 + UC_RH850_REG_R25 = 25 + UC_RH850_REG_R26 = 26 + UC_RH850_REG_R27 = 27 + UC_RH850_REG_R28 = 28 + UC_RH850_REG_R29 = 29 + UC_RH850_REG_R30 = 30 + UC_RH850_REG_R31 = 31 + +# RH850 system registers, selection ID 0 + UC_RH850_REG_EIPC = 32 + UC_RH850_REG_EIPSW = 33 + UC_RH850_REG_FEPC = 34 + UC_RH850_REG_FEPSW = 35 + UC_RH850_REG_ECR = 36 + UC_RH850_REG_PSW = 37 + UC_RH850_REG_FPSR = 38 + UC_RH850_REG_FPEPC = 39 + UC_RH850_REG_FPST = 40 + UC_RH850_REG_FPCC = 41 + UC_RH850_REG_FPCFG = 42 + UC_RH850_REG_FPEC = 43 + UC_RH850_REG_EIIC = 45 + UC_RH850_REG_FEIC = 46 + UC_RH850_REG_CTPC = 48 + UC_RH850_REG_CTPSW = 49 + UC_RH850_REG_CTBP = 52 + UC_RH850_REG_EIWR = 60 + UC_RH850_REG_FEWR = 61 + UC_RH850_REG_BSEL = 63 + +# RH850 system regusters, selection ID 1 + UC_RH850_REG_MCFG0 = 64 + UC_RH850_REG_RBASE = 65 + UC_RH850_REG_EBASE = 66 + UC_RH850_REG_INTBP = 67 + UC_RH850_REG_MCTL = 68 + UC_RH850_REG_PID = 69 + UC_RH850_REG_SCCFG = 75 + UC_RH850_REG_SCBP = 76 + +# RH850 system registers, selection ID 2 + UC_RH850_REG_HTCFG0 = 96 + UC_RH850_REG_MEA = 102 + UC_RH850_REG_ASID = 103 + UC_RH850_REG_MEI = 104 + UC_RH850_REG_PC = 288 + UC_RH850_REG_ENDING = 289 + +# RH8509 Registers aliases. 
+ + UC_RH850_REG_ZERO = 0 + UC_RH850_REG_SP = 3 + UC_RH850_REG_EP = 30 + UC_RH850_REG_LP = 31 +end \ No newline at end of file diff --git a/bindings/ruby/unicorn_gem/lib/unicorn_engine/unicorn_const.rb b/bindings/ruby/unicorn_gem/lib/unicorn_engine/unicorn_const.rb index c573a6f7c0..2f0a859740 100644 --- a/bindings/ruby/unicorn_gem/lib/unicorn_engine/unicorn_const.rb +++ b/bindings/ruby/unicorn_gem/lib/unicorn_engine/unicorn_const.rb @@ -21,7 +21,9 @@ module UnicornEngine UC_ARCH_RISCV = 8 UC_ARCH_S390X = 9 UC_ARCH_TRICORE = 10 - UC_ARCH_MAX = 11 + UC_ARCH_AVR = 11 + UC_ARCH_RH850 = 12 + UC_ARCH_MAX = 13 UC_MODE_LITTLE_ENDIAN = 0 UC_MODE_BIG_ENDIAN = 1073741824 @@ -48,6 +50,7 @@ module UnicornEngine UC_MODE_SPARC32 = 4 UC_MODE_SPARC64 = 8 UC_MODE_V9 = 16 + UC_MODE_RH850 = 4 UC_MODE_RISCV32 = 4 UC_MODE_RISCV64 = 8 @@ -149,4 +152,4 @@ module UnicornEngine UC_PROT_ALL = 7 UC_CTL_CONTEXT_CPU = 1 UC_CTL_CONTEXT_MEMORY = 2 -end \ No newline at end of file +end diff --git a/bindings/rust/build.rs b/bindings/rust/build.rs index 821367d39b..f91a37a51d 100644 --- a/bindings/rust/build.rs +++ b/bindings/rust/build.rs @@ -116,6 +116,12 @@ fn build_with_cmake() { if std::env::var("CARGO_FEATURE_ARCH_TRICORE").is_ok() { archs.push_str("tricore;"); } + if std::env::var("CARGO_FEATURE_ARCH_AVR").is_ok() { + archs.push_str("avr;"); + } + if std::env::var("CARGO_FEATURE_ARCH_RH850").is_ok() { + archs.push_str("rh850;"); + } if !archs.is_empty() { archs.pop(); diff --git a/bindings/rust/src/avr.rs b/bindings/rust/src/avr.rs new file mode 100644 index 0000000000..1660933497 --- /dev/null +++ b/bindings/rust/src/avr.rs @@ -0,0 +1,211 @@ +#![allow(non_camel_case_types)] +// For Unicorn Engine. AUTO-GENERATED FILE, DO NOT EDIT + +#[repr(C)] +#[derive(PartialEq, Debug, Clone, Copy)] +pub enum RegisterAVR { + INVALID = 0, + + // General purpose registers (GPR) + R0 = 1, + R1 = 2, + R2 = 3, + R3 = 4, + R4 = 5, + R5 = 6, + R6 = 7, + R7 = 8, + R8 = 9, + R9 = 10, + R10 = 11, + R11 = 12, + R12 = 13, + R13 = 14, + R14 = 15, + R15 = 16, + R16 = 17, + R17 = 18, + R18 = 19, + R19 = 20, + R20 = 21, + R21 = 22, + R22 = 23, + R23 = 24, + R24 = 25, + R25 = 26, + R26 = 27, + R27 = 28, + R28 = 29, + R29 = 30, + R30 = 31, + R31 = 32, + + PC = 33, + SP = 34, + + RAMPD = 57, + RAMPX = 58, + RAMPY = 59, + RAMPZ = 60, + EIND = 61, + SPL = 62, + SPH = 63, + SREG = 64, + + // 16-bit coalesced registers + R0W = 65, + R1W = 66, + R2W = 67, + R3W = 68, + R4W = 69, + R5W = 70, + R6W = 71, + R7W = 72, + R8W = 73, + R9W = 74, + R10W = 75, + R11W = 76, + R12W = 77, + R13W = 78, + R14W = 79, + R15W = 80, + R16W = 81, + R17W = 82, + R18W = 83, + R19W = 84, + R20W = 85, + R21W = 86, + R22W = 87, + R23W = 88, + R24W = 89, + R25W = 90, + R26W = 91, + R27W = 92, + R28W = 93, + R29W = 94, + R30W = 95, + + // 32-bit coalesced registers + R0D = 97, + R1D = 98, + R2D = 99, + R3D = 100, + R4D = 101, + R5D = 102, + R6D = 103, + R7D = 104, + R8D = 105, + R9D = 106, + R10D = 107, + R11D = 108, + R12D = 109, + R13D = 110, + R14D = 111, + R15D = 112, + R16D = 113, + R17D = 114, + R18D = 115, + R19D = 116, + R20D = 117, + R21D = 118, + R22D = 119, + R23D = 120, + R24D = 121, + R25D = 122, + R26D = 123, + R27D = 124, + R28D = 125, +} + +impl RegisterAVR { + // alias registers + // (assoc) Xhi = 28 + // (assoc) Xlo = 27 + // (assoc) Yhi = 30 + // (assoc) Ylo = 29 + // (assoc) Zhi = 32 + // (assoc) Zlo = 31 + pub const XHI: RegisterAVR = RegisterAVR::R27; + pub const XLO: RegisterAVR = RegisterAVR::R26; + pub const YHI: RegisterAVR = RegisterAVR::R29; + pub 
const YLO: RegisterAVR = RegisterAVR::R28; + pub const ZHI: RegisterAVR = RegisterAVR::R31; + pub const ZLO: RegisterAVR = RegisterAVR::R30; + + // (assoc) X = 91 + // (assoc) Y = 93 + // (assoc) Z = 95 + pub const X: RegisterAVR = RegisterAVR::R26W; + pub const Y: RegisterAVR = RegisterAVR::R28W; + pub const Z: RegisterAVR = RegisterAVR::R30W; +} + +impl From for i32 { + fn from(r: RegisterAVR) -> Self { + r as i32 + } +} + +#[repr(C)] +#[derive(PartialEq, Debug, Clone, Copy)] +pub enum AvrArch { + UC_AVR_ARCH_AVR1 = 10, + UC_AVR_ARCH_AVR2 = 20, + UC_AVR_ARCH_AVR25 = 25, + UC_AVR_ARCH_AVR3 = 30, + UC_AVR_ARCH_AVR4 = 40, + UC_AVR_ARCH_AVR5 = 50, + UC_AVR_ARCH_AVR51 = 51, + UC_AVR_ARCH_AVR6 = 60, +} + +impl From for i32 { + fn from(value: AvrArch) -> Self { + value as i32 + } +} + +impl From<&AvrArch> for i32 { + fn from(value: &AvrArch) -> Self { + *value as i32 + } +} + +#[repr(C)] +#[derive(PartialEq, Debug, Clone, Copy)] +pub enum AvrCpuModel { + UC_CPU_AVR_ATMEGA16 = 50016, + UC_CPU_AVR_ATMEGA32 = 50032, + UC_CPU_AVR_ATMEGA64 = 50064, + UC_CPU_AVR_ATMEGA128 = 51128, + UC_CPU_AVR_ATMEGA128RFR2 = 51129, + UC_CPU_AVR_ATMEGA1280 = 51130, + UC_CPU_AVR_ATMEGA256 = 60256, + UC_CPU_AVR_ATMEGA256RFR2 = 60257, + UC_CPU_AVR_ATMEGA2560 = 60258, +} + +impl From for i32 { + fn from(value: AvrCpuModel) -> Self { + value as i32 + } +} + +impl From<&AvrCpuModel> for i32 { + fn from(value: &AvrCpuModel) -> Self { + *value as i32 + } +} + +#[repr(i32)] +#[derive(Debug, PartialEq, Eq, Copy, Clone)] +pub enum AvrMem { + // Flash program memory (code) + FLASH = 0x08000000, +} + +impl From for i32 { + fn from(r: AvrMem) -> Self { + r as i32 + } +} diff --git a/bindings/rust/src/lib.rs b/bindings/rust/src/lib.rs index 7cc95a3946..09b5b75281 100644 --- a/bindings/rust/src/lib.rs +++ b/bindings/rust/src/lib.rs @@ -59,6 +59,12 @@ mod arm64; #[cfg(feature = "arch_aarch64")] pub use crate::arm64::*; +// include avr support if conditionally compiled in +#[cfg(feature = "arch_avr")] +mod avr; +#[cfg(feature = "arch_avr")] +pub use crate::avr::*; + // include m68k support if conditionally compiled in #[cfg(feature = "arch_m68k")] mod m68k; @@ -95,6 +101,12 @@ mod sparc; #[cfg(feature = "arch_sparc")] pub use crate::sparc::*; +// include rh850 support if conditionally compiled in +#[cfg(feature = "arch_rh850")] +mod rh850; +#[cfg(feature = "arch_rh850")] +pub use crate::rh850::*; + // include tricore support if conditionally compiled in #[cfg(feature = "arch_tricore")] mod tricore; @@ -1052,6 +1064,10 @@ impl<'a, D> Unicorn<'a, D> { Arch::S390X => Ok(RegisterS390X::PC as i32), #[cfg(feature = "arch_tricore")] Arch::TRICORE => Ok(RegisterTRICORE::PC as i32), + #[cfg(feature = "arch_avr")] + Arch::AVR => Ok(RegisterAVR::PC as i32), + #[cfg(feature = "arch_rh850")] + Arch::RH850 => Ok(RegisterRH850::PC as i32), // returns `uc_error::ARCH` for `Arch::MAX`, and any // other architecture that are not compiled in _ => Err(uc_error::ARCH), diff --git a/bindings/rust/src/rh850.rs b/bindings/rust/src/rh850.rs new file mode 100644 index 0000000000..92e8a16a4a --- /dev/null +++ b/bindings/rust/src/rh850.rs @@ -0,0 +1,119 @@ +#![allow(non_camel_case_types)] + +// RH850 registers +#[repr(C)] +#[derive(PartialEq, Debug, Clone, Copy)] +pub enum RegisterRH850 { + INVALID = -1, + + // General purpose registers + R0 = 0, + R1 = 1, + R2 = 2, + R3 = 3, + R4 = 4, + R5 = 5, + R6 = 6, + R7 = 7, + R8 = 8, + R9 = 9, + R10 = 10, + R11 = 11, + R12 = 12, + R13 = 13, + R14 = 14, + R15 = 15, + R16 = 16, + R17 = 17, + R18 = 18, + R19 = 19, + R20 = 
20, + R21 = 21, + R22 = 22, + R23 = 23, + R24 = 24, + R25 = 25, + R26 = 26, + R27 = 27, + R28 = 28, + R29 = 29, + R30 = 30, + R31 = 31, + + // System registers + EIPC = 32, + EIPSW = 33, + FEPC = 34, + FEPSW = 35, + ECR = 36, + PSW = 37, + FPSR = 38, + FPEPC = 39, + FPST = 40, + FPCC = 41, + FPCFG = 42, + FPEC = 43, + EIIC = 45, + FEIC = 46, + CTPC = 48, + CTPSW = 49, + CTBP = 52, + EIWR = 60, + FEWR = 61, + BSEL = 63, + + // system registers, selection ID 1 + MCFG0 = 64, + RBASE = 65, + EBASE = 66, + INTBP = 67, + MCTL = 68, + PID = 69, + SCCFG = 75, + SCBP = 76, + + // system registers, selection ID 2 + HTCFG0 = 96, + MEA = 102, + ASID = 103, + MEI = 104, + PC = 288, + + ENDING = 289, +} + +impl RegisterRH850 { + // Alias registers + // (assoc) ZERO = 0, + // (assoc) SP = 3, + // (assoc) EP = 30, + // (assoc) LP = 31, + pub const ZERO: RegisterRH850 = RegisterRH850::R0; + pub const SP: RegisterRH850 = RegisterRH850::R3; + pub const EP: RegisterRH850 = RegisterRH850::R30; + pub const LP: RegisterRH850 = RegisterRH850::R31; +} + +impl From for i32 { + fn from(r: RegisterRH850) -> Self { + r as i32 + } +} + +#[repr(i32)] +#[derive(Debug, Copy, Clone, PartialEq, Eq)] +pub enum RH850CpuModel { + UC_CPU_RH850_ANY = 0, +} + +impl From for i32 { + fn from(value: RH850CpuModel) -> Self { + value as i32 + } +} + +impl From<&RH850CpuModel> for i32 { + fn from(value: &RH850CpuModel) -> Self { + (*value) as i32 + } +} \ No newline at end of file diff --git a/bindings/rust/src/unicorn_const.rs b/bindings/rust/src/unicorn_const.rs index e5a7d0c015..8b2883ebeb 100644 --- a/bindings/rust/src/unicorn_const.rs +++ b/bindings/rust/src/unicorn_const.rs @@ -193,7 +193,9 @@ pub enum Arch { RISCV = 8, S390X = 9, TRICORE = 10, - MAX = 11, + AVR = 11, + RH850 = 12, + MAX = 13, } impl TryFrom for Arch { @@ -211,6 +213,7 @@ impl TryFrom for Arch { x if x == Self::RISCV as usize => Ok(Self::RISCV), x if x == Self::S390X as usize => Ok(Self::S390X), x if x == Self::TRICORE as usize => Ok(Self::TRICORE), + x if x == Self::RH850 as usize => Ok(Self::RH850), x if x == Self::MAX as usize => Ok(Self::MAX), _ => Err(uc_error::ARCH), } @@ -246,6 +249,7 @@ bitflags! { const SPARC32 = Self::MIPS32.bits(); const SPARC64 = Self::MIPS64.bits(); const V9 = Self::THUMB.bits(); + const RH850 = 4; const RISCV32 = Self::MIPS32.bits(); const RISCV64 = Self::MIPS64.bits(); } diff --git a/bindings/zig/unicorn/AVR_const.zig b/bindings/zig/unicorn/AVR_const.zig new file mode 100644 index 0000000000..e408f18e01 --- /dev/null +++ b/bindings/zig/unicorn/AVR_const.zig @@ -0,0 +1,151 @@ +// For Unicorn Engine. 
AUTO-GENERATED FILE, DO NOT EDIT + +pub const AVRConst = enum(c_int) { + +// AVR architectures + AVR_ARCH_AVR1 = 10, + AVR_ARCH_AVR2 = 20, + AVR_ARCH_AVR25 = 25, + AVR_ARCH_AVR3 = 30, + AVR_ARCH_AVR4 = 40, + AVR_ARCH_AVR5 = 50, + AVR_ARCH_AVR51 = 51, + AVR_ARCH_AVR6 = 60, + CPU_AVR_ARCH = 1000, + +// AVR CPU + CPU_AVR_ATMEGA16 = 50016, + CPU_AVR_ATMEGA32 = 50032, + CPU_AVR_ATMEGA64 = 50064, + CPU_AVR_ATMEGA128 = 51128, + CPU_AVR_ATMEGA128RFR2 = 51129, + CPU_AVR_ATMEGA1280 = 51130, + CPU_AVR_ATMEGA256 = 60256, + CPU_AVR_ATMEGA256RFR2 = 60257, + CPU_AVR_ATMEGA2560 = 60258, + +// AVR memory + AVR_MEM_FLASH = 134217728, + +// AVR registers + + AVR_REG_INVALID = 0, + AVR_REG_R0 = 1, + AVR_REG_R1 = 2, + AVR_REG_R2 = 3, + AVR_REG_R3 = 4, + AVR_REG_R4 = 5, + AVR_REG_R5 = 6, + AVR_REG_R6 = 7, + AVR_REG_R7 = 8, + AVR_REG_R8 = 9, + AVR_REG_R9 = 10, + AVR_REG_R10 = 11, + AVR_REG_R11 = 12, + AVR_REG_R12 = 13, + AVR_REG_R13 = 14, + AVR_REG_R14 = 15, + AVR_REG_R15 = 16, + AVR_REG_R16 = 17, + AVR_REG_R17 = 18, + AVR_REG_R18 = 19, + AVR_REG_R19 = 20, + AVR_REG_R20 = 21, + AVR_REG_R21 = 22, + AVR_REG_R22 = 23, + AVR_REG_R23 = 24, + AVR_REG_R24 = 25, + AVR_REG_R25 = 26, + AVR_REG_R26 = 27, + AVR_REG_R27 = 28, + AVR_REG_R28 = 29, + AVR_REG_R29 = 30, + AVR_REG_R30 = 31, + AVR_REG_R31 = 32, + AVR_REG_PC = 33, + AVR_REG_SP = 34, + AVR_REG_RAMPD = 57, + AVR_REG_RAMPX = 58, + AVR_REG_RAMPY = 59, + AVR_REG_RAMPZ = 60, + AVR_REG_EIND = 61, + AVR_REG_SPL = 62, + AVR_REG_SPH = 63, + AVR_REG_SREG = 64, + +// 16-bit coalesced registers + AVR_REG_R0W = 65, + AVR_REG_R1W = 66, + AVR_REG_R2W = 67, + AVR_REG_R3W = 68, + AVR_REG_R4W = 69, + AVR_REG_R5W = 70, + AVR_REG_R6W = 71, + AVR_REG_R7W = 72, + AVR_REG_R8W = 73, + AVR_REG_R9W = 74, + AVR_REG_R10W = 75, + AVR_REG_R11W = 76, + AVR_REG_R12W = 77, + AVR_REG_R13W = 78, + AVR_REG_R14W = 79, + AVR_REG_R15W = 80, + AVR_REG_R16W = 81, + AVR_REG_R17W = 82, + AVR_REG_R18W = 83, + AVR_REG_R19W = 84, + AVR_REG_R20W = 85, + AVR_REG_R21W = 86, + AVR_REG_R22W = 87, + AVR_REG_R23W = 88, + AVR_REG_R24W = 89, + AVR_REG_R25W = 90, + AVR_REG_R26W = 91, + AVR_REG_R27W = 92, + AVR_REG_R28W = 93, + AVR_REG_R29W = 94, + AVR_REG_R30W = 95, + +// 32-bit coalesced registers + AVR_REG_R0D = 97, + AVR_REG_R1D = 98, + AVR_REG_R2D = 99, + AVR_REG_R3D = 100, + AVR_REG_R4D = 101, + AVR_REG_R5D = 102, + AVR_REG_R6D = 103, + AVR_REG_R7D = 104, + AVR_REG_R8D = 105, + AVR_REG_R9D = 106, + AVR_REG_R10D = 107, + AVR_REG_R11D = 108, + AVR_REG_R12D = 109, + AVR_REG_R13D = 110, + AVR_REG_R14D = 111, + AVR_REG_R15D = 112, + AVR_REG_R16D = 113, + AVR_REG_R17D = 114, + AVR_REG_R18D = 115, + AVR_REG_R19D = 116, + AVR_REG_R20D = 117, + AVR_REG_R21D = 118, + AVR_REG_R22D = 119, + AVR_REG_R23D = 120, + AVR_REG_R24D = 121, + AVR_REG_R25D = 122, + AVR_REG_R26D = 123, + AVR_REG_R27D = 124, + AVR_REG_R28D = 125, + +// Alias registers + AVR_REG_Xhi = 28, + AVR_REG_Xlo = 27, + AVR_REG_Yhi = 30, + AVR_REG_Ylo = 29, + AVR_REG_Zhi = 32, + AVR_REG_Zlo = 31, + AVR_REG_X = 91, + AVR_REG_Y = 93, + AVR_REG_Z = 95, + +}; diff --git a/bindings/zig/unicorn/rh850_const.zig b/bindings/zig/unicorn/rh850_const.zig new file mode 100644 index 0000000000..3b965c6333 --- /dev/null +++ b/bindings/zig/unicorn/rh850_const.zig @@ -0,0 +1,95 @@ +// For Unicorn Engine. 
AUTO-GENERATED FILE, DO NOT EDIT + +pub const rh850Const = enum(c_int) { + RH850_SYSREG_SELID0 = 32, + RH850_SYSREG_SELID1 = 64, + RH850_SYSREG_SELID2 = 96, + RH850_SYSREG_SELID3 = 128, + RH850_SYSREG_SELID4 = 160, + RH850_SYSREG_SELID5 = 192, + RH850_SYSREG_SELID6 = 224, + RH850_SYSREG_SELID7 = 256, + +// RH850 global purpose registers + + RH850_REG_R0 = 0, + RH850_REG_R1 = 1, + RH850_REG_R2 = 2, + RH850_REG_R3 = 3, + RH850_REG_R4 = 4, + RH850_REG_R5 = 5, + RH850_REG_R6 = 6, + RH850_REG_R7 = 7, + RH850_REG_R8 = 8, + RH850_REG_R9 = 9, + RH850_REG_R10 = 10, + RH850_REG_R11 = 11, + RH850_REG_R12 = 12, + RH850_REG_R13 = 13, + RH850_REG_R14 = 14, + RH850_REG_R15 = 15, + RH850_REG_R16 = 16, + RH850_REG_R17 = 17, + RH850_REG_R18 = 18, + RH850_REG_R19 = 19, + RH850_REG_R20 = 20, + RH850_REG_R21 = 21, + RH850_REG_R22 = 22, + RH850_REG_R23 = 23, + RH850_REG_R24 = 24, + RH850_REG_R25 = 25, + RH850_REG_R26 = 26, + RH850_REG_R27 = 27, + RH850_REG_R28 = 28, + RH850_REG_R29 = 29, + RH850_REG_R30 = 30, + RH850_REG_R31 = 31, + +// RH850 system registers, selection ID 0 + RH850_REG_EIPC = 32, + RH850_REG_EIPSW = 33, + RH850_REG_FEPC = 34, + RH850_REG_FEPSW = 35, + RH850_REG_ECR = 36, + RH850_REG_PSW = 37, + RH850_REG_FPSR = 38, + RH850_REG_FPEPC = 39, + RH850_REG_FPST = 40, + RH850_REG_FPCC = 41, + RH850_REG_FPCFG = 42, + RH850_REG_FPEC = 43, + RH850_REG_EIIC = 45, + RH850_REG_FEIC = 46, + RH850_REG_CTPC = 48, + RH850_REG_CTPSW = 49, + RH850_REG_CTBP = 52, + RH850_REG_EIWR = 60, + RH850_REG_FEWR = 61, + RH850_REG_BSEL = 63, + +// RH850 system registers, selection ID 1 + RH850_REG_MCFG0 = 64, + RH850_REG_RBASE = 65, + RH850_REG_EBASE = 66, + RH850_REG_INTBP = 67, + RH850_REG_MCTL = 68, + RH850_REG_PID = 69, + RH850_REG_SCCFG = 75, + RH850_REG_SCBP = 76, + +// RH850 system registers, selection ID 2 + RH850_REG_HTCFG0 = 96, + RH850_REG_MEA = 102, + RH850_REG_ASID = 103, + RH850_REG_MEI = 104, + RH850_REG_PC = 288, + RH850_REG_ENDING = 289, + +// RH850 register aliases.
+ + RH850_REG_ZERO = 0, + RH850_REG_SP = 3, + RH850_REG_EP = 30, + RH850_REG_LP = 31, + +}; diff --git a/bindings/zig/unicorn/unicorn_const.zig b/bindings/zig/unicorn/unicorn_const.zig index 7991ff9783..2c3250b6be 100644 --- a/bindings/zig/unicorn/unicorn_const.zig +++ b/bindings/zig/unicorn/unicorn_const.zig @@ -21,7 +21,9 @@ pub const unicornConst = enum(c_int) { ARCH_RISCV = 8, ARCH_S390X = 9, ARCH_TRICORE = 10, - ARCH_MAX = 11, + ARCH_AVR = 11, + ARCH_RH850 = 12, + ARCH_MAX = 13, MODE_LITTLE_ENDIAN = 0, MODE_BIG_ENDIAN = 1073741824, @@ -48,6 +50,7 @@ pub const unicornConst = enum(c_int) { MODE_SPARC32 = 4, MODE_SPARC64 = 8, MODE_V9 = 16, + MODE_RH850 = 4, MODE_RISCV32 = 4, MODE_RISCV64 = 8, diff --git a/build.zig b/build.zig index e1eae62470..ca5f905bfb 100644 --- a/build.zig +++ b/build.zig @@ -74,6 +74,7 @@ pub fn build(b: *std.Build) void { .{ .file_type = .zig, .root_file_path = "bindings/zig/sample/sample_riscv_zig.zig" }, .{ .file_type = .c, .root_file_path = "samples/sample_arm.c" }, .{ .file_type = .c, .root_file_path = "samples/sample_arm64.c" }, + .{ .file_type = .c, .root_file_path = "samples/sample_avr.c" }, .{ .file_type = .c, .root_file_path = "samples/sample_ctl.c" }, .{ .file_type = .c, .root_file_path = "samples/sample_batch_reg.c" }, .{ .file_type = .c, .root_file_path = "samples/sample_m68k.c" }, diff --git a/format.sh b/format.sh old mode 100644 new mode 100755 diff --git a/include/uc_priv.h b/include/uc_priv.h index 484fc53e3f..f3a1c045e7 100644 --- a/include/uc_priv.h +++ b/include/uc_priv.h @@ -35,6 +35,7 @@ (UC_MODE_RISCV32 | UC_MODE_RISCV64 | UC_MODE_LITTLE_ENDIAN) #define UC_MODE_S390X_MASK (UC_MODE_BIG_ENDIAN) #define UC_MODE_TRICORE_MASK (UC_MODE_LITTLE_ENDIAN) +#define UC_MODE_AVR_MASK (UC_MODE_LITTLE_ENDIAN) #define ARR_SIZE(a) (sizeof(a) / sizeof(a[0])) diff --git a/include/unicorn/avr.h b/include/unicorn/avr.h new file mode 100644 index 0000000000..0487d3fd09 --- /dev/null +++ b/include/unicorn/avr.h @@ -0,0 +1,189 @@ +/* This file is released under LGPL2. + See COPYING.LGPL2 in root directory for more details +*/ + +/* + Created for Unicorn Engine by Glenn Baker , 2024 +*/ + +#ifndef UNICORN_AVR_H +#define UNICORN_AVR_H + +#ifdef __cplusplus +extern "C" { +#endif + +#ifdef _MSC_VER +#pragma warning(disable : 4201) +#endif + +//> AVR architectures +typedef enum uc_avr_arch { + UC_AVR_ARCH_AVR1 = 10, + UC_AVR_ARCH_AVR2 = 20, + UC_AVR_ARCH_AVR25 = 25, + UC_AVR_ARCH_AVR3 = 30, + UC_AVR_ARCH_AVR4 = 40, + UC_AVR_ARCH_AVR5 = 50, + UC_AVR_ARCH_AVR51 = 51, + UC_AVR_ARCH_AVR6 = 60, +} uc_avr_arch; + +#define UC_CPU_AVR_ARCH 1000 + +//> AVR CPU +typedef enum uc_cpu_avr { + // Enhanced Core with 16K up to 64K of program memory ("AVR5") + UC_CPU_AVR_ATMEGA16 = UC_AVR_ARCH_AVR5*UC_CPU_AVR_ARCH + 16, + UC_CPU_AVR_ATMEGA32 = UC_AVR_ARCH_AVR5*UC_CPU_AVR_ARCH + 32, + UC_CPU_AVR_ATMEGA64 = UC_AVR_ARCH_AVR5*UC_CPU_AVR_ARCH + 64, + + // Enhanced Core with 128K of program memory ("AVR5.1") + UC_CPU_AVR_ATMEGA128 = UC_AVR_ARCH_AVR51*UC_CPU_AVR_ARCH + 128, + UC_CPU_AVR_ATMEGA128RFR2, + UC_CPU_AVR_ATMEGA1280, + + // Enhanced Core with 128K+ of program memory, i.e. 
3-byte PC ("AVR6") + UC_CPU_AVR_ATMEGA256 = UC_AVR_ARCH_AVR6*UC_CPU_AVR_ARCH + 256, + UC_CPU_AVR_ATMEGA256RFR2, + UC_CPU_AVR_ATMEGA2560, +} uc_cpu_avr; + +//> AVR memory +typedef enum uc_avr_mem { + // Flash program memory (code) + UC_AVR_MEM_FLASH = 0x08000000, +} uc_avr_mem; + +//> AVR registers +typedef enum uc_avr_reg { + UC_AVR_REG_INVALID = 0, + + // General purpose registers (GPR) + UC_AVR_REG_R0 = 1, + UC_AVR_REG_R1, + UC_AVR_REG_R2, + UC_AVR_REG_R3, + UC_AVR_REG_R4, + UC_AVR_REG_R5, + UC_AVR_REG_R6, + UC_AVR_REG_R7, + UC_AVR_REG_R8, + UC_AVR_REG_R9, + UC_AVR_REG_R10, + UC_AVR_REG_R11, + UC_AVR_REG_R12, + UC_AVR_REG_R13, + UC_AVR_REG_R14, + UC_AVR_REG_R15, + UC_AVR_REG_R16, + UC_AVR_REG_R17, + UC_AVR_REG_R18, + UC_AVR_REG_R19, + UC_AVR_REG_R20, + UC_AVR_REG_R21, + UC_AVR_REG_R22, + UC_AVR_REG_R23, + UC_AVR_REG_R24, + UC_AVR_REG_R25, + UC_AVR_REG_R26, + UC_AVR_REG_R27, + UC_AVR_REG_R28, + UC_AVR_REG_R29, + UC_AVR_REG_R30, + UC_AVR_REG_R31, + + UC_AVR_REG_PC, + UC_AVR_REG_SP, + + UC_AVR_REG_RAMPD = UC_AVR_REG_PC + 16 + 8, + UC_AVR_REG_RAMPX, + UC_AVR_REG_RAMPY, + UC_AVR_REG_RAMPZ, + UC_AVR_REG_EIND, + UC_AVR_REG_SPL, + UC_AVR_REG_SPH, + UC_AVR_REG_SREG, + + //> 16-bit coalesced registers + UC_AVR_REG_R0W = UC_AVR_REG_PC + 32, + UC_AVR_REG_R1W, + UC_AVR_REG_R2W, + UC_AVR_REG_R3W, + UC_AVR_REG_R4W, + UC_AVR_REG_R5W, + UC_AVR_REG_R6W, + UC_AVR_REG_R7W, + UC_AVR_REG_R8W, + UC_AVR_REG_R9W, + UC_AVR_REG_R10W, + UC_AVR_REG_R11W, + UC_AVR_REG_R12W, + UC_AVR_REG_R13W, + UC_AVR_REG_R14W, + UC_AVR_REG_R15W, + UC_AVR_REG_R16W, + UC_AVR_REG_R17W, + UC_AVR_REG_R18W, + UC_AVR_REG_R19W, + UC_AVR_REG_R20W, + UC_AVR_REG_R21W, + UC_AVR_REG_R22W, + UC_AVR_REG_R23W, + UC_AVR_REG_R24W, + UC_AVR_REG_R25W, + UC_AVR_REG_R26W, + UC_AVR_REG_R27W, + UC_AVR_REG_R28W, + UC_AVR_REG_R29W, + UC_AVR_REG_R30W, + + //> 32-bit coalesced registers + UC_AVR_REG_R0D = UC_AVR_REG_PC + 64, + UC_AVR_REG_R1D, + UC_AVR_REG_R2D, + UC_AVR_REG_R3D, + UC_AVR_REG_R4D, + UC_AVR_REG_R5D, + UC_AVR_REG_R6D, + UC_AVR_REG_R7D, + UC_AVR_REG_R8D, + UC_AVR_REG_R9D, + UC_AVR_REG_R10D, + UC_AVR_REG_R11D, + UC_AVR_REG_R12D, + UC_AVR_REG_R13D, + UC_AVR_REG_R14D, + UC_AVR_REG_R15D, + UC_AVR_REG_R16D, + UC_AVR_REG_R17D, + UC_AVR_REG_R18D, + UC_AVR_REG_R19D, + UC_AVR_REG_R20D, + UC_AVR_REG_R21D, + UC_AVR_REG_R22D, + UC_AVR_REG_R23D, + UC_AVR_REG_R24D, + UC_AVR_REG_R25D, + UC_AVR_REG_R26D, + UC_AVR_REG_R27D, + UC_AVR_REG_R28D, + + //> Alias registers + UC_AVR_REG_Xhi = UC_AVR_REG_R27, + UC_AVR_REG_Xlo = UC_AVR_REG_R26, + UC_AVR_REG_Yhi = UC_AVR_REG_R29, + UC_AVR_REG_Ylo = UC_AVR_REG_R28, + UC_AVR_REG_Zhi = UC_AVR_REG_R31, + UC_AVR_REG_Zlo = UC_AVR_REG_R30, + + UC_AVR_REG_X = UC_AVR_REG_R26W, + UC_AVR_REG_Y = UC_AVR_REG_R28W, + UC_AVR_REG_Z = UC_AVR_REG_R30W, +} uc_avr_reg; + +#ifdef __cplusplus +} +#endif + +#endif /* UNICORN_AVR_H */ diff --git a/include/unicorn/rh850.h b/include/unicorn/rh850.h new file mode 100644 index 0000000000..963e0bc042 --- /dev/null +++ b/include/unicorn/rh850.h @@ -0,0 +1,111 @@ +/* Unicorn Engine */ +/* By Damien Cauquil , 2023 */ + +#ifndef UNICORN_RH850_H +#define UNICORN_RH850_H + +#ifdef __cplusplus +extern "C" { +#endif + +#ifdef _MSC_VER +#pragma warning(disable : 4201) +#endif + +#define UC_RH850_SYSREG_SELID0 32 +#define UC_RH850_SYSREG_SELID1 64 +#define UC_RH850_SYSREG_SELID2 96 +#define UC_RH850_SYSREG_SELID3 128 +#define UC_RH850_SYSREG_SELID4 160 +#define UC_RH850_SYSREG_SELID5 192 +#define UC_RH850_SYSREG_SELID6 224 +#define UC_RH850_SYSREG_SELID7 256 + +//> RH850 global purpose registers +typedef enum 
uc_rh850_reg { + UC_RH850_REG_R0 = 0, + UC_RH850_REG_R1, + UC_RH850_REG_R2, + UC_RH850_REG_R3, + UC_RH850_REG_R4, + UC_RH850_REG_R5, + UC_RH850_REG_R6, + UC_RH850_REG_R7, + UC_RH850_REG_R8, + UC_RH850_REG_R9, + UC_RH850_REG_R10, + UC_RH850_REG_R11, + UC_RH850_REG_R12, + UC_RH850_REG_R13, + UC_RH850_REG_R14, + UC_RH850_REG_R15, + UC_RH850_REG_R16, + UC_RH850_REG_R17, + UC_RH850_REG_R18, + UC_RH850_REG_R19, + UC_RH850_REG_R20, + UC_RH850_REG_R21, + UC_RH850_REG_R22, + UC_RH850_REG_R23, + UC_RH850_REG_R24, + UC_RH850_REG_R25, + UC_RH850_REG_R26, + UC_RH850_REG_R27, + UC_RH850_REG_R28, + UC_RH850_REG_R29, + UC_RH850_REG_R30, + UC_RH850_REG_R31, + + //> RH850 system registers, selection ID 0 + UC_RH850_REG_EIPC = UC_RH850_SYSREG_SELID0, + UC_RH850_REG_EIPSW, + UC_RH850_REG_FEPC, + UC_RH850_REG_FEPSW, + UC_RH850_REG_ECR, + UC_RH850_REG_PSW, + UC_RH850_REG_FPSR, + UC_RH850_REG_FPEPC, + UC_RH850_REG_FPST, + UC_RH850_REG_FPCC, + UC_RH850_REG_FPCFG, + UC_RH850_REG_FPEC, + UC_RH850_REG_EIIC = UC_RH850_SYSREG_SELID0 + 13, + UC_RH850_REG_FEIC, + UC_RH850_REG_CTPC = UC_RH850_SYSREG_SELID0 + 16, + UC_RH850_REG_CTPSW, + UC_RH850_REG_CTBP = UC_RH850_SYSREG_SELID0 + 20, + UC_RH850_REG_EIWR = UC_RH850_SYSREG_SELID0 + 28, + UC_RH850_REG_FEWR = UC_RH850_SYSREG_SELID0 + 29, + UC_RH850_REG_BSEL = UC_RH850_SYSREG_SELID0 + 31, + + //> RH850 system registers, selection ID 1 + UC_RH850_REG_MCFG0 = UC_RH850_SYSREG_SELID1, + UC_RH850_REG_RBASE, + UC_RH850_REG_EBASE, + UC_RH850_REG_INTBP, + UC_RH850_REG_MCTL, + UC_RH850_REG_PID, + UC_RH850_REG_SCCFG = UC_RH850_SYSREG_SELID1 + 11, + UC_RH850_REG_SCBP, + + //> RH850 system registers, selection ID 2 + UC_RH850_REG_HTCFG0 = UC_RH850_SYSREG_SELID2, + UC_RH850_REG_MEA = UC_RH850_SYSREG_SELID2 + 6, + UC_RH850_REG_ASID, + UC_RH850_REG_MEI, + + UC_RH850_REG_PC = UC_RH850_SYSREG_SELID7 + 32, + UC_RH850_REG_ENDING +} uc_cpu_rh850; + +//> RH850 register aliases.
+#define UC_RH850_REG_ZERO UC_RH850_REG_R0 +#define UC_RH850_REG_SP UC_RH850_REG_R3 +#define UC_RH850_REG_EP UC_RH850_REG_R30 +#define UC_RH850_REG_LP UC_RH850_REG_R31 + +#ifdef __cplusplus +} +#endif + +#endif \ No newline at end of file diff --git a/include/unicorn/unicorn.h b/include/unicorn/unicorn.h index faac1378c5..9c9d77b6ce 100644 --- a/include/unicorn/unicorn.h +++ b/include/unicorn/unicorn.h @@ -33,9 +33,11 @@ typedef size_t uc_hook; #include "mips.h" #include "sparc.h" #include "ppc.h" +#include "rh850.h" #include "riscv.h" #include "s390x.h" #include "tricore.h" +#include "avr.h" #ifdef __GNUC__ #define DEFAULT_VISIBILITY __attribute__((visibility("default"))) @@ -118,6 +120,8 @@ typedef enum uc_arch { UC_ARCH_RISCV, // RISCV architecture UC_ARCH_S390X, // S390X architecture UC_ARCH_TRICORE, // TriCore architecture + UC_ARCH_AVR, // AVR architecture + UC_ARCH_RH850, // Renesas RH850 architecture (V850e3v2) UC_ARCH_MAX, } uc_arch; @@ -164,6 +168,9 @@ typedef enum uc_mode { UC_MODE_SPARC64 = 1 << 3, // 64-bit mode UC_MODE_V9 = 1 << 4, // SparcV9 mode (currently unsupported) + // rh850 + UC_MODE_RH850 = 1 << 2, // 32-bit mode + // riscv UC_MODE_RISCV32 = 1 << 2, // 32-bit mode UC_MODE_RISCV64 = 1 << 3, // 64-bit mode diff --git a/msvc/avr-softmmu/config-target.h b/msvc/avr-softmmu/config-target.h new file mode 100644 index 0000000000..3afeec6c7a --- /dev/null +++ b/msvc/avr-softmmu/config-target.h @@ -0,0 +1,5 @@ +/* Automatically generated by create_config - do not modify */ +#define TARGET_AVR 1 +#define TARGET_NAME "avr" +#define TARGET_AVR 1 +#define CONFIG_SOFTMMU 1 diff --git a/msvc/rh850-softmmu/config-target.h b/msvc/rh850-softmmu/config-target.h new file mode 100644 index 0000000000..69d3c14d97 --- /dev/null +++ b/msvc/rh850-softmmu/config-target.h @@ -0,0 +1,6 @@ +/* Automatically generated by create_config - do not modify */ +#define TARGET_RH850 1 +#define TARGET_NAME "rh850" +#define TARGET_RH850 1 +#define TARGET_SYSTBL_ABI common,nospu,64 +#define CONFIG_SOFTMMU 1 diff --git a/qemu/MAINTAINERS b/qemu/MAINTAINERS index 8cbc1fac2b..ed80410291 100644 --- a/qemu/MAINTAINERS +++ b/qemu/MAINTAINERS @@ -952,6 +952,15 @@ F: include/hw/*/nrf51*.h F: include/hw/*/microbit*.h F: tests/qtest/microbit-test.c +AVR Machines +------------- + +AVR MCUs +M: Michael Rolnik +R: Sarah Harris +S: Maintained +F: default-configs/avr-softmmu.mak + CRIS Machines ------------- Axis Dev88 diff --git a/qemu/avr.h b/qemu/avr.h new file mode 100644 index 0000000000..bb37176913 --- /dev/null +++ b/qemu/avr.h @@ -0,0 +1,1297 @@ +/* Autogen header for Unicorn Engine - DONOT MODIFY */ +#ifndef UNICORN_AUTOGEN_avr_H +#define UNICORN_AUTOGEN_avr_H +#ifndef UNICORN_ARCH_POSTFIX +#define UNICORN_ARCH_POSTFIX _avr +#endif +#define uc_add_inline_hook uc_add_inline_hook_avr +#define uc_del_inline_hook uc_del_inline_hook_avr +#define tb_invalidate_phys_range tb_invalidate_phys_range_avr +#define use_idiv_instructions use_idiv_instructions_avr +#define arm_arch arm_arch_avr +#define tb_target_set_jmp_target tb_target_set_jmp_target_avr +#define have_bmi1 have_bmi1_avr +#define have_popcnt have_popcnt_avr +#define have_avx1 have_avx1_avr +#define have_avx2 have_avx2_avr +#define have_isa have_isa_avr +#define have_altivec have_altivec_avr +#define have_vsx have_vsx_avr +#define flush_icache_range flush_icache_range_avr +#define s390_facilities s390_facilities_avr +#define tcg_dump_op tcg_dump_op_avr +#define tcg_dump_ops tcg_dump_ops_avr +#define tcg_gen_and_i64 tcg_gen_and_i64_avr +#define 
tcg_gen_discard_i64 tcg_gen_discard_i64_avr +#define tcg_gen_ld16s_i64 tcg_gen_ld16s_i64_avr +#define tcg_gen_ld16u_i64 tcg_gen_ld16u_i64_avr +#define tcg_gen_ld32s_i64 tcg_gen_ld32s_i64_avr +#define tcg_gen_ld32u_i64 tcg_gen_ld32u_i64_avr +#define tcg_gen_ld8s_i64 tcg_gen_ld8s_i64_avr +#define tcg_gen_ld8u_i64 tcg_gen_ld8u_i64_avr +#define tcg_gen_ld_i64 tcg_gen_ld_i64_avr +#define tcg_gen_mov_i64 tcg_gen_mov_i64_avr +#define tcg_gen_movi_i64 tcg_gen_movi_i64_avr +#define tcg_gen_mul_i64 tcg_gen_mul_i64_avr +#define tcg_gen_or_i64 tcg_gen_or_i64_avr +#define tcg_gen_sar_i64 tcg_gen_sar_i64_avr +#define tcg_gen_shl_i64 tcg_gen_shl_i64_avr +#define tcg_gen_shr_i64 tcg_gen_shr_i64_avr +#define tcg_gen_st_i64 tcg_gen_st_i64_avr +#define tcg_gen_xor_i64 tcg_gen_xor_i64_avr +#define cpu_icount_to_ns cpu_icount_to_ns_avr +#define cpu_is_stopped cpu_is_stopped_avr +#define cpu_get_ticks cpu_get_ticks_avr +#define cpu_get_clock cpu_get_clock_avr +#define cpu_resume cpu_resume_avr +#define qemu_init_vcpu qemu_init_vcpu_avr +#define cpu_stop_current cpu_stop_current_avr +#define resume_all_vcpus resume_all_vcpus_avr +#define vm_start vm_start_avr +#define address_space_dispatch_compact address_space_dispatch_compact_avr +#define flatview_translate flatview_translate_avr +#define flatview_copy flatview_copy_avr +#define address_space_translate_for_iotlb address_space_translate_for_iotlb_avr +#define qemu_get_cpu qemu_get_cpu_avr +#define cpu_address_space_init cpu_address_space_init_avr +#define cpu_get_address_space cpu_get_address_space_avr +#define cpu_exec_unrealizefn cpu_exec_unrealizefn_avr +#define cpu_exec_initfn cpu_exec_initfn_avr +#define cpu_exec_realizefn cpu_exec_realizefn_avr +#define tb_invalidate_phys_addr tb_invalidate_phys_addr_avr +#define cpu_watchpoint_insert cpu_watchpoint_insert_avr +#define cpu_watchpoint_remove_by_ref cpu_watchpoint_remove_by_ref_avr +#define cpu_watchpoint_remove_all cpu_watchpoint_remove_all_avr +#define cpu_watchpoint_address_matches cpu_watchpoint_address_matches_avr +#define cpu_breakpoint_insert cpu_breakpoint_insert_avr +#define cpu_breakpoint_remove cpu_breakpoint_remove_avr +#define cpu_breakpoint_remove_by_ref cpu_breakpoint_remove_by_ref_avr +#define cpu_breakpoint_remove_all cpu_breakpoint_remove_all_avr +#define cpu_abort cpu_abort_avr +#define cpu_physical_memory_test_and_clear_dirty cpu_physical_memory_test_and_clear_dirty_avr +#define memory_region_section_get_iotlb memory_region_section_get_iotlb_avr +#define flatview_add_to_dispatch flatview_add_to_dispatch_avr +#define qemu_ram_get_host_addr qemu_ram_get_host_addr_avr +#define qemu_ram_get_offset qemu_ram_get_offset_avr +#define qemu_ram_get_used_length qemu_ram_get_used_length_avr +#define qemu_ram_is_shared qemu_ram_is_shared_avr +#define qemu_ram_pagesize qemu_ram_pagesize_avr +#define qemu_ram_alloc_from_ptr qemu_ram_alloc_from_ptr_avr +#define qemu_ram_alloc qemu_ram_alloc_avr +#define qemu_ram_free qemu_ram_free_avr +#define qemu_map_ram_ptr qemu_map_ram_ptr_avr +#define qemu_ram_block_host_offset qemu_ram_block_host_offset_avr +#define qemu_ram_block_from_host qemu_ram_block_from_host_avr +#define qemu_ram_addr_from_host qemu_ram_addr_from_host_avr +#define cpu_check_watchpoint cpu_check_watchpoint_avr +#define iotlb_to_section iotlb_to_section_avr +#define address_space_dispatch_new address_space_dispatch_new_avr +#define address_space_dispatch_free address_space_dispatch_free_avr +#define address_space_dispatch_clear address_space_dispatch_clear_avr +#define flatview_read_continue 
flatview_read_continue_avr +#define address_space_read_full address_space_read_full_avr +#define address_space_write address_space_write_avr +#define address_space_rw address_space_rw_avr +#define cpu_physical_memory_rw cpu_physical_memory_rw_avr +#define address_space_write_rom address_space_write_rom_avr +#define cpu_flush_icache_range cpu_flush_icache_range_avr +#define cpu_exec_init_all cpu_exec_init_all_avr +#define address_space_access_valid address_space_access_valid_avr +#define address_space_map address_space_map_avr +#define address_space_unmap address_space_unmap_avr +#define cpu_physical_memory_map cpu_physical_memory_map_avr +#define cpu_physical_memory_unmap cpu_physical_memory_unmap_avr +#define cpu_memory_rw_debug cpu_memory_rw_debug_avr +#define qemu_target_page_size qemu_target_page_size_avr +#define qemu_target_page_bits qemu_target_page_bits_avr +#define qemu_target_page_bits_min qemu_target_page_bits_min_avr +#define target_words_bigendian target_words_bigendian_avr +#define cpu_physical_memory_is_io cpu_physical_memory_is_io_avr +#define ram_block_discard_range ram_block_discard_range_avr +#define ramblock_is_pmem ramblock_is_pmem_avr +#define page_size_init page_size_init_avr +#define set_preferred_target_page_bits set_preferred_target_page_bits_avr +#define finalize_target_page_bits finalize_target_page_bits_avr +#define cpu_outb cpu_outb_avr +#define cpu_outw cpu_outw_avr +#define cpu_outl cpu_outl_avr +#define cpu_inb cpu_inb_avr +#define cpu_inw cpu_inw_avr +#define cpu_inl cpu_inl_avr +#define memory_map memory_map_avr +#define memory_map_io memory_map_io_avr +#define memory_map_ptr memory_map_ptr_avr +#define memory_unmap memory_unmap_avr +#define memory_free memory_free_avr +#define flatview_unref flatview_unref_avr +#define address_space_get_flatview address_space_get_flatview_avr +#define memory_region_transaction_begin memory_region_transaction_begin_avr +#define memory_region_transaction_commit memory_region_transaction_commit_avr +#define memory_region_init memory_region_init_avr +#define memory_region_access_valid memory_region_access_valid_avr +#define memory_region_dispatch_read memory_region_dispatch_read_avr +#define memory_region_dispatch_write memory_region_dispatch_write_avr +#define memory_region_init_io memory_region_init_io_avr +#define memory_region_init_ram_ptr memory_region_init_ram_ptr_avr +#define memory_region_size memory_region_size_avr +#define memory_region_set_readonly memory_region_set_readonly_avr +#define memory_region_get_ram_ptr memory_region_get_ram_ptr_avr +#define memory_region_from_host memory_region_from_host_avr +#define memory_region_get_ram_addr memory_region_get_ram_addr_avr +#define memory_region_add_subregion memory_region_add_subregion_avr +#define memory_region_del_subregion memory_region_del_subregion_avr +#define memory_region_find memory_region_find_avr +#define memory_listener_register memory_listener_register_avr +#define memory_listener_unregister memory_listener_unregister_avr +#define address_space_remove_listeners address_space_remove_listeners_avr +#define address_space_init address_space_init_avr +#define address_space_destroy address_space_destroy_avr +#define memory_region_init_ram memory_region_init_ram_avr +#define memory_mapping_list_add_merge_sorted memory_mapping_list_add_merge_sorted_avr +#define exec_inline_op exec_inline_op_avr +#define floatx80_default_nan floatx80_default_nan_avr +#define float_raise float_raise_avr +#define float16_is_quiet_nan float16_is_quiet_nan_avr +#define 
float16_is_signaling_nan float16_is_signaling_nan_avr +#define float32_is_quiet_nan float32_is_quiet_nan_avr +#define float32_is_signaling_nan float32_is_signaling_nan_avr +#define float64_is_quiet_nan float64_is_quiet_nan_avr +#define float64_is_signaling_nan float64_is_signaling_nan_avr +#define floatx80_is_quiet_nan floatx80_is_quiet_nan_avr +#define floatx80_is_signaling_nan floatx80_is_signaling_nan_avr +#define floatx80_silence_nan floatx80_silence_nan_avr +#define propagateFloatx80NaN propagateFloatx80NaN_avr +#define float128_is_quiet_nan float128_is_quiet_nan_avr +#define float128_is_signaling_nan float128_is_signaling_nan_avr +#define float128_silence_nan float128_silence_nan_avr +#define float16_add float16_add_avr +#define float16_sub float16_sub_avr +#define float32_add float32_add_avr +#define float32_sub float32_sub_avr +#define float64_add float64_add_avr +#define float64_sub float64_sub_avr +#define float16_mul float16_mul_avr +#define float32_mul float32_mul_avr +#define float64_mul float64_mul_avr +#define float16_muladd float16_muladd_avr +#define float32_muladd float32_muladd_avr +#define float64_muladd float64_muladd_avr +#define float16_div float16_div_avr +#define float32_div float32_div_avr +#define float64_div float64_div_avr +#define float16_to_float32 float16_to_float32_avr +#define float16_to_float64 float16_to_float64_avr +#define float32_to_float16 float32_to_float16_avr +#define float32_to_float64 float32_to_float64_avr +#define float64_to_float16 float64_to_float16_avr +#define float64_to_float32 float64_to_float32_avr +#define float16_round_to_int float16_round_to_int_avr +#define float32_round_to_int float32_round_to_int_avr +#define float64_round_to_int float64_round_to_int_avr +#define float16_to_int16_scalbn float16_to_int16_scalbn_avr +#define float16_to_int32_scalbn float16_to_int32_scalbn_avr +#define float16_to_int64_scalbn float16_to_int64_scalbn_avr +#define float32_to_int16_scalbn float32_to_int16_scalbn_avr +#define float32_to_int32_scalbn float32_to_int32_scalbn_avr +#define float32_to_int64_scalbn float32_to_int64_scalbn_avr +#define float64_to_int16_scalbn float64_to_int16_scalbn_avr +#define float64_to_int32_scalbn float64_to_int32_scalbn_avr +#define float64_to_int64_scalbn float64_to_int64_scalbn_avr +#define float16_to_int16 float16_to_int16_avr +#define float16_to_int32 float16_to_int32_avr +#define float16_to_int64 float16_to_int64_avr +#define float32_to_int16 float32_to_int16_avr +#define float32_to_int32 float32_to_int32_avr +#define float32_to_int64 float32_to_int64_avr +#define float64_to_int16 float64_to_int16_avr +#define float64_to_int32 float64_to_int32_avr +#define float64_to_int64 float64_to_int64_avr +#define float16_to_int16_round_to_zero float16_to_int16_round_to_zero_avr +#define float16_to_int32_round_to_zero float16_to_int32_round_to_zero_avr +#define float16_to_int64_round_to_zero float16_to_int64_round_to_zero_avr +#define float32_to_int16_round_to_zero float32_to_int16_round_to_zero_avr +#define float32_to_int32_round_to_zero float32_to_int32_round_to_zero_avr +#define float32_to_int64_round_to_zero float32_to_int64_round_to_zero_avr +#define float64_to_int16_round_to_zero float64_to_int16_round_to_zero_avr +#define float64_to_int32_round_to_zero float64_to_int32_round_to_zero_avr +#define float64_to_int64_round_to_zero float64_to_int64_round_to_zero_avr +#define float16_to_uint16_scalbn float16_to_uint16_scalbn_avr +#define float16_to_uint32_scalbn float16_to_uint32_scalbn_avr +#define float16_to_uint64_scalbn 
float16_to_uint64_scalbn_avr +#define float32_to_uint16_scalbn float32_to_uint16_scalbn_avr +#define float32_to_uint32_scalbn float32_to_uint32_scalbn_avr +#define float32_to_uint64_scalbn float32_to_uint64_scalbn_avr +#define float64_to_uint16_scalbn float64_to_uint16_scalbn_avr +#define float64_to_uint32_scalbn float64_to_uint32_scalbn_avr +#define float64_to_uint64_scalbn float64_to_uint64_scalbn_avr +#define float16_to_uint16 float16_to_uint16_avr +#define float16_to_uint32 float16_to_uint32_avr +#define float16_to_uint64 float16_to_uint64_avr +#define float32_to_uint16 float32_to_uint16_avr +#define float32_to_uint32 float32_to_uint32_avr +#define float32_to_uint64 float32_to_uint64_avr +#define float64_to_uint16 float64_to_uint16_avr +#define float64_to_uint32 float64_to_uint32_avr +#define float64_to_uint64 float64_to_uint64_avr +#define float16_to_uint16_round_to_zero float16_to_uint16_round_to_zero_avr +#define float16_to_uint32_round_to_zero float16_to_uint32_round_to_zero_avr +#define float16_to_uint64_round_to_zero float16_to_uint64_round_to_zero_avr +#define float32_to_uint16_round_to_zero float32_to_uint16_round_to_zero_avr +#define float32_to_uint32_round_to_zero float32_to_uint32_round_to_zero_avr +#define float32_to_uint64_round_to_zero float32_to_uint64_round_to_zero_avr +#define float64_to_uint16_round_to_zero float64_to_uint16_round_to_zero_avr +#define float64_to_uint32_round_to_zero float64_to_uint32_round_to_zero_avr +#define float64_to_uint64_round_to_zero float64_to_uint64_round_to_zero_avr +#define int64_to_float16_scalbn int64_to_float16_scalbn_avr +#define int32_to_float16_scalbn int32_to_float16_scalbn_avr +#define int16_to_float16_scalbn int16_to_float16_scalbn_avr +#define int64_to_float16 int64_to_float16_avr +#define int32_to_float16 int32_to_float16_avr +#define int16_to_float16 int16_to_float16_avr +#define int64_to_float32_scalbn int64_to_float32_scalbn_avr +#define int32_to_float32_scalbn int32_to_float32_scalbn_avr +#define int16_to_float32_scalbn int16_to_float32_scalbn_avr +#define int64_to_float32 int64_to_float32_avr +#define int32_to_float32 int32_to_float32_avr +#define int16_to_float32 int16_to_float32_avr +#define int64_to_float64_scalbn int64_to_float64_scalbn_avr +#define int32_to_float64_scalbn int32_to_float64_scalbn_avr +#define int16_to_float64_scalbn int16_to_float64_scalbn_avr +#define int64_to_float64 int64_to_float64_avr +#define int32_to_float64 int32_to_float64_avr +#define int16_to_float64 int16_to_float64_avr +#define uint64_to_float16_scalbn uint64_to_float16_scalbn_avr +#define uint32_to_float16_scalbn uint32_to_float16_scalbn_avr +#define uint16_to_float16_scalbn uint16_to_float16_scalbn_avr +#define uint64_to_float16 uint64_to_float16_avr +#define uint32_to_float16 uint32_to_float16_avr +#define uint16_to_float16 uint16_to_float16_avr +#define uint64_to_float32_scalbn uint64_to_float32_scalbn_avr +#define uint32_to_float32_scalbn uint32_to_float32_scalbn_avr +#define uint16_to_float32_scalbn uint16_to_float32_scalbn_avr +#define uint64_to_float32 uint64_to_float32_avr +#define uint32_to_float32 uint32_to_float32_avr +#define uint16_to_float32 uint16_to_float32_avr +#define uint64_to_float64_scalbn uint64_to_float64_scalbn_avr +#define uint32_to_float64_scalbn uint32_to_float64_scalbn_avr +#define uint16_to_float64_scalbn uint16_to_float64_scalbn_avr +#define uint64_to_float64 uint64_to_float64_avr +#define uint32_to_float64 uint32_to_float64_avr +#define uint16_to_float64 uint16_to_float64_avr +#define float16_min 
float16_min_avr +#define float16_minnum float16_minnum_avr +#define float16_minnummag float16_minnummag_avr +#define float16_max float16_max_avr +#define float16_maxnum float16_maxnum_avr +#define float16_maxnummag float16_maxnummag_avr +#define float32_min float32_min_avr +#define float32_minnum float32_minnum_avr +#define float32_minnummag float32_minnummag_avr +#define float32_max float32_max_avr +#define float32_maxnum float32_maxnum_avr +#define float32_maxnummag float32_maxnummag_avr +#define float64_min float64_min_avr +#define float64_minnum float64_minnum_avr +#define float64_minnummag float64_minnummag_avr +#define float64_max float64_max_avr +#define float64_maxnum float64_maxnum_avr +#define float64_maxnummag float64_maxnummag_avr +#define float16_compare float16_compare_avr +#define float16_compare_quiet float16_compare_quiet_avr +#define float32_compare float32_compare_avr +#define float32_compare_quiet float32_compare_quiet_avr +#define float64_compare float64_compare_avr +#define float64_compare_quiet float64_compare_quiet_avr +#define float16_scalbn float16_scalbn_avr +#define float32_scalbn float32_scalbn_avr +#define float64_scalbn float64_scalbn_avr +#define float16_sqrt float16_sqrt_avr +#define float32_sqrt float32_sqrt_avr +#define float64_sqrt float64_sqrt_avr +#define float16_default_nan float16_default_nan_avr +#define float32_default_nan float32_default_nan_avr +#define float64_default_nan float64_default_nan_avr +#define float128_default_nan float128_default_nan_avr +#define float16_silence_nan float16_silence_nan_avr +#define float32_silence_nan float32_silence_nan_avr +#define float64_silence_nan float64_silence_nan_avr +#define float16_squash_input_denormal float16_squash_input_denormal_avr +#define float32_squash_input_denormal float32_squash_input_denormal_avr +#define float64_squash_input_denormal float64_squash_input_denormal_avr +#define normalizeFloatx80Subnormal normalizeFloatx80Subnormal_avr +#define roundAndPackFloatx80 roundAndPackFloatx80_avr +#define normalizeRoundAndPackFloatx80 normalizeRoundAndPackFloatx80_avr +#define int32_to_floatx80 int32_to_floatx80_avr +#define int32_to_float128 int32_to_float128_avr +#define int64_to_floatx80 int64_to_floatx80_avr +#define int64_to_float128 int64_to_float128_avr +#define uint64_to_float128 uint64_to_float128_avr +#define float32_to_floatx80 float32_to_floatx80_avr +#define float32_to_float128 float32_to_float128_avr +#define float32_rem float32_rem_avr +#define float32_exp2 float32_exp2_avr +#define float32_log2 float32_log2_avr +#define float32_eq float32_eq_avr +#define float32_le float32_le_avr +#define float32_lt float32_lt_avr +#define float32_unordered float32_unordered_avr +#define float32_eq_quiet float32_eq_quiet_avr +#define float32_le_quiet float32_le_quiet_avr +#define float32_lt_quiet float32_lt_quiet_avr +#define float32_unordered_quiet float32_unordered_quiet_avr +#define float64_to_floatx80 float64_to_floatx80_avr +#define float64_to_float128 float64_to_float128_avr +#define float64_rem float64_rem_avr +#define float64_log2 float64_log2_avr +#define float64_eq float64_eq_avr +#define float64_le float64_le_avr +#define float64_lt float64_lt_avr +#define float64_unordered float64_unordered_avr +#define float64_eq_quiet float64_eq_quiet_avr +#define float64_le_quiet float64_le_quiet_avr +#define float64_lt_quiet float64_lt_quiet_avr +#define float64_unordered_quiet float64_unordered_quiet_avr +#define floatx80_to_int32 floatx80_to_int32_avr +#define floatx80_to_int32_round_to_zero 
floatx80_to_int32_round_to_zero_avr +#define floatx80_to_int64 floatx80_to_int64_avr +#define floatx80_to_int64_round_to_zero floatx80_to_int64_round_to_zero_avr +#define floatx80_to_float32 floatx80_to_float32_avr +#define floatx80_to_float64 floatx80_to_float64_avr +#define floatx80_to_float128 floatx80_to_float128_avr +#define floatx80_round floatx80_round_avr +#define floatx80_round_to_int floatx80_round_to_int_avr +#define floatx80_add floatx80_add_avr +#define floatx80_sub floatx80_sub_avr +#define floatx80_mul floatx80_mul_avr +#define floatx80_div floatx80_div_avr +#define floatx80_rem floatx80_rem_avr +#define floatx80_sqrt floatx80_sqrt_avr +#define floatx80_eq floatx80_eq_avr +#define floatx80_le floatx80_le_avr +#define floatx80_lt floatx80_lt_avr +#define floatx80_unordered floatx80_unordered_avr +#define floatx80_eq_quiet floatx80_eq_quiet_avr +#define floatx80_le_quiet floatx80_le_quiet_avr +#define floatx80_lt_quiet floatx80_lt_quiet_avr +#define floatx80_unordered_quiet floatx80_unordered_quiet_avr +#define float128_to_int32 float128_to_int32_avr +#define float128_to_int32_round_to_zero float128_to_int32_round_to_zero_avr +#define float128_to_int64 float128_to_int64_avr +#define float128_to_int64_round_to_zero float128_to_int64_round_to_zero_avr +#define float128_to_uint64 float128_to_uint64_avr +#define float128_to_uint64_round_to_zero float128_to_uint64_round_to_zero_avr +#define float128_to_uint32_round_to_zero float128_to_uint32_round_to_zero_avr +#define float128_to_uint32 float128_to_uint32_avr +#define float128_to_float32 float128_to_float32_avr +#define float128_to_float64 float128_to_float64_avr +#define float128_to_floatx80 float128_to_floatx80_avr +#define float128_round_to_int float128_round_to_int_avr +#define float128_add float128_add_avr +#define float128_sub float128_sub_avr +#define float128_mul float128_mul_avr +#define float128_div float128_div_avr +#define float128_rem float128_rem_avr +#define float128_sqrt float128_sqrt_avr +#define float128_eq float128_eq_avr +#define float128_le float128_le_avr +#define float128_lt float128_lt_avr +#define float128_unordered float128_unordered_avr +#define float128_eq_quiet float128_eq_quiet_avr +#define float128_le_quiet float128_le_quiet_avr +#define float128_lt_quiet float128_lt_quiet_avr +#define float128_unordered_quiet float128_unordered_quiet_avr +#define floatx80_compare floatx80_compare_avr +#define floatx80_compare_quiet floatx80_compare_quiet_avr +#define float128_compare float128_compare_avr +#define float128_compare_quiet float128_compare_quiet_avr +#define floatx80_scalbn floatx80_scalbn_avr +#define float128_scalbn float128_scalbn_avr +#define softfloat_init softfloat_init_avr +#define tcg_optimize tcg_optimize_avr +#define gen_new_label gen_new_label_avr +#define tcg_can_emit_vec_op tcg_can_emit_vec_op_avr +#define tcg_expand_vec_op tcg_expand_vec_op_avr +#define tcg_register_jit tcg_register_jit_avr +#define tcg_tb_insert tcg_tb_insert_avr +#define tcg_tb_remove tcg_tb_remove_avr +#define tcg_tb_lookup tcg_tb_lookup_avr +#define tcg_tb_foreach tcg_tb_foreach_avr +#define tcg_nb_tbs tcg_nb_tbs_avr +#define tcg_region_reset_all tcg_region_reset_all_avr +#define tcg_region_init tcg_region_init_avr +#define tcg_code_size tcg_code_size_avr +#define tcg_code_capacity tcg_code_capacity_avr +#define tcg_tb_phys_invalidate_count tcg_tb_phys_invalidate_count_avr +#define tcg_malloc_internal tcg_malloc_internal_avr +#define tcg_pool_reset tcg_pool_reset_avr +#define tcg_context_init tcg_context_init_avr 
+#define tcg_tb_alloc tcg_tb_alloc_avr +#define tcg_prologue_init tcg_prologue_init_avr +#define tcg_func_start tcg_func_start_avr +#define tcg_set_frame tcg_set_frame_avr +#define tcg_global_mem_new_internal tcg_global_mem_new_internal_avr +#define tcg_temp_new_internal tcg_temp_new_internal_avr +#define tcg_temp_new_vec tcg_temp_new_vec_avr +#define tcg_temp_new_vec_matching tcg_temp_new_vec_matching_avr +#define tcg_temp_free_internal tcg_temp_free_internal_avr +#define tcg_const_i32 tcg_const_i32_avr +#define tcg_const_i64 tcg_const_i64_avr +#define tcg_const_local_i32 tcg_const_local_i32_avr +#define tcg_const_local_i64 tcg_const_local_i64_avr +#define tcg_op_supported tcg_op_supported_avr +#define tcg_gen_callN tcg_gen_callN_avr +#define tcg_op_remove tcg_op_remove_avr +#define tcg_emit_op tcg_emit_op_avr +#define tcg_op_insert_before tcg_op_insert_before_avr +#define tcg_op_insert_after tcg_op_insert_after_avr +#define tcg_cpu_exec_time tcg_cpu_exec_time_avr +#define tcg_gen_code tcg_gen_code_avr +#define tcg_gen_op1 tcg_gen_op1_avr +#define tcg_gen_op2 tcg_gen_op2_avr +#define tcg_gen_op3 tcg_gen_op3_avr +#define tcg_gen_op4 tcg_gen_op4_avr +#define tcg_gen_op5 tcg_gen_op5_avr +#define tcg_gen_op6 tcg_gen_op6_avr +#define tcg_gen_mb tcg_gen_mb_avr +#define tcg_gen_addi_i32 tcg_gen_addi_i32_avr +#define tcg_gen_subfi_i32 tcg_gen_subfi_i32_avr +#define tcg_gen_subi_i32 tcg_gen_subi_i32_avr +#define tcg_gen_andi_i32 tcg_gen_andi_i32_avr +#define tcg_gen_ori_i32 tcg_gen_ori_i32_avr +#define tcg_gen_xori_i32 tcg_gen_xori_i32_avr +#define tcg_gen_shli_i32 tcg_gen_shli_i32_avr +#define tcg_gen_shri_i32 tcg_gen_shri_i32_avr +#define tcg_gen_sari_i32 tcg_gen_sari_i32_avr +#define tcg_gen_brcond_i32 tcg_gen_brcond_i32_avr +#define tcg_gen_brcondi_i32 tcg_gen_brcondi_i32_avr +#define tcg_gen_setcond_i32 tcg_gen_setcond_i32_avr +#define tcg_gen_setcondi_i32 tcg_gen_setcondi_i32_avr +#define tcg_gen_muli_i32 tcg_gen_muli_i32_avr +#define tcg_gen_div_i32 tcg_gen_div_i32_avr +#define tcg_gen_rem_i32 tcg_gen_rem_i32_avr +#define tcg_gen_divu_i32 tcg_gen_divu_i32_avr +#define tcg_gen_remu_i32 tcg_gen_remu_i32_avr +#define tcg_gen_andc_i32 tcg_gen_andc_i32_avr +#define tcg_gen_eqv_i32 tcg_gen_eqv_i32_avr +#define tcg_gen_nand_i32 tcg_gen_nand_i32_avr +#define tcg_gen_nor_i32 tcg_gen_nor_i32_avr +#define tcg_gen_orc_i32 tcg_gen_orc_i32_avr +#define tcg_gen_clz_i32 tcg_gen_clz_i32_avr +#define tcg_gen_clzi_i32 tcg_gen_clzi_i32_avr +#define tcg_gen_ctz_i32 tcg_gen_ctz_i32_avr +#define tcg_gen_ctzi_i32 tcg_gen_ctzi_i32_avr +#define tcg_gen_clrsb_i32 tcg_gen_clrsb_i32_avr +#define tcg_gen_ctpop_i32 tcg_gen_ctpop_i32_avr +#define tcg_gen_rotl_i32 tcg_gen_rotl_i32_avr +#define tcg_gen_rotli_i32 tcg_gen_rotli_i32_avr +#define tcg_gen_rotr_i32 tcg_gen_rotr_i32_avr +#define tcg_gen_rotri_i32 tcg_gen_rotri_i32_avr +#define tcg_gen_deposit_i32 tcg_gen_deposit_i32_avr +#define tcg_gen_deposit_z_i32 tcg_gen_deposit_z_i32_avr +#define tcg_gen_extract_i32 tcg_gen_extract_i32_avr +#define tcg_gen_sextract_i32 tcg_gen_sextract_i32_avr +#define tcg_gen_extract2_i32 tcg_gen_extract2_i32_avr +#define tcg_gen_movcond_i32 tcg_gen_movcond_i32_avr +#define tcg_gen_add2_i32 tcg_gen_add2_i32_avr +#define tcg_gen_sub2_i32 tcg_gen_sub2_i32_avr +#define tcg_gen_mulu2_i32 tcg_gen_mulu2_i32_avr +#define tcg_gen_muls2_i32 tcg_gen_muls2_i32_avr +#define tcg_gen_mulsu2_i32 tcg_gen_mulsu2_i32_avr +#define tcg_gen_ext8s_i32 tcg_gen_ext8s_i32_avr +#define tcg_gen_ext16s_i32 tcg_gen_ext16s_i32_avr +#define tcg_gen_ext8u_i32 
tcg_gen_ext8u_i32_avr +#define tcg_gen_ext16u_i32 tcg_gen_ext16u_i32_avr +#define tcg_gen_bswap16_i32 tcg_gen_bswap16_i32_avr +#define tcg_gen_bswap32_i32 tcg_gen_bswap32_i32_avr +#define tcg_gen_smin_i32 tcg_gen_smin_i32_avr +#define tcg_gen_umin_i32 tcg_gen_umin_i32_avr +#define tcg_gen_smax_i32 tcg_gen_smax_i32_avr +#define tcg_gen_umax_i32 tcg_gen_umax_i32_avr +#define tcg_gen_abs_i32 tcg_gen_abs_i32_avr +#define tcg_gen_addi_i64 tcg_gen_addi_i64_avr +#define tcg_gen_subfi_i64 tcg_gen_subfi_i64_avr +#define tcg_gen_subi_i64 tcg_gen_subi_i64_avr +#define tcg_gen_andi_i64 tcg_gen_andi_i64_avr +#define tcg_gen_ori_i64 tcg_gen_ori_i64_avr +#define tcg_gen_xori_i64 tcg_gen_xori_i64_avr +#define tcg_gen_shli_i64 tcg_gen_shli_i64_avr +#define tcg_gen_shri_i64 tcg_gen_shri_i64_avr +#define tcg_gen_sari_i64 tcg_gen_sari_i64_avr +#define tcg_gen_brcond_i64 tcg_gen_brcond_i64_avr +#define tcg_gen_brcondi_i64 tcg_gen_brcondi_i64_avr +#define tcg_gen_setcond_i64 tcg_gen_setcond_i64_avr +#define tcg_gen_setcondi_i64 tcg_gen_setcondi_i64_avr +#define tcg_gen_muli_i64 tcg_gen_muli_i64_avr +#define tcg_gen_div_i64 tcg_gen_div_i64_avr +#define tcg_gen_rem_i64 tcg_gen_rem_i64_avr +#define tcg_gen_divu_i64 tcg_gen_divu_i64_avr +#define tcg_gen_remu_i64 tcg_gen_remu_i64_avr +#define tcg_gen_ext8s_i64 tcg_gen_ext8s_i64_avr +#define tcg_gen_ext16s_i64 tcg_gen_ext16s_i64_avr +#define tcg_gen_ext32s_i64 tcg_gen_ext32s_i64_avr +#define tcg_gen_ext8u_i64 tcg_gen_ext8u_i64_avr +#define tcg_gen_ext16u_i64 tcg_gen_ext16u_i64_avr +#define tcg_gen_ext32u_i64 tcg_gen_ext32u_i64_avr +#define tcg_gen_bswap16_i64 tcg_gen_bswap16_i64_avr +#define tcg_gen_bswap32_i64 tcg_gen_bswap32_i64_avr +#define tcg_gen_bswap64_i64 tcg_gen_bswap64_i64_avr +#define tcg_gen_not_i64 tcg_gen_not_i64_avr +#define tcg_gen_andc_i64 tcg_gen_andc_i64_avr +#define tcg_gen_eqv_i64 tcg_gen_eqv_i64_avr +#define tcg_gen_nand_i64 tcg_gen_nand_i64_avr +#define tcg_gen_nor_i64 tcg_gen_nor_i64_avr +#define tcg_gen_orc_i64 tcg_gen_orc_i64_avr +#define tcg_gen_clz_i64 tcg_gen_clz_i64_avr +#define tcg_gen_clzi_i64 tcg_gen_clzi_i64_avr +#define tcg_gen_ctz_i64 tcg_gen_ctz_i64_avr +#define tcg_gen_ctzi_i64 tcg_gen_ctzi_i64_avr +#define tcg_gen_clrsb_i64 tcg_gen_clrsb_i64_avr +#define tcg_gen_ctpop_i64 tcg_gen_ctpop_i64_avr +#define tcg_gen_rotl_i64 tcg_gen_rotl_i64_avr +#define tcg_gen_rotli_i64 tcg_gen_rotli_i64_avr +#define tcg_gen_rotr_i64 tcg_gen_rotr_i64_avr +#define tcg_gen_rotri_i64 tcg_gen_rotri_i64_avr +#define tcg_gen_deposit_i64 tcg_gen_deposit_i64_avr +#define tcg_gen_deposit_z_i64 tcg_gen_deposit_z_i64_avr +#define tcg_gen_extract_i64 tcg_gen_extract_i64_avr +#define tcg_gen_sextract_i64 tcg_gen_sextract_i64_avr +#define tcg_gen_extract2_i64 tcg_gen_extract2_i64_avr +#define tcg_gen_movcond_i64 tcg_gen_movcond_i64_avr +#define tcg_gen_add2_i64 tcg_gen_add2_i64_avr +#define tcg_gen_sub2_i64 tcg_gen_sub2_i64_avr +#define tcg_gen_mulu2_i64 tcg_gen_mulu2_i64_avr +#define tcg_gen_muls2_i64 tcg_gen_muls2_i64_avr +#define tcg_gen_mulsu2_i64 tcg_gen_mulsu2_i64_avr +#define tcg_gen_smin_i64 tcg_gen_smin_i64_avr +#define tcg_gen_umin_i64 tcg_gen_umin_i64_avr +#define tcg_gen_smax_i64 tcg_gen_smax_i64_avr +#define tcg_gen_umax_i64 tcg_gen_umax_i64_avr +#define tcg_gen_abs_i64 tcg_gen_abs_i64_avr +#define tcg_gen_extrl_i64_i32 tcg_gen_extrl_i64_i32_avr +#define tcg_gen_extrh_i64_i32 tcg_gen_extrh_i64_i32_avr +#define tcg_gen_extu_i32_i64 tcg_gen_extu_i32_i64_avr +#define tcg_gen_ext_i32_i64 tcg_gen_ext_i32_i64_avr +#define tcg_gen_concat_i32_i64 
tcg_gen_concat_i32_i64_avr +#define tcg_gen_extr_i64_i32 tcg_gen_extr_i64_i32_avr +#define tcg_gen_extr32_i64 tcg_gen_extr32_i64_avr +#define tcg_gen_exit_tb tcg_gen_exit_tb_avr +#define tcg_gen_goto_tb tcg_gen_goto_tb_avr +#define tcg_gen_lookup_and_goto_ptr tcg_gen_lookup_and_goto_ptr_avr +#define check_exit_request check_exit_request_avr +#define tcg_gen_qemu_ld_i32 tcg_gen_qemu_ld_i32_avr +#define tcg_gen_qemu_st_i32 tcg_gen_qemu_st_i32_avr +#define tcg_gen_qemu_ld_i64 tcg_gen_qemu_ld_i64_avr +#define tcg_gen_qemu_st_i64 tcg_gen_qemu_st_i64_avr +#define tcg_gen_atomic_cmpxchg_i32 tcg_gen_atomic_cmpxchg_i32_avr +#define tcg_gen_atomic_cmpxchg_i64 tcg_gen_atomic_cmpxchg_i64_avr +#define tcg_gen_atomic_fetch_add_i32 tcg_gen_atomic_fetch_add_i32_avr +#define tcg_gen_atomic_fetch_add_i64 tcg_gen_atomic_fetch_add_i64_avr +#define tcg_gen_atomic_fetch_and_i32 tcg_gen_atomic_fetch_and_i32_avr +#define tcg_gen_atomic_fetch_and_i64 tcg_gen_atomic_fetch_and_i64_avr +#define tcg_gen_atomic_fetch_or_i32 tcg_gen_atomic_fetch_or_i32_avr +#define tcg_gen_atomic_fetch_or_i64 tcg_gen_atomic_fetch_or_i64_avr +#define tcg_gen_atomic_fetch_xor_i32 tcg_gen_atomic_fetch_xor_i32_avr +#define tcg_gen_atomic_fetch_xor_i64 tcg_gen_atomic_fetch_xor_i64_avr +#define tcg_gen_atomic_fetch_smin_i32 tcg_gen_atomic_fetch_smin_i32_avr +#define tcg_gen_atomic_fetch_smin_i64 tcg_gen_atomic_fetch_smin_i64_avr +#define tcg_gen_atomic_fetch_umin_i32 tcg_gen_atomic_fetch_umin_i32_avr +#define tcg_gen_atomic_fetch_umin_i64 tcg_gen_atomic_fetch_umin_i64_avr +#define tcg_gen_atomic_fetch_smax_i32 tcg_gen_atomic_fetch_smax_i32_avr +#define tcg_gen_atomic_fetch_smax_i64 tcg_gen_atomic_fetch_smax_i64_avr +#define tcg_gen_atomic_fetch_umax_i32 tcg_gen_atomic_fetch_umax_i32_avr +#define tcg_gen_atomic_fetch_umax_i64 tcg_gen_atomic_fetch_umax_i64_avr +#define tcg_gen_atomic_add_fetch_i32 tcg_gen_atomic_add_fetch_i32_avr +#define tcg_gen_atomic_add_fetch_i64 tcg_gen_atomic_add_fetch_i64_avr +#define tcg_gen_atomic_and_fetch_i32 tcg_gen_atomic_and_fetch_i32_avr +#define tcg_gen_atomic_and_fetch_i64 tcg_gen_atomic_and_fetch_i64_avr +#define tcg_gen_atomic_or_fetch_i32 tcg_gen_atomic_or_fetch_i32_avr +#define tcg_gen_atomic_or_fetch_i64 tcg_gen_atomic_or_fetch_i64_avr +#define tcg_gen_atomic_xor_fetch_i32 tcg_gen_atomic_xor_fetch_i32_avr +#define tcg_gen_atomic_xor_fetch_i64 tcg_gen_atomic_xor_fetch_i64_avr +#define tcg_gen_atomic_smin_fetch_i32 tcg_gen_atomic_smin_fetch_i32_avr +#define tcg_gen_atomic_smin_fetch_i64 tcg_gen_atomic_smin_fetch_i64_avr +#define tcg_gen_atomic_umin_fetch_i32 tcg_gen_atomic_umin_fetch_i32_avr +#define tcg_gen_atomic_umin_fetch_i64 tcg_gen_atomic_umin_fetch_i64_avr +#define tcg_gen_atomic_smax_fetch_i32 tcg_gen_atomic_smax_fetch_i32_avr +#define tcg_gen_atomic_smax_fetch_i64 tcg_gen_atomic_smax_fetch_i64_avr +#define tcg_gen_atomic_umax_fetch_i32 tcg_gen_atomic_umax_fetch_i32_avr +#define tcg_gen_atomic_umax_fetch_i64 tcg_gen_atomic_umax_fetch_i64_avr +#define tcg_gen_atomic_xchg_i32 tcg_gen_atomic_xchg_i32_avr +#define tcg_gen_atomic_xchg_i64 tcg_gen_atomic_xchg_i64_avr +#define simd_desc simd_desc_avr +#define tcg_gen_gvec_2_ool tcg_gen_gvec_2_ool_avr +#define tcg_gen_gvec_2i_ool tcg_gen_gvec_2i_ool_avr +#define tcg_gen_gvec_3_ool tcg_gen_gvec_3_ool_avr +#define tcg_gen_gvec_4_ool tcg_gen_gvec_4_ool_avr +#define tcg_gen_gvec_5_ool tcg_gen_gvec_5_ool_avr +#define tcg_gen_gvec_2_ptr tcg_gen_gvec_2_ptr_avr +#define tcg_gen_gvec_3_ptr tcg_gen_gvec_3_ptr_avr +#define tcg_gen_gvec_4_ptr tcg_gen_gvec_4_ptr_avr 
+#define tcg_gen_gvec_5_ptr tcg_gen_gvec_5_ptr_avr +#define tcg_gen_gvec_2 tcg_gen_gvec_2_avr +#define tcg_gen_gvec_2i tcg_gen_gvec_2i_avr +#define tcg_gen_gvec_2s tcg_gen_gvec_2s_avr +#define tcg_gen_gvec_3 tcg_gen_gvec_3_avr +#define tcg_gen_gvec_3i tcg_gen_gvec_3i_avr +#define tcg_gen_gvec_4 tcg_gen_gvec_4_avr +#define tcg_gen_gvec_mov tcg_gen_gvec_mov_avr +#define tcg_gen_gvec_dup_i32 tcg_gen_gvec_dup_i32_avr +#define tcg_gen_gvec_dup_i64 tcg_gen_gvec_dup_i64_avr +#define tcg_gen_gvec_dup_mem tcg_gen_gvec_dup_mem_avr +#define tcg_gen_gvec_dup64i tcg_gen_gvec_dup64i_avr +#define tcg_gen_gvec_dup32i tcg_gen_gvec_dup32i_avr +#define tcg_gen_gvec_dup16i tcg_gen_gvec_dup16i_avr +#define tcg_gen_gvec_dup8i tcg_gen_gvec_dup8i_avr +#define tcg_gen_gvec_not tcg_gen_gvec_not_avr +#define tcg_gen_vec_add8_i64 tcg_gen_vec_add8_i64_avr +#define tcg_gen_vec_add16_i64 tcg_gen_vec_add16_i64_avr +#define tcg_gen_vec_add32_i64 tcg_gen_vec_add32_i64_avr +#define tcg_gen_gvec_add tcg_gen_gvec_add_avr +#define tcg_gen_gvec_adds tcg_gen_gvec_adds_avr +#define tcg_gen_gvec_addi tcg_gen_gvec_addi_avr +#define tcg_gen_gvec_subs tcg_gen_gvec_subs_avr +#define tcg_gen_vec_sub8_i64 tcg_gen_vec_sub8_i64_avr +#define tcg_gen_vec_sub16_i64 tcg_gen_vec_sub16_i64_avr +#define tcg_gen_vec_sub32_i64 tcg_gen_vec_sub32_i64_avr +#define tcg_gen_gvec_sub tcg_gen_gvec_sub_avr +#define tcg_gen_gvec_mul tcg_gen_gvec_mul_avr +#define tcg_gen_gvec_muls tcg_gen_gvec_muls_avr +#define tcg_gen_gvec_muli tcg_gen_gvec_muli_avr +#define tcg_gen_gvec_ssadd tcg_gen_gvec_ssadd_avr +#define tcg_gen_gvec_sssub tcg_gen_gvec_sssub_avr +#define tcg_gen_gvec_usadd tcg_gen_gvec_usadd_avr +#define tcg_gen_gvec_ussub tcg_gen_gvec_ussub_avr +#define tcg_gen_gvec_smin tcg_gen_gvec_smin_avr +#define tcg_gen_gvec_umin tcg_gen_gvec_umin_avr +#define tcg_gen_gvec_smax tcg_gen_gvec_smax_avr +#define tcg_gen_gvec_umax tcg_gen_gvec_umax_avr +#define tcg_gen_vec_neg8_i64 tcg_gen_vec_neg8_i64_avr +#define tcg_gen_vec_neg16_i64 tcg_gen_vec_neg16_i64_avr +#define tcg_gen_vec_neg32_i64 tcg_gen_vec_neg32_i64_avr +#define tcg_gen_gvec_neg tcg_gen_gvec_neg_avr +#define tcg_gen_gvec_abs tcg_gen_gvec_abs_avr +#define tcg_gen_gvec_and tcg_gen_gvec_and_avr +#define tcg_gen_gvec_or tcg_gen_gvec_or_avr +#define tcg_gen_gvec_xor tcg_gen_gvec_xor_avr +#define tcg_gen_gvec_andc tcg_gen_gvec_andc_avr +#define tcg_gen_gvec_orc tcg_gen_gvec_orc_avr +#define tcg_gen_gvec_nand tcg_gen_gvec_nand_avr +#define tcg_gen_gvec_nor tcg_gen_gvec_nor_avr +#define tcg_gen_gvec_eqv tcg_gen_gvec_eqv_avr +#define tcg_gen_gvec_ands tcg_gen_gvec_ands_avr +#define tcg_gen_gvec_andi tcg_gen_gvec_andi_avr +#define tcg_gen_gvec_xors tcg_gen_gvec_xors_avr +#define tcg_gen_gvec_xori tcg_gen_gvec_xori_avr +#define tcg_gen_gvec_ors tcg_gen_gvec_ors_avr +#define tcg_gen_gvec_ori tcg_gen_gvec_ori_avr +#define tcg_gen_vec_shl8i_i64 tcg_gen_vec_shl8i_i64_avr +#define tcg_gen_vec_shl16i_i64 tcg_gen_vec_shl16i_i64_avr +#define tcg_gen_gvec_shli tcg_gen_gvec_shli_avr +#define tcg_gen_vec_shr8i_i64 tcg_gen_vec_shr8i_i64_avr +#define tcg_gen_vec_shr16i_i64 tcg_gen_vec_shr16i_i64_avr +#define tcg_gen_gvec_shri tcg_gen_gvec_shri_avr +#define tcg_gen_vec_sar8i_i64 tcg_gen_vec_sar8i_i64_avr +#define tcg_gen_vec_sar16i_i64 tcg_gen_vec_sar16i_i64_avr +#define tcg_gen_gvec_sari tcg_gen_gvec_sari_avr +#define tcg_gen_gvec_shls tcg_gen_gvec_shls_avr +#define tcg_gen_gvec_shrs tcg_gen_gvec_shrs_avr +#define tcg_gen_gvec_sars tcg_gen_gvec_sars_avr +#define tcg_gen_gvec_shlv tcg_gen_gvec_shlv_avr +#define 
tcg_gen_gvec_shrv tcg_gen_gvec_shrv_avr +#define tcg_gen_gvec_sarv tcg_gen_gvec_sarv_avr +#define tcg_gen_gvec_cmp tcg_gen_gvec_cmp_avr +#define tcg_gen_gvec_bitsel tcg_gen_gvec_bitsel_avr +#define tcg_can_emit_vecop_list tcg_can_emit_vecop_list_avr +#define vec_gen_2 vec_gen_2_avr +#define vec_gen_3 vec_gen_3_avr +#define vec_gen_4 vec_gen_4_avr +#define tcg_gen_mov_vec tcg_gen_mov_vec_avr +#define tcg_const_zeros_vec tcg_const_zeros_vec_avr +#define tcg_const_ones_vec tcg_const_ones_vec_avr +#define tcg_const_zeros_vec_matching tcg_const_zeros_vec_matching_avr +#define tcg_const_ones_vec_matching tcg_const_ones_vec_matching_avr +#define tcg_gen_dup64i_vec tcg_gen_dup64i_vec_avr +#define tcg_gen_dup32i_vec tcg_gen_dup32i_vec_avr +#define tcg_gen_dup16i_vec tcg_gen_dup16i_vec_avr +#define tcg_gen_dup8i_vec tcg_gen_dup8i_vec_avr +#define tcg_gen_dupi_vec tcg_gen_dupi_vec_avr +#define tcg_gen_dup_i64_vec tcg_gen_dup_i64_vec_avr +#define tcg_gen_dup_i32_vec tcg_gen_dup_i32_vec_avr +#define tcg_gen_dup_mem_vec tcg_gen_dup_mem_vec_avr +#define tcg_gen_ld_vec tcg_gen_ld_vec_avr +#define tcg_gen_st_vec tcg_gen_st_vec_avr +#define tcg_gen_stl_vec tcg_gen_stl_vec_avr +#define tcg_gen_and_vec tcg_gen_and_vec_avr +#define tcg_gen_or_vec tcg_gen_or_vec_avr +#define tcg_gen_xor_vec tcg_gen_xor_vec_avr +#define tcg_gen_andc_vec tcg_gen_andc_vec_avr +#define tcg_gen_orc_vec tcg_gen_orc_vec_avr +#define tcg_gen_nand_vec tcg_gen_nand_vec_avr +#define tcg_gen_nor_vec tcg_gen_nor_vec_avr +#define tcg_gen_eqv_vec tcg_gen_eqv_vec_avr +#define tcg_gen_not_vec tcg_gen_not_vec_avr +#define tcg_gen_neg_vec tcg_gen_neg_vec_avr +#define tcg_gen_abs_vec tcg_gen_abs_vec_avr +#define tcg_gen_shli_vec tcg_gen_shli_vec_avr +#define tcg_gen_shri_vec tcg_gen_shri_vec_avr +#define tcg_gen_sari_vec tcg_gen_sari_vec_avr +#define tcg_gen_cmp_vec tcg_gen_cmp_vec_avr +#define tcg_gen_add_vec tcg_gen_add_vec_avr +#define tcg_gen_sub_vec tcg_gen_sub_vec_avr +#define tcg_gen_mul_vec tcg_gen_mul_vec_avr +#define tcg_gen_ssadd_vec tcg_gen_ssadd_vec_avr +#define tcg_gen_usadd_vec tcg_gen_usadd_vec_avr +#define tcg_gen_sssub_vec tcg_gen_sssub_vec_avr +#define tcg_gen_ussub_vec tcg_gen_ussub_vec_avr +#define tcg_gen_smin_vec tcg_gen_smin_vec_avr +#define tcg_gen_umin_vec tcg_gen_umin_vec_avr +#define tcg_gen_smax_vec tcg_gen_smax_vec_avr +#define tcg_gen_umax_vec tcg_gen_umax_vec_avr +#define tcg_gen_shlv_vec tcg_gen_shlv_vec_avr +#define tcg_gen_shrv_vec tcg_gen_shrv_vec_avr +#define tcg_gen_sarv_vec tcg_gen_sarv_vec_avr +#define tcg_gen_shls_vec tcg_gen_shls_vec_avr +#define tcg_gen_shrs_vec tcg_gen_shrs_vec_avr +#define tcg_gen_sars_vec tcg_gen_sars_vec_avr +#define tcg_gen_bitsel_vec tcg_gen_bitsel_vec_avr +#define tcg_gen_cmpsel_vec tcg_gen_cmpsel_vec_avr +#define tb_htable_lookup tb_htable_lookup_avr +#define tb_set_jmp_target tb_set_jmp_target_avr +#define cpu_exec cpu_exec_avr +#define cpu_loop_exit_noexc cpu_loop_exit_noexc_avr +#define cpu_reloading_memory_map cpu_reloading_memory_map_avr +#define cpu_loop_exit cpu_loop_exit_avr +#define cpu_loop_exit_restore cpu_loop_exit_restore_avr +#define cpu_loop_exit_atomic cpu_loop_exit_atomic_avr +#define tlb_init tlb_init_avr +#define tlb_flush_by_mmuidx tlb_flush_by_mmuidx_avr +#define tlb_flush tlb_flush_avr +#define tlb_flush_by_mmuidx_all_cpus tlb_flush_by_mmuidx_all_cpus_avr +#define tlb_flush_all_cpus tlb_flush_all_cpus_avr +#define tlb_flush_by_mmuidx_all_cpus_synced tlb_flush_by_mmuidx_all_cpus_synced_avr +#define tlb_flush_all_cpus_synced tlb_flush_all_cpus_synced_avr 
+#define tlb_flush_page_by_mmuidx tlb_flush_page_by_mmuidx_avr +#define tlb_flush_page tlb_flush_page_avr +#define tlb_flush_page_by_mmuidx_all_cpus tlb_flush_page_by_mmuidx_all_cpus_avr +#define tlb_flush_page_all_cpus tlb_flush_page_all_cpus_avr +#define tlb_flush_page_by_mmuidx_all_cpus_synced tlb_flush_page_by_mmuidx_all_cpus_synced_avr +#define tlb_flush_page_all_cpus_synced tlb_flush_page_all_cpus_synced_avr +#define tlb_protect_code tlb_protect_code_avr +#define tlb_unprotect_code tlb_unprotect_code_avr +#define tlb_reset_dirty tlb_reset_dirty_avr +#define tlb_set_dirty tlb_set_dirty_avr +#define tlb_set_page_with_attrs tlb_set_page_with_attrs_avr +#define tlb_set_page tlb_set_page_avr +#define get_page_addr_code_hostp get_page_addr_code_hostp_avr +#define get_page_addr_code get_page_addr_code_avr +#define probe_access probe_access_avr +#define tlb_vaddr_to_host tlb_vaddr_to_host_avr +#define helper_ret_ldub_mmu helper_ret_ldub_mmu_avr +#define helper_le_lduw_mmu helper_le_lduw_mmu_avr +#define helper_be_lduw_mmu helper_be_lduw_mmu_avr +#define helper_le_ldul_mmu helper_le_ldul_mmu_avr +#define helper_be_ldul_mmu helper_be_ldul_mmu_avr +#define helper_le_ldq_mmu helper_le_ldq_mmu_avr +#define helper_be_ldq_mmu helper_be_ldq_mmu_avr +#define helper_ret_ldsb_mmu helper_ret_ldsb_mmu_avr +#define helper_le_ldsw_mmu helper_le_ldsw_mmu_avr +#define helper_be_ldsw_mmu helper_be_ldsw_mmu_avr +#define helper_le_ldsl_mmu helper_le_ldsl_mmu_avr +#define helper_be_ldsl_mmu helper_be_ldsl_mmu_avr +#define cpu_ldub_mmuidx_ra cpu_ldub_mmuidx_ra_avr +#define cpu_ldsb_mmuidx_ra cpu_ldsb_mmuidx_ra_avr +#define cpu_lduw_mmuidx_ra cpu_lduw_mmuidx_ra_avr +#define cpu_ldsw_mmuidx_ra cpu_ldsw_mmuidx_ra_avr +#define cpu_ldl_mmuidx_ra cpu_ldl_mmuidx_ra_avr +#define cpu_ldq_mmuidx_ra cpu_ldq_mmuidx_ra_avr +#define cpu_ldub_data_ra cpu_ldub_data_ra_avr +#define cpu_ldsb_data_ra cpu_ldsb_data_ra_avr +#define cpu_lduw_data_ra cpu_lduw_data_ra_avr +#define cpu_ldsw_data_ra cpu_ldsw_data_ra_avr +#define cpu_ldl_data_ra cpu_ldl_data_ra_avr +#define cpu_ldq_data_ra cpu_ldq_data_ra_avr +#define cpu_ldub_data cpu_ldub_data_avr +#define cpu_ldsb_data cpu_ldsb_data_avr +#define cpu_lduw_data cpu_lduw_data_avr +#define cpu_ldsw_data cpu_ldsw_data_avr +#define cpu_ldl_data cpu_ldl_data_avr +#define cpu_ldq_data cpu_ldq_data_avr +#define helper_ret_stb_mmu helper_ret_stb_mmu_avr +#define helper_le_stw_mmu helper_le_stw_mmu_avr +#define helper_be_stw_mmu helper_be_stw_mmu_avr +#define helper_le_stl_mmu helper_le_stl_mmu_avr +#define helper_be_stl_mmu helper_be_stl_mmu_avr +#define helper_le_stq_mmu helper_le_stq_mmu_avr +#define helper_be_stq_mmu helper_be_stq_mmu_avr +#define cpu_stb_mmuidx_ra cpu_stb_mmuidx_ra_avr +#define cpu_stw_mmuidx_ra cpu_stw_mmuidx_ra_avr +#define cpu_stl_mmuidx_ra cpu_stl_mmuidx_ra_avr +#define cpu_stq_mmuidx_ra cpu_stq_mmuidx_ra_avr +#define cpu_stb_data_ra cpu_stb_data_ra_avr +#define cpu_stw_data_ra cpu_stw_data_ra_avr +#define cpu_stl_data_ra cpu_stl_data_ra_avr +#define cpu_stq_data_ra cpu_stq_data_ra_avr +#define cpu_stb_data cpu_stb_data_avr +#define cpu_stw_data cpu_stw_data_avr +#define cpu_stl_data cpu_stl_data_avr +#define cpu_stq_data cpu_stq_data_avr +#define helper_atomic_cmpxchgb_mmu helper_atomic_cmpxchgb_mmu_avr +#define helper_atomic_xchgb_mmu helper_atomic_xchgb_mmu_avr +#define helper_atomic_fetch_addb_mmu helper_atomic_fetch_addb_mmu_avr +#define helper_atomic_fetch_andb_mmu helper_atomic_fetch_andb_mmu_avr +#define helper_atomic_fetch_orb_mmu helper_atomic_fetch_orb_mmu_avr 
+#define helper_atomic_fetch_xorb_mmu helper_atomic_fetch_xorb_mmu_avr +#define helper_atomic_add_fetchb_mmu helper_atomic_add_fetchb_mmu_avr +#define helper_atomic_and_fetchb_mmu helper_atomic_and_fetchb_mmu_avr +#define helper_atomic_or_fetchb_mmu helper_atomic_or_fetchb_mmu_avr +#define helper_atomic_xor_fetchb_mmu helper_atomic_xor_fetchb_mmu_avr +#define helper_atomic_fetch_sminb_mmu helper_atomic_fetch_sminb_mmu_avr +#define helper_atomic_fetch_uminb_mmu helper_atomic_fetch_uminb_mmu_avr +#define helper_atomic_fetch_smaxb_mmu helper_atomic_fetch_smaxb_mmu_avr +#define helper_atomic_fetch_umaxb_mmu helper_atomic_fetch_umaxb_mmu_avr +#define helper_atomic_smin_fetchb_mmu helper_atomic_smin_fetchb_mmu_avr +#define helper_atomic_umin_fetchb_mmu helper_atomic_umin_fetchb_mmu_avr +#define helper_atomic_smax_fetchb_mmu helper_atomic_smax_fetchb_mmu_avr +#define helper_atomic_umax_fetchb_mmu helper_atomic_umax_fetchb_mmu_avr +#define helper_atomic_cmpxchgw_le_mmu helper_atomic_cmpxchgw_le_mmu_avr +#define helper_atomic_xchgw_le_mmu helper_atomic_xchgw_le_mmu_avr +#define helper_atomic_fetch_addw_le_mmu helper_atomic_fetch_addw_le_mmu_avr +#define helper_atomic_fetch_andw_le_mmu helper_atomic_fetch_andw_le_mmu_avr +#define helper_atomic_fetch_orw_le_mmu helper_atomic_fetch_orw_le_mmu_avr +#define helper_atomic_fetch_xorw_le_mmu helper_atomic_fetch_xorw_le_mmu_avr +#define helper_atomic_add_fetchw_le_mmu helper_atomic_add_fetchw_le_mmu_avr +#define helper_atomic_and_fetchw_le_mmu helper_atomic_and_fetchw_le_mmu_avr +#define helper_atomic_or_fetchw_le_mmu helper_atomic_or_fetchw_le_mmu_avr +#define helper_atomic_xor_fetchw_le_mmu helper_atomic_xor_fetchw_le_mmu_avr +#define helper_atomic_fetch_sminw_le_mmu helper_atomic_fetch_sminw_le_mmu_avr +#define helper_atomic_fetch_uminw_le_mmu helper_atomic_fetch_uminw_le_mmu_avr +#define helper_atomic_fetch_smaxw_le_mmu helper_atomic_fetch_smaxw_le_mmu_avr +#define helper_atomic_fetch_umaxw_le_mmu helper_atomic_fetch_umaxw_le_mmu_avr +#define helper_atomic_smin_fetchw_le_mmu helper_atomic_smin_fetchw_le_mmu_avr +#define helper_atomic_umin_fetchw_le_mmu helper_atomic_umin_fetchw_le_mmu_avr +#define helper_atomic_smax_fetchw_le_mmu helper_atomic_smax_fetchw_le_mmu_avr +#define helper_atomic_umax_fetchw_le_mmu helper_atomic_umax_fetchw_le_mmu_avr +#define helper_atomic_cmpxchgw_be_mmu helper_atomic_cmpxchgw_be_mmu_avr +#define helper_atomic_xchgw_be_mmu helper_atomic_xchgw_be_mmu_avr +#define helper_atomic_fetch_andw_be_mmu helper_atomic_fetch_andw_be_mmu_avr +#define helper_atomic_fetch_orw_be_mmu helper_atomic_fetch_orw_be_mmu_avr +#define helper_atomic_fetch_xorw_be_mmu helper_atomic_fetch_xorw_be_mmu_avr +#define helper_atomic_and_fetchw_be_mmu helper_atomic_and_fetchw_be_mmu_avr +#define helper_atomic_or_fetchw_be_mmu helper_atomic_or_fetchw_be_mmu_avr +#define helper_atomic_xor_fetchw_be_mmu helper_atomic_xor_fetchw_be_mmu_avr +#define helper_atomic_fetch_sminw_be_mmu helper_atomic_fetch_sminw_be_mmu_avr +#define helper_atomic_fetch_uminw_be_mmu helper_atomic_fetch_uminw_be_mmu_avr +#define helper_atomic_fetch_smaxw_be_mmu helper_atomic_fetch_smaxw_be_mmu_avr +#define helper_atomic_fetch_umaxw_be_mmu helper_atomic_fetch_umaxw_be_mmu_avr +#define helper_atomic_smin_fetchw_be_mmu helper_atomic_smin_fetchw_be_mmu_avr +#define helper_atomic_umin_fetchw_be_mmu helper_atomic_umin_fetchw_be_mmu_avr +#define helper_atomic_smax_fetchw_be_mmu helper_atomic_smax_fetchw_be_mmu_avr +#define helper_atomic_umax_fetchw_be_mmu helper_atomic_umax_fetchw_be_mmu_avr 
+#define helper_atomic_fetch_addw_be_mmu helper_atomic_fetch_addw_be_mmu_avr +#define helper_atomic_add_fetchw_be_mmu helper_atomic_add_fetchw_be_mmu_avr +#define helper_atomic_cmpxchgl_le_mmu helper_atomic_cmpxchgl_le_mmu_avr +#define helper_atomic_xchgl_le_mmu helper_atomic_xchgl_le_mmu_avr +#define helper_atomic_fetch_addl_le_mmu helper_atomic_fetch_addl_le_mmu_avr +#define helper_atomic_fetch_andl_le_mmu helper_atomic_fetch_andl_le_mmu_avr +#define helper_atomic_fetch_orl_le_mmu helper_atomic_fetch_orl_le_mmu_avr +#define helper_atomic_fetch_xorl_le_mmu helper_atomic_fetch_xorl_le_mmu_avr +#define helper_atomic_add_fetchl_le_mmu helper_atomic_add_fetchl_le_mmu_avr +#define helper_atomic_and_fetchl_le_mmu helper_atomic_and_fetchl_le_mmu_avr +#define helper_atomic_or_fetchl_le_mmu helper_atomic_or_fetchl_le_mmu_avr +#define helper_atomic_xor_fetchl_le_mmu helper_atomic_xor_fetchl_le_mmu_avr +#define helper_atomic_fetch_sminl_le_mmu helper_atomic_fetch_sminl_le_mmu_avr +#define helper_atomic_fetch_uminl_le_mmu helper_atomic_fetch_uminl_le_mmu_avr +#define helper_atomic_fetch_smaxl_le_mmu helper_atomic_fetch_smaxl_le_mmu_avr +#define helper_atomic_fetch_umaxl_le_mmu helper_atomic_fetch_umaxl_le_mmu_avr +#define helper_atomic_smin_fetchl_le_mmu helper_atomic_smin_fetchl_le_mmu_avr +#define helper_atomic_umin_fetchl_le_mmu helper_atomic_umin_fetchl_le_mmu_avr +#define helper_atomic_smax_fetchl_le_mmu helper_atomic_smax_fetchl_le_mmu_avr +#define helper_atomic_umax_fetchl_le_mmu helper_atomic_umax_fetchl_le_mmu_avr +#define helper_atomic_cmpxchgl_be_mmu helper_atomic_cmpxchgl_be_mmu_avr +#define helper_atomic_xchgl_be_mmu helper_atomic_xchgl_be_mmu_avr +#define helper_atomic_fetch_andl_be_mmu helper_atomic_fetch_andl_be_mmu_avr +#define helper_atomic_fetch_orl_be_mmu helper_atomic_fetch_orl_be_mmu_avr +#define helper_atomic_fetch_xorl_be_mmu helper_atomic_fetch_xorl_be_mmu_avr +#define helper_atomic_and_fetchl_be_mmu helper_atomic_and_fetchl_be_mmu_avr +#define helper_atomic_or_fetchl_be_mmu helper_atomic_or_fetchl_be_mmu_avr +#define helper_atomic_xor_fetchl_be_mmu helper_atomic_xor_fetchl_be_mmu_avr +#define helper_atomic_fetch_sminl_be_mmu helper_atomic_fetch_sminl_be_mmu_avr +#define helper_atomic_fetch_uminl_be_mmu helper_atomic_fetch_uminl_be_mmu_avr +#define helper_atomic_fetch_smaxl_be_mmu helper_atomic_fetch_smaxl_be_mmu_avr +#define helper_atomic_fetch_umaxl_be_mmu helper_atomic_fetch_umaxl_be_mmu_avr +#define helper_atomic_smin_fetchl_be_mmu helper_atomic_smin_fetchl_be_mmu_avr +#define helper_atomic_umin_fetchl_be_mmu helper_atomic_umin_fetchl_be_mmu_avr +#define helper_atomic_smax_fetchl_be_mmu helper_atomic_smax_fetchl_be_mmu_avr +#define helper_atomic_umax_fetchl_be_mmu helper_atomic_umax_fetchl_be_mmu_avr +#define helper_atomic_fetch_addl_be_mmu helper_atomic_fetch_addl_be_mmu_avr +#define helper_atomic_add_fetchl_be_mmu helper_atomic_add_fetchl_be_mmu_avr +#define helper_atomic_cmpxchgq_le_mmu helper_atomic_cmpxchgq_le_mmu_avr +#define helper_atomic_xchgq_le_mmu helper_atomic_xchgq_le_mmu_avr +#define helper_atomic_fetch_addq_le_mmu helper_atomic_fetch_addq_le_mmu_avr +#define helper_atomic_fetch_andq_le_mmu helper_atomic_fetch_andq_le_mmu_avr +#define helper_atomic_fetch_orq_le_mmu helper_atomic_fetch_orq_le_mmu_avr +#define helper_atomic_fetch_xorq_le_mmu helper_atomic_fetch_xorq_le_mmu_avr +#define helper_atomic_add_fetchq_le_mmu helper_atomic_add_fetchq_le_mmu_avr +#define helper_atomic_and_fetchq_le_mmu helper_atomic_and_fetchq_le_mmu_avr +#define 
helper_atomic_or_fetchq_le_mmu helper_atomic_or_fetchq_le_mmu_avr +#define helper_atomic_xor_fetchq_le_mmu helper_atomic_xor_fetchq_le_mmu_avr +#define helper_atomic_fetch_sminq_le_mmu helper_atomic_fetch_sminq_le_mmu_avr +#define helper_atomic_fetch_uminq_le_mmu helper_atomic_fetch_uminq_le_mmu_avr +#define helper_atomic_fetch_smaxq_le_mmu helper_atomic_fetch_smaxq_le_mmu_avr +#define helper_atomic_fetch_umaxq_le_mmu helper_atomic_fetch_umaxq_le_mmu_avr +#define helper_atomic_smin_fetchq_le_mmu helper_atomic_smin_fetchq_le_mmu_avr +#define helper_atomic_umin_fetchq_le_mmu helper_atomic_umin_fetchq_le_mmu_avr +#define helper_atomic_smax_fetchq_le_mmu helper_atomic_smax_fetchq_le_mmu_avr +#define helper_atomic_umax_fetchq_le_mmu helper_atomic_umax_fetchq_le_mmu_avr +#define helper_atomic_cmpxchgq_be_mmu helper_atomic_cmpxchgq_be_mmu_avr +#define helper_atomic_xchgq_be_mmu helper_atomic_xchgq_be_mmu_avr +#define helper_atomic_fetch_andq_be_mmu helper_atomic_fetch_andq_be_mmu_avr +#define helper_atomic_fetch_orq_be_mmu helper_atomic_fetch_orq_be_mmu_avr +#define helper_atomic_fetch_xorq_be_mmu helper_atomic_fetch_xorq_be_mmu_avr +#define helper_atomic_and_fetchq_be_mmu helper_atomic_and_fetchq_be_mmu_avr +#define helper_atomic_or_fetchq_be_mmu helper_atomic_or_fetchq_be_mmu_avr +#define helper_atomic_xor_fetchq_be_mmu helper_atomic_xor_fetchq_be_mmu_avr +#define helper_atomic_fetch_sminq_be_mmu helper_atomic_fetch_sminq_be_mmu_avr +#define helper_atomic_fetch_uminq_be_mmu helper_atomic_fetch_uminq_be_mmu_avr +#define helper_atomic_fetch_smaxq_be_mmu helper_atomic_fetch_smaxq_be_mmu_avr +#define helper_atomic_fetch_umaxq_be_mmu helper_atomic_fetch_umaxq_be_mmu_avr +#define helper_atomic_smin_fetchq_be_mmu helper_atomic_smin_fetchq_be_mmu_avr +#define helper_atomic_umin_fetchq_be_mmu helper_atomic_umin_fetchq_be_mmu_avr +#define helper_atomic_smax_fetchq_be_mmu helper_atomic_smax_fetchq_be_mmu_avr +#define helper_atomic_umax_fetchq_be_mmu helper_atomic_umax_fetchq_be_mmu_avr +#define helper_atomic_fetch_addq_be_mmu helper_atomic_fetch_addq_be_mmu_avr +#define helper_atomic_add_fetchq_be_mmu helper_atomic_add_fetchq_be_mmu_avr +#define helper_atomic_cmpxchgb helper_atomic_cmpxchgb_avr +#define helper_atomic_xchgb helper_atomic_xchgb_avr +#define helper_atomic_fetch_addb helper_atomic_fetch_addb_avr +#define helper_atomic_fetch_andb helper_atomic_fetch_andb_avr +#define helper_atomic_fetch_orb helper_atomic_fetch_orb_avr +#define helper_atomic_fetch_xorb helper_atomic_fetch_xorb_avr +#define helper_atomic_add_fetchb helper_atomic_add_fetchb_avr +#define helper_atomic_and_fetchb helper_atomic_and_fetchb_avr +#define helper_atomic_or_fetchb helper_atomic_or_fetchb_avr +#define helper_atomic_xor_fetchb helper_atomic_xor_fetchb_avr +#define helper_atomic_fetch_sminb helper_atomic_fetch_sminb_avr +#define helper_atomic_fetch_uminb helper_atomic_fetch_uminb_avr +#define helper_atomic_fetch_smaxb helper_atomic_fetch_smaxb_avr +#define helper_atomic_fetch_umaxb helper_atomic_fetch_umaxb_avr +#define helper_atomic_smin_fetchb helper_atomic_smin_fetchb_avr +#define helper_atomic_umin_fetchb helper_atomic_umin_fetchb_avr +#define helper_atomic_smax_fetchb helper_atomic_smax_fetchb_avr +#define helper_atomic_umax_fetchb helper_atomic_umax_fetchb_avr +#define helper_atomic_cmpxchgw_le helper_atomic_cmpxchgw_le_avr +#define helper_atomic_xchgw_le helper_atomic_xchgw_le_avr +#define helper_atomic_fetch_addw_le helper_atomic_fetch_addw_le_avr +#define helper_atomic_fetch_andw_le helper_atomic_fetch_andw_le_avr 
+#define helper_atomic_fetch_orw_le helper_atomic_fetch_orw_le_avr +#define helper_atomic_fetch_xorw_le helper_atomic_fetch_xorw_le_avr +#define helper_atomic_add_fetchw_le helper_atomic_add_fetchw_le_avr +#define helper_atomic_and_fetchw_le helper_atomic_and_fetchw_le_avr +#define helper_atomic_or_fetchw_le helper_atomic_or_fetchw_le_avr +#define helper_atomic_xor_fetchw_le helper_atomic_xor_fetchw_le_avr +#define helper_atomic_fetch_sminw_le helper_atomic_fetch_sminw_le_avr +#define helper_atomic_fetch_uminw_le helper_atomic_fetch_uminw_le_avr +#define helper_atomic_fetch_smaxw_le helper_atomic_fetch_smaxw_le_avr +#define helper_atomic_fetch_umaxw_le helper_atomic_fetch_umaxw_le_avr +#define helper_atomic_smin_fetchw_le helper_atomic_smin_fetchw_le_avr +#define helper_atomic_umin_fetchw_le helper_atomic_umin_fetchw_le_avr +#define helper_atomic_smax_fetchw_le helper_atomic_smax_fetchw_le_avr +#define helper_atomic_umax_fetchw_le helper_atomic_umax_fetchw_le_avr +#define helper_atomic_cmpxchgw_be helper_atomic_cmpxchgw_be_avr +#define helper_atomic_xchgw_be helper_atomic_xchgw_be_avr +#define helper_atomic_fetch_andw_be helper_atomic_fetch_andw_be_avr +#define helper_atomic_fetch_orw_be helper_atomic_fetch_orw_be_avr +#define helper_atomic_fetch_xorw_be helper_atomic_fetch_xorw_be_avr +#define helper_atomic_and_fetchw_be helper_atomic_and_fetchw_be_avr +#define helper_atomic_or_fetchw_be helper_atomic_or_fetchw_be_avr +#define helper_atomic_xor_fetchw_be helper_atomic_xor_fetchw_be_avr +#define helper_atomic_fetch_sminw_be helper_atomic_fetch_sminw_be_avr +#define helper_atomic_fetch_uminw_be helper_atomic_fetch_uminw_be_avr +#define helper_atomic_fetch_smaxw_be helper_atomic_fetch_smaxw_be_avr +#define helper_atomic_fetch_umaxw_be helper_atomic_fetch_umaxw_be_avr +#define helper_atomic_smin_fetchw_be helper_atomic_smin_fetchw_be_avr +#define helper_atomic_umin_fetchw_be helper_atomic_umin_fetchw_be_avr +#define helper_atomic_smax_fetchw_be helper_atomic_smax_fetchw_be_avr +#define helper_atomic_umax_fetchw_be helper_atomic_umax_fetchw_be_avr +#define helper_atomic_fetch_addw_be helper_atomic_fetch_addw_be_avr +#define helper_atomic_add_fetchw_be helper_atomic_add_fetchw_be_avr +#define helper_atomic_cmpxchgl_le helper_atomic_cmpxchgl_le_avr +#define helper_atomic_xchgl_le helper_atomic_xchgl_le_avr +#define helper_atomic_fetch_addl_le helper_atomic_fetch_addl_le_avr +#define helper_atomic_fetch_andl_le helper_atomic_fetch_andl_le_avr +#define helper_atomic_fetch_orl_le helper_atomic_fetch_orl_le_avr +#define helper_atomic_fetch_xorl_le helper_atomic_fetch_xorl_le_avr +#define helper_atomic_add_fetchl_le helper_atomic_add_fetchl_le_avr +#define helper_atomic_and_fetchl_le helper_atomic_and_fetchl_le_avr +#define helper_atomic_or_fetchl_le helper_atomic_or_fetchl_le_avr +#define helper_atomic_xor_fetchl_le helper_atomic_xor_fetchl_le_avr +#define helper_atomic_fetch_sminl_le helper_atomic_fetch_sminl_le_avr +#define helper_atomic_fetch_uminl_le helper_atomic_fetch_uminl_le_avr +#define helper_atomic_fetch_smaxl_le helper_atomic_fetch_smaxl_le_avr +#define helper_atomic_fetch_umaxl_le helper_atomic_fetch_umaxl_le_avr +#define helper_atomic_smin_fetchl_le helper_atomic_smin_fetchl_le_avr +#define helper_atomic_umin_fetchl_le helper_atomic_umin_fetchl_le_avr +#define helper_atomic_smax_fetchl_le helper_atomic_smax_fetchl_le_avr +#define helper_atomic_umax_fetchl_le helper_atomic_umax_fetchl_le_avr +#define helper_atomic_cmpxchgl_be helper_atomic_cmpxchgl_be_avr +#define helper_atomic_xchgl_be 
helper_atomic_xchgl_be_avr +#define helper_atomic_fetch_andl_be helper_atomic_fetch_andl_be_avr +#define helper_atomic_fetch_orl_be helper_atomic_fetch_orl_be_avr +#define helper_atomic_fetch_xorl_be helper_atomic_fetch_xorl_be_avr +#define helper_atomic_and_fetchl_be helper_atomic_and_fetchl_be_avr +#define helper_atomic_or_fetchl_be helper_atomic_or_fetchl_be_avr +#define helper_atomic_xor_fetchl_be helper_atomic_xor_fetchl_be_avr +#define helper_atomic_fetch_sminl_be helper_atomic_fetch_sminl_be_avr +#define helper_atomic_fetch_uminl_be helper_atomic_fetch_uminl_be_avr +#define helper_atomic_fetch_smaxl_be helper_atomic_fetch_smaxl_be_avr +#define helper_atomic_fetch_umaxl_be helper_atomic_fetch_umaxl_be_avr +#define helper_atomic_smin_fetchl_be helper_atomic_smin_fetchl_be_avr +#define helper_atomic_umin_fetchl_be helper_atomic_umin_fetchl_be_avr +#define helper_atomic_smax_fetchl_be helper_atomic_smax_fetchl_be_avr +#define helper_atomic_umax_fetchl_be helper_atomic_umax_fetchl_be_avr +#define helper_atomic_fetch_addl_be helper_atomic_fetch_addl_be_avr +#define helper_atomic_add_fetchl_be helper_atomic_add_fetchl_be_avr +#define helper_atomic_cmpxchgq_le helper_atomic_cmpxchgq_le_avr +#define helper_atomic_xchgq_le helper_atomic_xchgq_le_avr +#define helper_atomic_fetch_addq_le helper_atomic_fetch_addq_le_avr +#define helper_atomic_fetch_andq_le helper_atomic_fetch_andq_le_avr +#define helper_atomic_fetch_orq_le helper_atomic_fetch_orq_le_avr +#define helper_atomic_fetch_xorq_le helper_atomic_fetch_xorq_le_avr +#define helper_atomic_add_fetchq_le helper_atomic_add_fetchq_le_avr +#define helper_atomic_and_fetchq_le helper_atomic_and_fetchq_le_avr +#define helper_atomic_or_fetchq_le helper_atomic_or_fetchq_le_avr +#define helper_atomic_xor_fetchq_le helper_atomic_xor_fetchq_le_avr +#define helper_atomic_fetch_sminq_le helper_atomic_fetch_sminq_le_avr +#define helper_atomic_fetch_uminq_le helper_atomic_fetch_uminq_le_avr +#define helper_atomic_fetch_smaxq_le helper_atomic_fetch_smaxq_le_avr +#define helper_atomic_fetch_umaxq_le helper_atomic_fetch_umaxq_le_avr +#define helper_atomic_smin_fetchq_le helper_atomic_smin_fetchq_le_avr +#define helper_atomic_umin_fetchq_le helper_atomic_umin_fetchq_le_avr +#define helper_atomic_smax_fetchq_le helper_atomic_smax_fetchq_le_avr +#define helper_atomic_umax_fetchq_le helper_atomic_umax_fetchq_le_avr +#define helper_atomic_cmpxchgq_be helper_atomic_cmpxchgq_be_avr +#define helper_atomic_xchgq_be helper_atomic_xchgq_be_avr +#define helper_atomic_fetch_andq_be helper_atomic_fetch_andq_be_avr +#define helper_atomic_fetch_orq_be helper_atomic_fetch_orq_be_avr +#define helper_atomic_fetch_xorq_be helper_atomic_fetch_xorq_be_avr +#define helper_atomic_and_fetchq_be helper_atomic_and_fetchq_be_avr +#define helper_atomic_or_fetchq_be helper_atomic_or_fetchq_be_avr +#define helper_atomic_xor_fetchq_be helper_atomic_xor_fetchq_be_avr +#define helper_atomic_fetch_sminq_be helper_atomic_fetch_sminq_be_avr +#define helper_atomic_fetch_uminq_be helper_atomic_fetch_uminq_be_avr +#define helper_atomic_fetch_smaxq_be helper_atomic_fetch_smaxq_be_avr +#define helper_atomic_fetch_umaxq_be helper_atomic_fetch_umaxq_be_avr +#define helper_atomic_smin_fetchq_be helper_atomic_smin_fetchq_be_avr +#define helper_atomic_umin_fetchq_be helper_atomic_umin_fetchq_be_avr +#define helper_atomic_smax_fetchq_be helper_atomic_smax_fetchq_be_avr +#define helper_atomic_umax_fetchq_be helper_atomic_umax_fetchq_be_avr +#define helper_atomic_fetch_addq_be helper_atomic_fetch_addq_be_avr 
+#define helper_atomic_add_fetchq_be helper_atomic_add_fetchq_be_avr +#define cpu_ldub_code cpu_ldub_code_avr +#define cpu_lduw_code cpu_lduw_code_avr +#define cpu_ldl_code cpu_ldl_code_avr +#define cpu_ldq_code cpu_ldq_code_avr +#define helper_div_i32 helper_div_i32_avr +#define helper_rem_i32 helper_rem_i32_avr +#define helper_divu_i32 helper_divu_i32_avr +#define helper_remu_i32 helper_remu_i32_avr +#define helper_shl_i64 helper_shl_i64_avr +#define helper_shr_i64 helper_shr_i64_avr +#define helper_sar_i64 helper_sar_i64_avr +#define helper_div_i64 helper_div_i64_avr +#define helper_rem_i64 helper_rem_i64_avr +#define helper_divu_i64 helper_divu_i64_avr +#define helper_remu_i64 helper_remu_i64_avr +#define helper_muluh_i64 helper_muluh_i64_avr +#define helper_mulsh_i64 helper_mulsh_i64_avr +#define helper_clz_i32 helper_clz_i32_avr +#define helper_ctz_i32 helper_ctz_i32_avr +#define helper_clz_i64 helper_clz_i64_avr +#define helper_ctz_i64 helper_ctz_i64_avr +#define helper_clrsb_i32 helper_clrsb_i32_avr +#define helper_clrsb_i64 helper_clrsb_i64_avr +#define helper_ctpop_i32 helper_ctpop_i32_avr +#define helper_ctpop_i64 helper_ctpop_i64_avr +#define helper_lookup_tb_ptr helper_lookup_tb_ptr_avr +#define helper_exit_atomic helper_exit_atomic_avr +#define helper_gvec_add8 helper_gvec_add8_avr +#define helper_gvec_add16 helper_gvec_add16_avr +#define helper_gvec_add32 helper_gvec_add32_avr +#define helper_gvec_add64 helper_gvec_add64_avr +#define helper_gvec_adds8 helper_gvec_adds8_avr +#define helper_gvec_adds16 helper_gvec_adds16_avr +#define helper_gvec_adds32 helper_gvec_adds32_avr +#define helper_gvec_adds64 helper_gvec_adds64_avr +#define helper_gvec_sub8 helper_gvec_sub8_avr +#define helper_gvec_sub16 helper_gvec_sub16_avr +#define helper_gvec_sub32 helper_gvec_sub32_avr +#define helper_gvec_sub64 helper_gvec_sub64_avr +#define helper_gvec_subs8 helper_gvec_subs8_avr +#define helper_gvec_subs16 helper_gvec_subs16_avr +#define helper_gvec_subs32 helper_gvec_subs32_avr +#define helper_gvec_subs64 helper_gvec_subs64_avr +#define helper_gvec_mul8 helper_gvec_mul8_avr +#define helper_gvec_mul16 helper_gvec_mul16_avr +#define helper_gvec_mul32 helper_gvec_mul32_avr +#define helper_gvec_mul64 helper_gvec_mul64_avr +#define helper_gvec_muls8 helper_gvec_muls8_avr +#define helper_gvec_muls16 helper_gvec_muls16_avr +#define helper_gvec_muls32 helper_gvec_muls32_avr +#define helper_gvec_muls64 helper_gvec_muls64_avr +#define helper_gvec_neg8 helper_gvec_neg8_avr +#define helper_gvec_neg16 helper_gvec_neg16_avr +#define helper_gvec_neg32 helper_gvec_neg32_avr +#define helper_gvec_neg64 helper_gvec_neg64_avr +#define helper_gvec_abs8 helper_gvec_abs8_avr +#define helper_gvec_abs16 helper_gvec_abs16_avr +#define helper_gvec_abs32 helper_gvec_abs32_avr +#define helper_gvec_abs64 helper_gvec_abs64_avr +#define helper_gvec_mov helper_gvec_mov_avr +#define helper_gvec_dup64 helper_gvec_dup64_avr +#define helper_gvec_dup32 helper_gvec_dup32_avr +#define helper_gvec_dup16 helper_gvec_dup16_avr +#define helper_gvec_dup8 helper_gvec_dup8_avr +#define helper_gvec_not helper_gvec_not_avr +#define helper_gvec_and helper_gvec_and_avr +#define helper_gvec_or helper_gvec_or_avr +#define helper_gvec_xor helper_gvec_xor_avr +#define helper_gvec_andc helper_gvec_andc_avr +#define helper_gvec_orc helper_gvec_orc_avr +#define helper_gvec_nand helper_gvec_nand_avr +#define helper_gvec_nor helper_gvec_nor_avr +#define helper_gvec_eqv helper_gvec_eqv_avr +#define helper_gvec_ands helper_gvec_ands_avr +#define 
helper_gvec_xors helper_gvec_xors_avr +#define helper_gvec_ors helper_gvec_ors_avr +#define helper_gvec_shl8i helper_gvec_shl8i_avr +#define helper_gvec_shl16i helper_gvec_shl16i_avr +#define helper_gvec_shl32i helper_gvec_shl32i_avr +#define helper_gvec_shl64i helper_gvec_shl64i_avr +#define helper_gvec_shr8i helper_gvec_shr8i_avr +#define helper_gvec_shr16i helper_gvec_shr16i_avr +#define helper_gvec_shr32i helper_gvec_shr32i_avr +#define helper_gvec_shr64i helper_gvec_shr64i_avr +#define helper_gvec_sar8i helper_gvec_sar8i_avr +#define helper_gvec_sar16i helper_gvec_sar16i_avr +#define helper_gvec_sar32i helper_gvec_sar32i_avr +#define helper_gvec_sar64i helper_gvec_sar64i_avr +#define helper_gvec_shl8v helper_gvec_shl8v_avr +#define helper_gvec_shl16v helper_gvec_shl16v_avr +#define helper_gvec_shl32v helper_gvec_shl32v_avr +#define helper_gvec_shl64v helper_gvec_shl64v_avr +#define helper_gvec_shr8v helper_gvec_shr8v_avr +#define helper_gvec_shr16v helper_gvec_shr16v_avr +#define helper_gvec_shr32v helper_gvec_shr32v_avr +#define helper_gvec_shr64v helper_gvec_shr64v_avr +#define helper_gvec_sar8v helper_gvec_sar8v_avr +#define helper_gvec_sar16v helper_gvec_sar16v_avr +#define helper_gvec_sar32v helper_gvec_sar32v_avr +#define helper_gvec_sar64v helper_gvec_sar64v_avr +#define helper_gvec_eq8 helper_gvec_eq8_avr +#define helper_gvec_ne8 helper_gvec_ne8_avr +#define helper_gvec_lt8 helper_gvec_lt8_avr +#define helper_gvec_le8 helper_gvec_le8_avr +#define helper_gvec_ltu8 helper_gvec_ltu8_avr +#define helper_gvec_leu8 helper_gvec_leu8_avr +#define helper_gvec_eq16 helper_gvec_eq16_avr +#define helper_gvec_ne16 helper_gvec_ne16_avr +#define helper_gvec_lt16 helper_gvec_lt16_avr +#define helper_gvec_le16 helper_gvec_le16_avr +#define helper_gvec_ltu16 helper_gvec_ltu16_avr +#define helper_gvec_leu16 helper_gvec_leu16_avr +#define helper_gvec_eq32 helper_gvec_eq32_avr +#define helper_gvec_ne32 helper_gvec_ne32_avr +#define helper_gvec_lt32 helper_gvec_lt32_avr +#define helper_gvec_le32 helper_gvec_le32_avr +#define helper_gvec_ltu32 helper_gvec_ltu32_avr +#define helper_gvec_leu32 helper_gvec_leu32_avr +#define helper_gvec_eq64 helper_gvec_eq64_avr +#define helper_gvec_ne64 helper_gvec_ne64_avr +#define helper_gvec_lt64 helper_gvec_lt64_avr +#define helper_gvec_le64 helper_gvec_le64_avr +#define helper_gvec_ltu64 helper_gvec_ltu64_avr +#define helper_gvec_leu64 helper_gvec_leu64_avr +#define helper_gvec_ssadd8 helper_gvec_ssadd8_avr +#define helper_gvec_ssadd16 helper_gvec_ssadd16_avr +#define helper_gvec_ssadd32 helper_gvec_ssadd32_avr +#define helper_gvec_ssadd64 helper_gvec_ssadd64_avr +#define helper_gvec_sssub8 helper_gvec_sssub8_avr +#define helper_gvec_sssub16 helper_gvec_sssub16_avr +#define helper_gvec_sssub32 helper_gvec_sssub32_avr +#define helper_gvec_sssub64 helper_gvec_sssub64_avr +#define helper_gvec_usadd8 helper_gvec_usadd8_avr +#define helper_gvec_usadd16 helper_gvec_usadd16_avr +#define helper_gvec_usadd32 helper_gvec_usadd32_avr +#define helper_gvec_usadd64 helper_gvec_usadd64_avr +#define helper_gvec_ussub8 helper_gvec_ussub8_avr +#define helper_gvec_ussub16 helper_gvec_ussub16_avr +#define helper_gvec_ussub32 helper_gvec_ussub32_avr +#define helper_gvec_ussub64 helper_gvec_ussub64_avr +#define helper_gvec_smin8 helper_gvec_smin8_avr +#define helper_gvec_smin16 helper_gvec_smin16_avr +#define helper_gvec_smin32 helper_gvec_smin32_avr +#define helper_gvec_smin64 helper_gvec_smin64_avr +#define helper_gvec_smax8 helper_gvec_smax8_avr +#define helper_gvec_smax16 
helper_gvec_smax16_avr +#define helper_gvec_smax32 helper_gvec_smax32_avr +#define helper_gvec_smax64 helper_gvec_smax64_avr +#define helper_gvec_umin8 helper_gvec_umin8_avr +#define helper_gvec_umin16 helper_gvec_umin16_avr +#define helper_gvec_umin32 helper_gvec_umin32_avr +#define helper_gvec_umin64 helper_gvec_umin64_avr +#define helper_gvec_umax8 helper_gvec_umax8_avr +#define helper_gvec_umax16 helper_gvec_umax16_avr +#define helper_gvec_umax32 helper_gvec_umax32_avr +#define helper_gvec_umax64 helper_gvec_umax64_avr +#define helper_gvec_bitsel helper_gvec_bitsel_avr +#define cpu_restore_state cpu_restore_state_avr +#define page_collection_lock page_collection_lock_avr +#define page_collection_unlock page_collection_unlock_avr +#define free_code_gen_buffer free_code_gen_buffer_avr +#define tcg_exec_init tcg_exec_init_avr +#define tb_cleanup tb_cleanup_avr +#define tb_flush tb_flush_avr +#define tb_phys_invalidate tb_phys_invalidate_avr +#define tb_gen_code tb_gen_code_avr +#define tb_exec_lock tb_exec_lock_avr +#define tb_exec_unlock tb_exec_unlock_avr +#define tb_invalidate_phys_page_range tb_invalidate_phys_page_range_avr +#define tb_invalidate_phys_range tb_invalidate_phys_range_avr +#define tb_invalidate_phys_page_fast tb_invalidate_phys_page_fast_avr +#define tb_check_watchpoint tb_check_watchpoint_avr +#define cpu_io_recompile cpu_io_recompile_avr +#define tb_flush_jmp_cache tb_flush_jmp_cache_avr +#define tcg_flush_softmmu_tlb tcg_flush_softmmu_tlb_avr +#define translator_loop_temp_check translator_loop_temp_check_avr +#define translator_loop translator_loop_avr +#define helper_atomic_cmpxchgo_le_mmu helper_atomic_cmpxchgo_le_mmu_avr +#define helper_atomic_cmpxchgo_be_mmu helper_atomic_cmpxchgo_be_mmu_avr +#define helper_atomic_ldo_le_mmu helper_atomic_ldo_le_mmu_avr +#define helper_atomic_ldo_be_mmu helper_atomic_ldo_be_mmu_avr +#define helper_atomic_sto_le_mmu helper_atomic_sto_le_mmu_avr +#define helper_atomic_sto_be_mmu helper_atomic_sto_be_mmu_avr +#define unassigned_mem_ops unassigned_mem_ops_avr +#define floatx80_infinity floatx80_infinity_avr +#define dup_const_func dup_const_func_avr +#define gen_helper_raise_exception gen_helper_raise_exception_avr +#define gen_helper_raise_interrupt gen_helper_raise_interrupt_avr +#define gen_helper_vfp_get_fpscr gen_helper_vfp_get_fpscr_avr +#define gen_helper_vfp_set_fpscr gen_helper_vfp_set_fpscr_avr +#define gen_helper_cpsr_read gen_helper_cpsr_read_avr +#define gen_helper_cpsr_write gen_helper_cpsr_write_avr +#define helper_sleep helper_sleep_avr +#define helper_unsupported helper_unsupported_avr +#define helper_debug helper_debug_avr +#define helper_break helper_break_avr +#define helper_inb helper_inb_avr +#define helper_outb helper_outb_avr +#define helper_fullrd helper_fullrd_avr +#define helper_fullwr helper_fullwr_avr +#define helper_wdr helper_wdr_avr +#define gen_intermediate_code gen_intermediate_code_avr +#define restore_state_to_opc restore_state_to_opc_avr + +#define reg_read reg_read_avr +#define reg_write reg_write_avr +#define uc_init uc_init_avr +#endif diff --git a/qemu/configure b/qemu/configure index f52b5b9531..cc5752292f 100755 --- a/qemu/configure +++ b/qemu/configure @@ -496,6 +496,8 @@ elif check_define __aarch64__ ; then cpu="aarch64" elif check_define __tricore__ ; then cpu="tricore" +elif check_define __AVR__ ; then + cpu="avr" else cpu=$(uname -m) fi @@ -539,6 +541,10 @@ case "$cpu" in cpu="tricore" supported_cpu="yes" ;; + avr) + cpu="avr" + supported_cpu="yes" + ;; *) # This will result in either 
an error or falling back to TCI later ARCH=unknown @@ -867,8 +873,8 @@ QEMU_CFLAGS="$CPU_CFLAGS $QEMU_CFLAGS" default_target_list="aarch64-softmmu \ arm-softmmu m68k-softmmu mips64el-softmmu mips64-softmmu mipsel-softmmu \ mips-softmmu ppc64-softmmu ppc-softmmu sparc64-softmmu sparc-softmmu \ - x86_64-softmmu riscv32-softmmu riscv64-softmmu s390x-softmmu \ - tricore-softmmu" + x86_64-softmmu rh850-softmmu riscv32-softmmu riscv64-softmmu s390x-softmmu \ + tricore-softmmu avr-softmmu" if test x"$show_help" = x"yes" ; then cat << EOF @@ -2645,7 +2651,7 @@ config_target_mak=$target_dir/config-target.mak target_name=$(echo $target | cut -d '-' -f 1) target_aligned_only="no" case "$target_name" in - alpha|hppa|mips64el|mips64|mipsel|mips|mipsn32|mipsn32el|sh4|sh4eb|sparc|sparc64|sparc32plus|xtensa|xtensaeb) + alpha|hppa|mips64el|mips64|mipsel|mips|mipsn32|mipsn32el|rh850|sh4|sh4eb|sparc|sparc64|sparc32plus|xtensa|xtensaeb) target_aligned_only="yes" ;; esac @@ -2761,6 +2767,11 @@ case "$target_name" in TARGET_SYSTBL_ABI=common,nospu,32 echo "TARGET_ABI32=y" >> $config_target_mak ;; + rh850) + TARGET_ARCH=rh850 + TARGET_ABI_DIR=rh850 + mttcg=no # system emulation is not supported for RH850 + ;; riscv32) TARGET_BASE_ARCH=riscv TARGET_ABI_DIR=riscv @@ -2803,6 +2814,10 @@ case "$target_name" in TARGET_ARCH=tricore TARGET_BASE_ARCH=tricore ;; + avr) + TARGET_ARCH=avr + TARGET_BASE_ARCH=avr + ;; unicore32) ;; xtensa|xtensaeb) diff --git a/qemu/include/tcg/tcg.h b/qemu/include/tcg/tcg.h index 695609df0c..f1559fcde0 100644 --- a/qemu/include/tcg/tcg.h +++ b/qemu/include/tcg/tcg.h @@ -725,7 +725,7 @@ struct TCGContext { void *tb_ret_addr; /* target/riscv/translate.c */ - TCGv cpu_gpr[32], cpu_pc, cpu_vl; // also target/mips/translate.c + TCGv cpu_gpr[32], cpu_pc, cpu_vl; // also target/mips/translate.c, target/avr/translate.c TCGv_i64 cpu_fpr[32]; /* assume F and D extensions */ TCGv load_res; TCGv load_val; @@ -820,6 +820,23 @@ struct TCGContext { char s390x_cpu_reg_names[16][4]; // renamed from original cpu_reg_names[][] to avoid name clash with m68k TCGv_i64 regs[16]; + + // target/avr/translate.c + TCGv cpu_Cf; + TCGv cpu_Zf; + TCGv cpu_Nf; + TCGv cpu_Vf; + TCGv cpu_Sf; + TCGv cpu_Hf; + TCGv cpu_Tf; + TCGv cpu_If; + TCGv cpu_rampD; + TCGv cpu_rampX; + TCGv cpu_rampY; + TCGv cpu_rampZ; + TCGv cpu_eind; + TCGv cpu_sp; + TCGv cpu_skip; }; static inline size_t temp_idx(TCGContext *tcg_ctx, TCGTemp *ts) diff --git a/qemu/rh850.h b/qemu/rh850.h new file mode 100644 index 0000000000..071393cb7c --- /dev/null +++ b/qemu/rh850.h @@ -0,0 +1,1294 @@ +/* Autogen header for Unicorn Engine - DONOT MODIFY */ +#ifndef UNICORN_AUTOGEN_rh850_H +#define UNICORN_AUTOGEN_rh850_H +#ifndef UNICORN_ARCH_POSTFIX +#define UNICORN_ARCH_POSTFIX _rh850 +#endif +#define unicorn_fill_tlb unicorn_fill_tlb_rh850 +#define reg_read reg_read_rh850 +#define reg_write reg_write_rh850 +#define uc_init uc_init_rh850 +#define uc_add_inline_hook uc_add_inline_hook_rh850 +#define uc_del_inline_hook uc_del_inline_hook_rh850 +#define tb_invalidate_phys_range tb_invalidate_phys_range_rh850 +#define use_idiv_instructions use_idiv_instructions_rh850 +#define arm_arch arm_arch_rh850 +#define tb_target_set_jmp_target tb_target_set_jmp_target_rh850 +#define have_bmi1 have_bmi1_rh850 +#define have_popcnt have_popcnt_rh850 +#define have_avx1 have_avx1_rh850 +#define have_avx2 have_avx2_rh850 +#define have_isa have_isa_rh850 +#define have_altivec have_altivec_rh850 +#define have_vsx have_vsx_rh850 +#define flush_icache_range flush_icache_range_rh850 
+#define s390_facilities s390_facilities_rh850 +#define tcg_dump_op tcg_dump_op_rh850 +#define tcg_dump_ops tcg_dump_ops_rh850 +#define tcg_gen_and_i64 tcg_gen_and_i64_rh850 +#define tcg_gen_discard_i64 tcg_gen_discard_i64_rh850 +#define tcg_gen_ld16s_i64 tcg_gen_ld16s_i64_rh850 +#define tcg_gen_ld16u_i64 tcg_gen_ld16u_i64_rh850 +#define tcg_gen_ld32s_i64 tcg_gen_ld32s_i64_rh850 +#define tcg_gen_ld32u_i64 tcg_gen_ld32u_i64_rh850 +#define tcg_gen_ld8s_i64 tcg_gen_ld8s_i64_rh850 +#define tcg_gen_ld8u_i64 tcg_gen_ld8u_i64_rh850 +#define tcg_gen_ld_i64 tcg_gen_ld_i64_rh850 +#define tcg_gen_mov_i64 tcg_gen_mov_i64_rh850 +#define tcg_gen_movi_i64 tcg_gen_movi_i64_rh850 +#define tcg_gen_mul_i64 tcg_gen_mul_i64_rh850 +#define tcg_gen_or_i64 tcg_gen_or_i64_rh850 +#define tcg_gen_sar_i64 tcg_gen_sar_i64_rh850 +#define tcg_gen_shl_i64 tcg_gen_shl_i64_rh850 +#define tcg_gen_shr_i64 tcg_gen_shr_i64_rh850 +#define tcg_gen_st_i64 tcg_gen_st_i64_rh850 +#define tcg_gen_xor_i64 tcg_gen_xor_i64_rh850 +#define cpu_icount_to_ns cpu_icount_to_ns_rh850 +#define cpu_is_stopped cpu_is_stopped_rh850 +#define cpu_get_ticks cpu_get_ticks_rh850 +#define cpu_get_clock cpu_get_clock_rh850 +#define cpu_resume cpu_resume_rh850 +#define qemu_init_vcpu qemu_init_vcpu_rh850 +#define cpu_stop_current cpu_stop_current_rh850 +#define resume_all_vcpus resume_all_vcpus_rh850 +#define vm_start vm_start_rh850 +#define address_space_dispatch_compact address_space_dispatch_compact_rh850 +#define flatview_translate flatview_translate_rh850 +#define address_space_translate_for_iotlb address_space_translate_for_iotlb_rh850 +#define qemu_get_cpu qemu_get_cpu_rh850 +#define cpu_address_space_init cpu_address_space_init_rh850 +#define cpu_get_address_space cpu_get_address_space_rh850 +#define cpu_exec_unrealizefn cpu_exec_unrealizefn_rh850 +#define cpu_exec_initfn cpu_exec_initfn_rh850 +#define cpu_exec_realizefn cpu_exec_realizefn_rh850 +#define tb_invalidate_phys_addr tb_invalidate_phys_addr_rh850 +#define cpu_watchpoint_insert cpu_watchpoint_insert_rh850 +#define cpu_watchpoint_remove_by_ref cpu_watchpoint_remove_by_ref_rh850 +#define cpu_watchpoint_remove_all cpu_watchpoint_remove_all_rh850 +#define cpu_watchpoint_address_matches cpu_watchpoint_address_matches_rh850 +#define cpu_breakpoint_insert cpu_breakpoint_insert_rh850 +#define cpu_breakpoint_remove cpu_breakpoint_remove_rh850 +#define cpu_breakpoint_remove_by_ref cpu_breakpoint_remove_by_ref_rh850 +#define cpu_breakpoint_remove_all cpu_breakpoint_remove_all_rh850 +#define cpu_abort cpu_abort_rh850 +#define cpu_physical_memory_test_and_clear_dirty cpu_physical_memory_test_and_clear_dirty_rh850 +#define memory_region_section_get_iotlb memory_region_section_get_iotlb_rh850 +#define flatview_add_to_dispatch flatview_add_to_dispatch_rh850 +#define qemu_ram_get_host_addr qemu_ram_get_host_addr_rh850 +#define qemu_ram_get_offset qemu_ram_get_offset_rh850 +#define qemu_ram_get_used_length qemu_ram_get_used_length_rh850 +#define qemu_ram_is_shared qemu_ram_is_shared_rh850 +#define qemu_ram_pagesize qemu_ram_pagesize_rh850 +#define qemu_ram_alloc_from_ptr qemu_ram_alloc_from_ptr_rh850 +#define qemu_ram_alloc qemu_ram_alloc_rh850 +#define qemu_ram_free qemu_ram_free_rh850 +#define qemu_map_ram_ptr qemu_map_ram_ptr_rh850 +#define qemu_ram_block_host_offset qemu_ram_block_host_offset_rh850 +#define qemu_ram_block_from_host qemu_ram_block_from_host_rh850 +#define qemu_ram_addr_from_host qemu_ram_addr_from_host_rh850 +#define cpu_check_watchpoint cpu_check_watchpoint_rh850 +#define 
iotlb_to_section iotlb_to_section_rh850 +#define address_space_dispatch_new address_space_dispatch_new_rh850 +#define address_space_dispatch_free address_space_dispatch_free_rh850 +#define flatview_read_continue flatview_read_continue_rh850 +#define address_space_read_full address_space_read_full_rh850 +#define address_space_write address_space_write_rh850 +#define address_space_rw address_space_rw_rh850 +#define cpu_physical_memory_rw cpu_physical_memory_rw_rh850 +#define address_space_write_rom address_space_write_rom_rh850 +#define cpu_flush_icache_range cpu_flush_icache_range_rh850 +#define cpu_exec_init_all cpu_exec_init_all_rh850 +#define address_space_access_valid address_space_access_valid_rh850 +#define address_space_map address_space_map_rh850 +#define address_space_unmap address_space_unmap_rh850 +#define cpu_physical_memory_map cpu_physical_memory_map_rh850 +#define cpu_physical_memory_unmap cpu_physical_memory_unmap_rh850 +#define cpu_memory_rw_debug cpu_memory_rw_debug_rh850 +#define qemu_target_page_size qemu_target_page_size_rh850 +#define qemu_target_page_bits qemu_target_page_bits_rh850 +#define qemu_target_page_bits_min qemu_target_page_bits_min_rh850 +#define target_words_bigendian target_words_bigendian_rh850 +#define cpu_physical_memory_is_io cpu_physical_memory_is_io_rh850 +#define ram_block_discard_range ram_block_discard_range_rh850 +#define ramblock_is_pmem ramblock_is_pmem_rh850 +#define page_size_init page_size_init_rh850 +#define set_preferred_target_page_bits set_preferred_target_page_bits_rh850 +#define finalize_target_page_bits finalize_target_page_bits_rh850 +#define cpu_outb cpu_outb_rh850 +#define cpu_outw cpu_outw_rh850 +#define cpu_outl cpu_outl_rh850 +#define cpu_inb cpu_inb_rh850 +#define cpu_inw cpu_inw_rh850 +#define cpu_inl cpu_inl_rh850 +#define memory_map memory_map_rh850 +#define memory_map_io memory_map_io_rh850 +#define memory_map_ptr memory_map_ptr_rh850 +#define memory_cow memory_cow_rh850 +#define memory_unmap memory_unmap_rh850 +#define memory_moveout memory_moveout_rh850 +#define memory_movein memory_movein_rh850 +#define memory_free memory_free_rh850 +#define flatview_unref flatview_unref_rh850 +#define address_space_get_flatview address_space_get_flatview_rh850 +#define memory_region_transaction_begin memory_region_transaction_begin_rh850 +#define memory_region_transaction_commit memory_region_transaction_commit_rh850 +#define memory_region_init memory_region_init_rh850 +#define memory_region_access_valid memory_region_access_valid_rh850 +#define memory_region_dispatch_read memory_region_dispatch_read_rh850 +#define memory_region_dispatch_write memory_region_dispatch_write_rh850 +#define memory_region_init_io memory_region_init_io_rh850 +#define memory_region_init_ram_ptr memory_region_init_ram_ptr_rh850 +#define memory_region_size memory_region_size_rh850 +#define memory_region_set_readonly memory_region_set_readonly_rh850 +#define memory_region_get_ram_ptr memory_region_get_ram_ptr_rh850 +#define memory_region_from_host memory_region_from_host_rh850 +#define memory_region_get_ram_addr memory_region_get_ram_addr_rh850 +#define memory_region_add_subregion memory_region_add_subregion_rh850 +#define memory_region_del_subregion memory_region_del_subregion_rh850 +#define memory_region_add_subregion_overlap memory_region_add_subregion_overlap_rh850 +#define memory_region_find memory_region_find_rh850 +#define memory_region_filter_subregions memory_region_filter_subregions_rh850 +#define memory_listener_register memory_listener_register_rh850 
+#define memory_listener_unregister memory_listener_unregister_rh850 +#define address_space_remove_listeners address_space_remove_listeners_rh850 +#define address_space_init address_space_init_rh850 +#define address_space_destroy address_space_destroy_rh850 +#define memory_region_init_ram memory_region_init_ram_rh850 +#define memory_mapping_list_add_merge_sorted memory_mapping_list_add_merge_sorted_rh850 +#define find_memory_mapping find_memory_mapping_rh850 +#define exec_inline_op exec_inline_op_rh850 +#define floatx80_default_nan floatx80_default_nan_rh850 +#define float_raise float_raise_rh850 +#define float16_is_quiet_nan float16_is_quiet_nan_rh850 +#define float16_is_signaling_nan float16_is_signaling_nan_rh850 +#define float32_is_quiet_nan float32_is_quiet_nan_rh850 +#define float32_is_signaling_nan float32_is_signaling_nan_rh850 +#define float64_is_quiet_nan float64_is_quiet_nan_rh850 +#define float64_is_signaling_nan float64_is_signaling_nan_rh850 +#define floatx80_is_quiet_nan floatx80_is_quiet_nan_rh850 +#define floatx80_is_signaling_nan floatx80_is_signaling_nan_rh850 +#define floatx80_silence_nan floatx80_silence_nan_rh850 +#define propagateFloatx80NaN propagateFloatx80NaN_rh850 +#define float128_is_quiet_nan float128_is_quiet_nan_rh850 +#define float128_is_signaling_nan float128_is_signaling_nan_rh850 +#define float128_silence_nan float128_silence_nan_rh850 +#define float16_add float16_add_rh850 +#define float16_sub float16_sub_rh850 +#define float32_add float32_add_rh850 +#define float32_sub float32_sub_rh850 +#define float64_add float64_add_rh850 +#define float64_sub float64_sub_rh850 +#define float16_mul float16_mul_rh850 +#define float32_mul float32_mul_rh850 +#define float64_mul float64_mul_rh850 +#define float16_muladd float16_muladd_rh850 +#define float32_muladd float32_muladd_rh850 +#define float64_muladd float64_muladd_rh850 +#define float16_div float16_div_rh850 +#define float32_div float32_div_rh850 +#define float64_div float64_div_rh850 +#define float16_to_float32 float16_to_float32_rh850 +#define float16_to_float64 float16_to_float64_rh850 +#define float32_to_float16 float32_to_float16_rh850 +#define float32_to_float64 float32_to_float64_rh850 +#define float64_to_float16 float64_to_float16_rh850 +#define float64_to_float32 float64_to_float32_rh850 +#define float16_round_to_int float16_round_to_int_rh850 +#define float32_round_to_int float32_round_to_int_rh850 +#define float64_round_to_int float64_round_to_int_rh850 +#define float16_to_int16_scalbn float16_to_int16_scalbn_rh850 +#define float16_to_int32_scalbn float16_to_int32_scalbn_rh850 +#define float16_to_int64_scalbn float16_to_int64_scalbn_rh850 +#define float32_to_int16_scalbn float32_to_int16_scalbn_rh850 +#define float32_to_int32_scalbn float32_to_int32_scalbn_rh850 +#define float32_to_int64_scalbn float32_to_int64_scalbn_rh850 +#define float64_to_int16_scalbn float64_to_int16_scalbn_rh850 +#define float64_to_int32_scalbn float64_to_int32_scalbn_rh850 +#define float64_to_int64_scalbn float64_to_int64_scalbn_rh850 +#define float16_to_int16 float16_to_int16_rh850 +#define float16_to_int32 float16_to_int32_rh850 +#define float16_to_int64 float16_to_int64_rh850 +#define float32_to_int16 float32_to_int16_rh850 +#define float32_to_int32 float32_to_int32_rh850 +#define float32_to_int64 float32_to_int64_rh850 +#define float64_to_int16 float64_to_int16_rh850 +#define float64_to_int32 float64_to_int32_rh850 +#define float64_to_int64 float64_to_int64_rh850 +#define float16_to_int16_round_to_zero 
float16_to_int16_round_to_zero_rh850 +#define float16_to_int32_round_to_zero float16_to_int32_round_to_zero_rh850 +#define float16_to_int64_round_to_zero float16_to_int64_round_to_zero_rh850 +#define float32_to_int16_round_to_zero float32_to_int16_round_to_zero_rh850 +#define float32_to_int32_round_to_zero float32_to_int32_round_to_zero_rh850 +#define float32_to_int64_round_to_zero float32_to_int64_round_to_zero_rh850 +#define float64_to_int16_round_to_zero float64_to_int16_round_to_zero_rh850 +#define float64_to_int32_round_to_zero float64_to_int32_round_to_zero_rh850 +#define float64_to_int64_round_to_zero float64_to_int64_round_to_zero_rh850 +#define float16_to_uint16_scalbn float16_to_uint16_scalbn_rh850 +#define float16_to_uint32_scalbn float16_to_uint32_scalbn_rh850 +#define float16_to_uint64_scalbn float16_to_uint64_scalbn_rh850 +#define float32_to_uint16_scalbn float32_to_uint16_scalbn_rh850 +#define float32_to_uint32_scalbn float32_to_uint32_scalbn_rh850 +#define float32_to_uint64_scalbn float32_to_uint64_scalbn_rh850 +#define float64_to_uint16_scalbn float64_to_uint16_scalbn_rh850 +#define float64_to_uint32_scalbn float64_to_uint32_scalbn_rh850 +#define float64_to_uint64_scalbn float64_to_uint64_scalbn_rh850 +#define float16_to_uint16 float16_to_uint16_rh850 +#define float16_to_uint32 float16_to_uint32_rh850 +#define float16_to_uint64 float16_to_uint64_rh850 +#define float32_to_uint16 float32_to_uint16_rh850 +#define float32_to_uint32 float32_to_uint32_rh850 +#define float32_to_uint64 float32_to_uint64_rh850 +#define float64_to_uint16 float64_to_uint16_rh850 +#define float64_to_uint32 float64_to_uint32_rh850 +#define float64_to_uint64 float64_to_uint64_rh850 +#define float16_to_uint16_round_to_zero float16_to_uint16_round_to_zero_rh850 +#define float16_to_uint32_round_to_zero float16_to_uint32_round_to_zero_rh850 +#define float16_to_uint64_round_to_zero float16_to_uint64_round_to_zero_rh850 +#define float32_to_uint16_round_to_zero float32_to_uint16_round_to_zero_rh850 +#define float32_to_uint32_round_to_zero float32_to_uint32_round_to_zero_rh850 +#define float32_to_uint64_round_to_zero float32_to_uint64_round_to_zero_rh850 +#define float64_to_uint16_round_to_zero float64_to_uint16_round_to_zero_rh850 +#define float64_to_uint32_round_to_zero float64_to_uint32_round_to_zero_rh850 +#define float64_to_uint64_round_to_zero float64_to_uint64_round_to_zero_rh850 +#define int64_to_float16_scalbn int64_to_float16_scalbn_rh850 +#define int32_to_float16_scalbn int32_to_float16_scalbn_rh850 +#define int16_to_float16_scalbn int16_to_float16_scalbn_rh850 +#define int64_to_float16 int64_to_float16_rh850 +#define int32_to_float16 int32_to_float16_rh850 +#define int16_to_float16 int16_to_float16_rh850 +#define int64_to_float32_scalbn int64_to_float32_scalbn_rh850 +#define int32_to_float32_scalbn int32_to_float32_scalbn_rh850 +#define int16_to_float32_scalbn int16_to_float32_scalbn_rh850 +#define int64_to_float32 int64_to_float32_rh850 +#define int32_to_float32 int32_to_float32_rh850 +#define int16_to_float32 int16_to_float32_rh850 +#define int64_to_float64_scalbn int64_to_float64_scalbn_rh850 +#define int32_to_float64_scalbn int32_to_float64_scalbn_rh850 +#define int16_to_float64_scalbn int16_to_float64_scalbn_rh850 +#define int64_to_float64 int64_to_float64_rh850 +#define int32_to_float64 int32_to_float64_rh850 +#define int16_to_float64 int16_to_float64_rh850 +#define uint64_to_float16_scalbn uint64_to_float16_scalbn_rh850 +#define uint32_to_float16_scalbn uint32_to_float16_scalbn_rh850 +#define 
uint16_to_float16_scalbn uint16_to_float16_scalbn_rh850 +#define uint64_to_float16 uint64_to_float16_rh850 +#define uint32_to_float16 uint32_to_float16_rh850 +#define uint16_to_float16 uint16_to_float16_rh850 +#define uint64_to_float32_scalbn uint64_to_float32_scalbn_rh850 +#define uint32_to_float32_scalbn uint32_to_float32_scalbn_rh850 +#define uint16_to_float32_scalbn uint16_to_float32_scalbn_rh850 +#define uint64_to_float32 uint64_to_float32_rh850 +#define uint32_to_float32 uint32_to_float32_rh850 +#define uint16_to_float32 uint16_to_float32_rh850 +#define uint64_to_float64_scalbn uint64_to_float64_scalbn_rh850 +#define uint32_to_float64_scalbn uint32_to_float64_scalbn_rh850 +#define uint16_to_float64_scalbn uint16_to_float64_scalbn_rh850 +#define uint64_to_float64 uint64_to_float64_rh850 +#define uint32_to_float64 uint32_to_float64_rh850 +#define uint16_to_float64 uint16_to_float64_rh850 +#define float16_min float16_min_rh850 +#define float16_minnum float16_minnum_rh850 +#define float16_minnummag float16_minnummag_rh850 +#define float16_max float16_max_rh850 +#define float16_maxnum float16_maxnum_rh850 +#define float16_maxnummag float16_maxnummag_rh850 +#define float32_min float32_min_rh850 +#define float32_minnum float32_minnum_rh850 +#define float32_minnummag float32_minnummag_rh850 +#define float32_max float32_max_rh850 +#define float32_maxnum float32_maxnum_rh850 +#define float32_maxnummag float32_maxnummag_rh850 +#define float64_min float64_min_rh850 +#define float64_minnum float64_minnum_rh850 +#define float64_minnummag float64_minnummag_rh850 +#define float64_max float64_max_rh850 +#define float64_maxnum float64_maxnum_rh850 +#define float64_maxnummag float64_maxnummag_rh850 +#define float16_compare float16_compare_rh850 +#define float16_compare_quiet float16_compare_quiet_rh850 +#define float32_compare float32_compare_rh850 +#define float32_compare_quiet float32_compare_quiet_rh850 +#define float64_compare float64_compare_rh850 +#define float64_compare_quiet float64_compare_quiet_rh850 +#define float16_scalbn float16_scalbn_rh850 +#define float32_scalbn float32_scalbn_rh850 +#define float64_scalbn float64_scalbn_rh850 +#define float16_sqrt float16_sqrt_rh850 +#define float32_sqrt float32_sqrt_rh850 +#define float64_sqrt float64_sqrt_rh850 +#define float16_default_nan float16_default_nan_rh850 +#define float32_default_nan float32_default_nan_rh850 +#define float64_default_nan float64_default_nan_rh850 +#define float128_default_nan float128_default_nan_rh850 +#define float16_silence_nan float16_silence_nan_rh850 +#define float32_silence_nan float32_silence_nan_rh850 +#define float64_silence_nan float64_silence_nan_rh850 +#define float16_squash_input_denormal float16_squash_input_denormal_rh850 +#define float32_squash_input_denormal float32_squash_input_denormal_rh850 +#define float64_squash_input_denormal float64_squash_input_denormal_rh850 +#define normalizeFloatx80Subnormal normalizeFloatx80Subnormal_rh850 +#define roundAndPackFloatx80 roundAndPackFloatx80_rh850 +#define normalizeRoundAndPackFloatx80 normalizeRoundAndPackFloatx80_rh850 +#define int32_to_floatx80 int32_to_floatx80_rh850 +#define int32_to_float128 int32_to_float128_rh850 +#define int64_to_floatx80 int64_to_floatx80_rh850 +#define int64_to_float128 int64_to_float128_rh850 +#define uint64_to_float128 uint64_to_float128_rh850 +#define float32_to_floatx80 float32_to_floatx80_rh850 +#define float32_to_float128 float32_to_float128_rh850 +#define float32_rem float32_rem_rh850 +#define float32_exp2 float32_exp2_rh850 
+#define float32_log2 float32_log2_rh850 +#define float32_eq float32_eq_rh850 +#define float32_le float32_le_rh850 +#define float32_lt float32_lt_rh850 +#define float32_unordered float32_unordered_rh850 +#define float32_eq_quiet float32_eq_quiet_rh850 +#define float32_le_quiet float32_le_quiet_rh850 +#define float32_lt_quiet float32_lt_quiet_rh850 +#define float32_unordered_quiet float32_unordered_quiet_rh850 +#define float64_to_floatx80 float64_to_floatx80_rh850 +#define float64_to_float128 float64_to_float128_rh850 +#define float64_rem float64_rem_rh850 +#define float64_log2 float64_log2_rh850 +#define float64_eq float64_eq_rh850 +#define float64_le float64_le_rh850 +#define float64_lt float64_lt_rh850 +#define float64_unordered float64_unordered_rh850 +#define float64_eq_quiet float64_eq_quiet_rh850 +#define float64_le_quiet float64_le_quiet_rh850 +#define float64_lt_quiet float64_lt_quiet_rh850 +#define float64_unordered_quiet float64_unordered_quiet_rh850 +#define floatx80_to_int32 floatx80_to_int32_rh850 +#define floatx80_to_int32_round_to_zero floatx80_to_int32_round_to_zero_rh850 +#define floatx80_to_int64 floatx80_to_int64_rh850 +#define floatx80_to_int64_round_to_zero floatx80_to_int64_round_to_zero_rh850 +#define floatx80_to_float32 floatx80_to_float32_rh850 +#define floatx80_to_float64 floatx80_to_float64_rh850 +#define floatx80_to_float128 floatx80_to_float128_rh850 +#define floatx80_round floatx80_round_rh850 +#define floatx80_round_to_int floatx80_round_to_int_rh850 +#define floatx80_add floatx80_add_rh850 +#define floatx80_sub floatx80_sub_rh850 +#define floatx80_mul floatx80_mul_rh850 +#define floatx80_div floatx80_div_rh850 +#define floatx80_rem floatx80_rem_rh850 +#define floatx80_sqrt floatx80_sqrt_rh850 +#define floatx80_eq floatx80_eq_rh850 +#define floatx80_le floatx80_le_rh850 +#define floatx80_lt floatx80_lt_rh850 +#define floatx80_unordered floatx80_unordered_rh850 +#define floatx80_eq_quiet floatx80_eq_quiet_rh850 +#define floatx80_le_quiet floatx80_le_quiet_rh850 +#define floatx80_lt_quiet floatx80_lt_quiet_rh850 +#define floatx80_unordered_quiet floatx80_unordered_quiet_rh850 +#define float128_to_int32 float128_to_int32_rh850 +#define float128_to_int32_round_to_zero float128_to_int32_round_to_zero_rh850 +#define float128_to_int64 float128_to_int64_rh850 +#define float128_to_int64_round_to_zero float128_to_int64_round_to_zero_rh850 +#define float128_to_uint64 float128_to_uint64_rh850 +#define float128_to_uint64_round_to_zero float128_to_uint64_round_to_zero_rh850 +#define float128_to_uint32_round_to_zero float128_to_uint32_round_to_zero_rh850 +#define float128_to_uint32 float128_to_uint32_rh850 +#define float128_to_float32 float128_to_float32_rh850 +#define float128_to_float64 float128_to_float64_rh850 +#define float128_to_floatx80 float128_to_floatx80_rh850 +#define float128_round_to_int float128_round_to_int_rh850 +#define float128_add float128_add_rh850 +#define float128_sub float128_sub_rh850 +#define float128_mul float128_mul_rh850 +#define float128_div float128_div_rh850 +#define float128_rem float128_rem_rh850 +#define float128_sqrt float128_sqrt_rh850 +#define float128_eq float128_eq_rh850 +#define float128_le float128_le_rh850 +#define float128_lt float128_lt_rh850 +#define float128_unordered float128_unordered_rh850 +#define float128_eq_quiet float128_eq_quiet_rh850 +#define float128_le_quiet float128_le_quiet_rh850 +#define float128_lt_quiet float128_lt_quiet_rh850 +#define float128_unordered_quiet float128_unordered_quiet_rh850 +#define 
floatx80_compare floatx80_compare_rh850 +#define floatx80_compare_quiet floatx80_compare_quiet_rh850 +#define float128_compare float128_compare_rh850 +#define float128_compare_quiet float128_compare_quiet_rh850 +#define floatx80_scalbn floatx80_scalbn_rh850 +#define float128_scalbn float128_scalbn_rh850 +#define softfloat_init softfloat_init_rh850 +#define tcg_optimize tcg_optimize_rh850 +#define gen_new_label gen_new_label_rh850 +#define tcg_can_emit_vec_op tcg_can_emit_vec_op_rh850 +#define tcg_expand_vec_op tcg_expand_vec_op_rh850 +#define tcg_register_jit tcg_register_jit_rh850 +#define tcg_tb_insert tcg_tb_insert_rh850 +#define tcg_tb_remove tcg_tb_remove_rh850 +#define tcg_tb_lookup tcg_tb_lookup_rh850 +#define tcg_tb_foreach tcg_tb_foreach_rh850 +#define tcg_nb_tbs tcg_nb_tbs_rh850 +#define tcg_region_reset_all tcg_region_reset_all_rh850 +#define tcg_region_init tcg_region_init_rh850 +#define tcg_code_size tcg_code_size_rh850 +#define tcg_code_capacity tcg_code_capacity_rh850 +#define tcg_tb_phys_invalidate_count tcg_tb_phys_invalidate_count_rh850 +#define tcg_malloc_internal tcg_malloc_internal_rh850 +#define tcg_pool_reset tcg_pool_reset_rh850 +#define tcg_context_init tcg_context_init_rh850 +#define tcg_tb_alloc tcg_tb_alloc_rh850 +#define tcg_prologue_init tcg_prologue_init_rh850 +#define tcg_func_start tcg_func_start_rh850 +#define tcg_set_frame tcg_set_frame_rh850 +#define tcg_global_mem_new_internal tcg_global_mem_new_internal_rh850 +#define tcg_temp_new_internal tcg_temp_new_internal_rh850 +#define tcg_temp_new_vec tcg_temp_new_vec_rh850 +#define tcg_temp_new_vec_matching tcg_temp_new_vec_matching_rh850 +#define tcg_temp_free_internal tcg_temp_free_internal_rh850 +#define tcg_const_i32 tcg_const_i32_rh850 +#define tcg_const_i64 tcg_const_i64_rh850 +#define tcg_const_local_i32 tcg_const_local_i32_rh850 +#define tcg_const_local_i64 tcg_const_local_i64_rh850 +#define tcg_op_supported tcg_op_supported_rh850 +#define tcg_gen_callN tcg_gen_callN_rh850 +#define tcg_op_remove tcg_op_remove_rh850 +#define tcg_emit_op tcg_emit_op_rh850 +#define tcg_op_insert_before tcg_op_insert_before_rh850 +#define tcg_op_insert_after tcg_op_insert_after_rh850 +#define tcg_cpu_exec_time tcg_cpu_exec_time_rh850 +#define tcg_gen_code tcg_gen_code_rh850 +#define tcg_gen_op1 tcg_gen_op1_rh850 +#define tcg_gen_op2 tcg_gen_op2_rh850 +#define tcg_gen_op3 tcg_gen_op3_rh850 +#define tcg_gen_op4 tcg_gen_op4_rh850 +#define tcg_gen_op5 tcg_gen_op5_rh850 +#define tcg_gen_op6 tcg_gen_op6_rh850 +#define tcg_gen_mb tcg_gen_mb_rh850 +#define tcg_gen_addi_i32 tcg_gen_addi_i32_rh850 +#define tcg_gen_subfi_i32 tcg_gen_subfi_i32_rh850 +#define tcg_gen_subi_i32 tcg_gen_subi_i32_rh850 +#define tcg_gen_andi_i32 tcg_gen_andi_i32_rh850 +#define tcg_gen_ori_i32 tcg_gen_ori_i32_rh850 +#define tcg_gen_xori_i32 tcg_gen_xori_i32_rh850 +#define tcg_gen_shli_i32 tcg_gen_shli_i32_rh850 +#define tcg_gen_shri_i32 tcg_gen_shri_i32_rh850 +#define tcg_gen_sari_i32 tcg_gen_sari_i32_rh850 +#define tcg_gen_brcond_i32 tcg_gen_brcond_i32_rh850 +#define tcg_gen_brcondi_i32 tcg_gen_brcondi_i32_rh850 +#define tcg_gen_setcond_i32 tcg_gen_setcond_i32_rh850 +#define tcg_gen_setcondi_i32 tcg_gen_setcondi_i32_rh850 +#define tcg_gen_muli_i32 tcg_gen_muli_i32_rh850 +#define tcg_gen_div_i32 tcg_gen_div_i32_rh850 +#define tcg_gen_rem_i32 tcg_gen_rem_i32_rh850 +#define tcg_gen_divu_i32 tcg_gen_divu_i32_rh850 +#define tcg_gen_remu_i32 tcg_gen_remu_i32_rh850 +#define tcg_gen_andc_i32 tcg_gen_andc_i32_rh850 +#define tcg_gen_eqv_i32 tcg_gen_eqv_i32_rh850 
+#define tcg_gen_nand_i32 tcg_gen_nand_i32_rh850 +#define tcg_gen_nor_i32 tcg_gen_nor_i32_rh850 +#define tcg_gen_orc_i32 tcg_gen_orc_i32_rh850 +#define tcg_gen_clz_i32 tcg_gen_clz_i32_rh850 +#define tcg_gen_clzi_i32 tcg_gen_clzi_i32_rh850 +#define tcg_gen_ctz_i32 tcg_gen_ctz_i32_rh850 +#define tcg_gen_ctzi_i32 tcg_gen_ctzi_i32_rh850 +#define tcg_gen_clrsb_i32 tcg_gen_clrsb_i32_rh850 +#define tcg_gen_ctpop_i32 tcg_gen_ctpop_i32_rh850 +#define tcg_gen_rotl_i32 tcg_gen_rotl_i32_rh850 +#define tcg_gen_rotli_i32 tcg_gen_rotli_i32_rh850 +#define tcg_gen_rotr_i32 tcg_gen_rotr_i32_rh850 +#define tcg_gen_rotri_i32 tcg_gen_rotri_i32_rh850 +#define tcg_gen_deposit_i32 tcg_gen_deposit_i32_rh850 +#define tcg_gen_deposit_z_i32 tcg_gen_deposit_z_i32_rh850 +#define tcg_gen_extract_i32 tcg_gen_extract_i32_rh850 +#define tcg_gen_sextract_i32 tcg_gen_sextract_i32_rh850 +#define tcg_gen_extract2_i32 tcg_gen_extract2_i32_rh850 +#define tcg_gen_movcond_i32 tcg_gen_movcond_i32_rh850 +#define tcg_gen_add2_i32 tcg_gen_add2_i32_rh850 +#define tcg_gen_sub2_i32 tcg_gen_sub2_i32_rh850 +#define tcg_gen_mulu2_i32 tcg_gen_mulu2_i32_rh850 +#define tcg_gen_muls2_i32 tcg_gen_muls2_i32_rh850 +#define tcg_gen_mulsu2_i32 tcg_gen_mulsu2_i32_rh850 +#define tcg_gen_ext8s_i32 tcg_gen_ext8s_i32_rh850 +#define tcg_gen_ext16s_i32 tcg_gen_ext16s_i32_rh850 +#define tcg_gen_ext8u_i32 tcg_gen_ext8u_i32_rh850 +#define tcg_gen_ext16u_i32 tcg_gen_ext16u_i32_rh850 +#define tcg_gen_bswap16_i32 tcg_gen_bswap16_i32_rh850 +#define tcg_gen_bswap32_i32 tcg_gen_bswap32_i32_rh850 +#define tcg_gen_smin_i32 tcg_gen_smin_i32_rh850 +#define tcg_gen_umin_i32 tcg_gen_umin_i32_rh850 +#define tcg_gen_smax_i32 tcg_gen_smax_i32_rh850 +#define tcg_gen_umax_i32 tcg_gen_umax_i32_rh850 +#define tcg_gen_abs_i32 tcg_gen_abs_i32_rh850 +#define tcg_gen_addi_i64 tcg_gen_addi_i64_rh850 +#define tcg_gen_subfi_i64 tcg_gen_subfi_i64_rh850 +#define tcg_gen_subi_i64 tcg_gen_subi_i64_rh850 +#define tcg_gen_andi_i64 tcg_gen_andi_i64_rh850 +#define tcg_gen_ori_i64 tcg_gen_ori_i64_rh850 +#define tcg_gen_xori_i64 tcg_gen_xori_i64_rh850 +#define tcg_gen_shli_i64 tcg_gen_shli_i64_rh850 +#define tcg_gen_shri_i64 tcg_gen_shri_i64_rh850 +#define tcg_gen_sari_i64 tcg_gen_sari_i64_rh850 +#define tcg_gen_brcond_i64 tcg_gen_brcond_i64_rh850 +#define tcg_gen_brcondi_i64 tcg_gen_brcondi_i64_rh850 +#define tcg_gen_setcond_i64 tcg_gen_setcond_i64_rh850 +#define tcg_gen_setcondi_i64 tcg_gen_setcondi_i64_rh850 +#define tcg_gen_muli_i64 tcg_gen_muli_i64_rh850 +#define tcg_gen_div_i64 tcg_gen_div_i64_rh850 +#define tcg_gen_rem_i64 tcg_gen_rem_i64_rh850 +#define tcg_gen_divu_i64 tcg_gen_divu_i64_rh850 +#define tcg_gen_remu_i64 tcg_gen_remu_i64_rh850 +#define tcg_gen_ext8s_i64 tcg_gen_ext8s_i64_rh850 +#define tcg_gen_ext16s_i64 tcg_gen_ext16s_i64_rh850 +#define tcg_gen_ext32s_i64 tcg_gen_ext32s_i64_rh850 +#define tcg_gen_ext8u_i64 tcg_gen_ext8u_i64_rh850 +#define tcg_gen_ext16u_i64 tcg_gen_ext16u_i64_rh850 +#define tcg_gen_ext32u_i64 tcg_gen_ext32u_i64_rh850 +#define tcg_gen_bswap16_i64 tcg_gen_bswap16_i64_rh850 +#define tcg_gen_bswap32_i64 tcg_gen_bswap32_i64_rh850 +#define tcg_gen_bswap64_i64 tcg_gen_bswap64_i64_rh850 +#define tcg_gen_not_i64 tcg_gen_not_i64_rh850 +#define tcg_gen_andc_i64 tcg_gen_andc_i64_rh850 +#define tcg_gen_eqv_i64 tcg_gen_eqv_i64_rh850 +#define tcg_gen_nand_i64 tcg_gen_nand_i64_rh850 +#define tcg_gen_nor_i64 tcg_gen_nor_i64_rh850 +#define tcg_gen_orc_i64 tcg_gen_orc_i64_rh850 +#define tcg_gen_clz_i64 tcg_gen_clz_i64_rh850 +#define tcg_gen_clzi_i64 tcg_gen_clzi_i64_rh850 
+#define tcg_gen_ctz_i64 tcg_gen_ctz_i64_rh850 +#define tcg_gen_ctzi_i64 tcg_gen_ctzi_i64_rh850 +#define tcg_gen_clrsb_i64 tcg_gen_clrsb_i64_rh850 +#define tcg_gen_ctpop_i64 tcg_gen_ctpop_i64_rh850 +#define tcg_gen_rotl_i64 tcg_gen_rotl_i64_rh850 +#define tcg_gen_rotli_i64 tcg_gen_rotli_i64_rh850 +#define tcg_gen_rotr_i64 tcg_gen_rotr_i64_rh850 +#define tcg_gen_rotri_i64 tcg_gen_rotri_i64_rh850 +#define tcg_gen_deposit_i64 tcg_gen_deposit_i64_rh850 +#define tcg_gen_deposit_z_i64 tcg_gen_deposit_z_i64_rh850 +#define tcg_gen_extract_i64 tcg_gen_extract_i64_rh850 +#define tcg_gen_sextract_i64 tcg_gen_sextract_i64_rh850 +#define tcg_gen_extract2_i64 tcg_gen_extract2_i64_rh850 +#define tcg_gen_movcond_i64 tcg_gen_movcond_i64_rh850 +#define tcg_gen_add2_i64 tcg_gen_add2_i64_rh850 +#define tcg_gen_sub2_i64 tcg_gen_sub2_i64_rh850 +#define tcg_gen_mulu2_i64 tcg_gen_mulu2_i64_rh850 +#define tcg_gen_muls2_i64 tcg_gen_muls2_i64_rh850 +#define tcg_gen_mulsu2_i64 tcg_gen_mulsu2_i64_rh850 +#define tcg_gen_smin_i64 tcg_gen_smin_i64_rh850 +#define tcg_gen_umin_i64 tcg_gen_umin_i64_rh850 +#define tcg_gen_smax_i64 tcg_gen_smax_i64_rh850 +#define tcg_gen_umax_i64 tcg_gen_umax_i64_rh850 +#define tcg_gen_abs_i64 tcg_gen_abs_i64_rh850 +#define tcg_gen_extrl_i64_i32 tcg_gen_extrl_i64_i32_rh850 +#define tcg_gen_extrh_i64_i32 tcg_gen_extrh_i64_i32_rh850 +#define tcg_gen_extu_i32_i64 tcg_gen_extu_i32_i64_rh850 +#define tcg_gen_ext_i32_i64 tcg_gen_ext_i32_i64_rh850 +#define tcg_gen_concat_i32_i64 tcg_gen_concat_i32_i64_rh850 +#define tcg_gen_extr_i64_i32 tcg_gen_extr_i64_i32_rh850 +#define tcg_gen_extr32_i64 tcg_gen_extr32_i64_rh850 +#define tcg_gen_exit_tb tcg_gen_exit_tb_rh850 +#define tcg_gen_goto_tb tcg_gen_goto_tb_rh850 +#define tcg_gen_lookup_and_goto_ptr tcg_gen_lookup_and_goto_ptr_rh850 +#define check_exit_request check_exit_request_rh850 +#define tcg_gen_qemu_ld_i32 tcg_gen_qemu_ld_i32_rh850 +#define tcg_gen_qemu_st_i32 tcg_gen_qemu_st_i32_rh850 +#define tcg_gen_qemu_ld_i64 tcg_gen_qemu_ld_i64_rh850 +#define tcg_gen_qemu_st_i64 tcg_gen_qemu_st_i64_rh850 +#define tcg_gen_atomic_cmpxchg_i32 tcg_gen_atomic_cmpxchg_i32_rh850 +#define tcg_gen_atomic_cmpxchg_i64 tcg_gen_atomic_cmpxchg_i64_rh850 +#define tcg_gen_atomic_fetch_add_i32 tcg_gen_atomic_fetch_add_i32_rh850 +#define tcg_gen_atomic_fetch_add_i64 tcg_gen_atomic_fetch_add_i64_rh850 +#define tcg_gen_atomic_fetch_and_i32 tcg_gen_atomic_fetch_and_i32_rh850 +#define tcg_gen_atomic_fetch_and_i64 tcg_gen_atomic_fetch_and_i64_rh850 +#define tcg_gen_atomic_fetch_or_i32 tcg_gen_atomic_fetch_or_i32_rh850 +#define tcg_gen_atomic_fetch_or_i64 tcg_gen_atomic_fetch_or_i64_rh850 +#define tcg_gen_atomic_fetch_xor_i32 tcg_gen_atomic_fetch_xor_i32_rh850 +#define tcg_gen_atomic_fetch_xor_i64 tcg_gen_atomic_fetch_xor_i64_rh850 +#define tcg_gen_atomic_fetch_smin_i32 tcg_gen_atomic_fetch_smin_i32_rh850 +#define tcg_gen_atomic_fetch_smin_i64 tcg_gen_atomic_fetch_smin_i64_rh850 +#define tcg_gen_atomic_fetch_umin_i32 tcg_gen_atomic_fetch_umin_i32_rh850 +#define tcg_gen_atomic_fetch_umin_i64 tcg_gen_atomic_fetch_umin_i64_rh850 +#define tcg_gen_atomic_fetch_smax_i32 tcg_gen_atomic_fetch_smax_i32_rh850 +#define tcg_gen_atomic_fetch_smax_i64 tcg_gen_atomic_fetch_smax_i64_rh850 +#define tcg_gen_atomic_fetch_umax_i32 tcg_gen_atomic_fetch_umax_i32_rh850 +#define tcg_gen_atomic_fetch_umax_i64 tcg_gen_atomic_fetch_umax_i64_rh850 +#define tcg_gen_atomic_add_fetch_i32 tcg_gen_atomic_add_fetch_i32_rh850 +#define tcg_gen_atomic_add_fetch_i64 tcg_gen_atomic_add_fetch_i64_rh850 +#define 
tcg_gen_atomic_and_fetch_i32 tcg_gen_atomic_and_fetch_i32_rh850 +#define tcg_gen_atomic_and_fetch_i64 tcg_gen_atomic_and_fetch_i64_rh850 +#define tcg_gen_atomic_or_fetch_i32 tcg_gen_atomic_or_fetch_i32_rh850 +#define tcg_gen_atomic_or_fetch_i64 tcg_gen_atomic_or_fetch_i64_rh850 +#define tcg_gen_atomic_xor_fetch_i32 tcg_gen_atomic_xor_fetch_i32_rh850 +#define tcg_gen_atomic_xor_fetch_i64 tcg_gen_atomic_xor_fetch_i64_rh850 +#define tcg_gen_atomic_smin_fetch_i32 tcg_gen_atomic_smin_fetch_i32_rh850 +#define tcg_gen_atomic_smin_fetch_i64 tcg_gen_atomic_smin_fetch_i64_rh850 +#define tcg_gen_atomic_umin_fetch_i32 tcg_gen_atomic_umin_fetch_i32_rh850 +#define tcg_gen_atomic_umin_fetch_i64 tcg_gen_atomic_umin_fetch_i64_rh850 +#define tcg_gen_atomic_smax_fetch_i32 tcg_gen_atomic_smax_fetch_i32_rh850 +#define tcg_gen_atomic_smax_fetch_i64 tcg_gen_atomic_smax_fetch_i64_rh850 +#define tcg_gen_atomic_umax_fetch_i32 tcg_gen_atomic_umax_fetch_i32_rh850 +#define tcg_gen_atomic_umax_fetch_i64 tcg_gen_atomic_umax_fetch_i64_rh850 +#define tcg_gen_atomic_xchg_i32 tcg_gen_atomic_xchg_i32_rh850 +#define tcg_gen_atomic_xchg_i64 tcg_gen_atomic_xchg_i64_rh850 +#define simd_desc simd_desc_rh850 +#define tcg_gen_gvec_2_ool tcg_gen_gvec_2_ool_rh850 +#define tcg_gen_gvec_2i_ool tcg_gen_gvec_2i_ool_rh850 +#define tcg_gen_gvec_3_ool tcg_gen_gvec_3_ool_rh850 +#define tcg_gen_gvec_4_ool tcg_gen_gvec_4_ool_rh850 +#define tcg_gen_gvec_5_ool tcg_gen_gvec_5_ool_rh850 +#define tcg_gen_gvec_2_ptr tcg_gen_gvec_2_ptr_rh850 +#define tcg_gen_gvec_3_ptr tcg_gen_gvec_3_ptr_rh850 +#define tcg_gen_gvec_4_ptr tcg_gen_gvec_4_ptr_rh850 +#define tcg_gen_gvec_5_ptr tcg_gen_gvec_5_ptr_rh850 +#define tcg_gen_gvec_2 tcg_gen_gvec_2_rh850 +#define tcg_gen_gvec_2i tcg_gen_gvec_2i_rh850 +#define tcg_gen_gvec_2s tcg_gen_gvec_2s_rh850 +#define tcg_gen_gvec_3 tcg_gen_gvec_3_rh850 +#define tcg_gen_gvec_3i tcg_gen_gvec_3i_rh850 +#define tcg_gen_gvec_4 tcg_gen_gvec_4_rh850 +#define tcg_gen_gvec_mov tcg_gen_gvec_mov_rh850 +#define tcg_gen_gvec_dup_i32 tcg_gen_gvec_dup_i32_rh850 +#define tcg_gen_gvec_dup_i64 tcg_gen_gvec_dup_i64_rh850 +#define tcg_gen_gvec_dup_mem tcg_gen_gvec_dup_mem_rh850 +#define tcg_gen_gvec_dup64i tcg_gen_gvec_dup64i_rh850 +#define tcg_gen_gvec_dup32i tcg_gen_gvec_dup32i_rh850 +#define tcg_gen_gvec_dup16i tcg_gen_gvec_dup16i_rh850 +#define tcg_gen_gvec_dup8i tcg_gen_gvec_dup8i_rh850 +#define tcg_gen_gvec_not tcg_gen_gvec_not_rh850 +#define tcg_gen_vec_add8_i64 tcg_gen_vec_add8_i64_rh850 +#define tcg_gen_vec_add16_i64 tcg_gen_vec_add16_i64_rh850 +#define tcg_gen_vec_add32_i64 tcg_gen_vec_add32_i64_rh850 +#define tcg_gen_gvec_add tcg_gen_gvec_add_rh850 +#define tcg_gen_gvec_adds tcg_gen_gvec_adds_rh850 +#define tcg_gen_gvec_addi tcg_gen_gvec_addi_rh850 +#define tcg_gen_gvec_subs tcg_gen_gvec_subs_rh850 +#define tcg_gen_vec_sub8_i64 tcg_gen_vec_sub8_i64_rh850 +#define tcg_gen_vec_sub16_i64 tcg_gen_vec_sub16_i64_rh850 +#define tcg_gen_vec_sub32_i64 tcg_gen_vec_sub32_i64_rh850 +#define tcg_gen_gvec_sub tcg_gen_gvec_sub_rh850 +#define tcg_gen_gvec_mul tcg_gen_gvec_mul_rh850 +#define tcg_gen_gvec_muls tcg_gen_gvec_muls_rh850 +#define tcg_gen_gvec_muli tcg_gen_gvec_muli_rh850 +#define tcg_gen_gvec_ssadd tcg_gen_gvec_ssadd_rh850 +#define tcg_gen_gvec_sssub tcg_gen_gvec_sssub_rh850 +#define tcg_gen_gvec_usadd tcg_gen_gvec_usadd_rh850 +#define tcg_gen_gvec_ussub tcg_gen_gvec_ussub_rh850 +#define tcg_gen_gvec_smin tcg_gen_gvec_smin_rh850 +#define tcg_gen_gvec_umin tcg_gen_gvec_umin_rh850 +#define tcg_gen_gvec_smax tcg_gen_gvec_smax_rh850 +#define 
tcg_gen_gvec_umax tcg_gen_gvec_umax_rh850 +#define tcg_gen_vec_neg8_i64 tcg_gen_vec_neg8_i64_rh850 +#define tcg_gen_vec_neg16_i64 tcg_gen_vec_neg16_i64_rh850 +#define tcg_gen_vec_neg32_i64 tcg_gen_vec_neg32_i64_rh850 +#define tcg_gen_gvec_neg tcg_gen_gvec_neg_rh850 +#define tcg_gen_gvec_abs tcg_gen_gvec_abs_rh850 +#define tcg_gen_gvec_and tcg_gen_gvec_and_rh850 +#define tcg_gen_gvec_or tcg_gen_gvec_or_rh850 +#define tcg_gen_gvec_xor tcg_gen_gvec_xor_rh850 +#define tcg_gen_gvec_andc tcg_gen_gvec_andc_rh850 +#define tcg_gen_gvec_orc tcg_gen_gvec_orc_rh850 +#define tcg_gen_gvec_nand tcg_gen_gvec_nand_rh850 +#define tcg_gen_gvec_nor tcg_gen_gvec_nor_rh850 +#define tcg_gen_gvec_eqv tcg_gen_gvec_eqv_rh850 +#define tcg_gen_gvec_ands tcg_gen_gvec_ands_rh850 +#define tcg_gen_gvec_andi tcg_gen_gvec_andi_rh850 +#define tcg_gen_gvec_xors tcg_gen_gvec_xors_rh850 +#define tcg_gen_gvec_xori tcg_gen_gvec_xori_rh850 +#define tcg_gen_gvec_ors tcg_gen_gvec_ors_rh850 +#define tcg_gen_gvec_ori tcg_gen_gvec_ori_rh850 +#define tcg_gen_vec_shl8i_i64 tcg_gen_vec_shl8i_i64_rh850 +#define tcg_gen_vec_shl16i_i64 tcg_gen_vec_shl16i_i64_rh850 +#define tcg_gen_gvec_shli tcg_gen_gvec_shli_rh850 +#define tcg_gen_vec_shr8i_i64 tcg_gen_vec_shr8i_i64_rh850 +#define tcg_gen_vec_shr16i_i64 tcg_gen_vec_shr16i_i64_rh850 +#define tcg_gen_gvec_shri tcg_gen_gvec_shri_rh850 +#define tcg_gen_vec_sar8i_i64 tcg_gen_vec_sar8i_i64_rh850 +#define tcg_gen_vec_sar16i_i64 tcg_gen_vec_sar16i_i64_rh850 +#define tcg_gen_gvec_sari tcg_gen_gvec_sari_rh850 +#define tcg_gen_gvec_shls tcg_gen_gvec_shls_rh850 +#define tcg_gen_gvec_shrs tcg_gen_gvec_shrs_rh850 +#define tcg_gen_gvec_sars tcg_gen_gvec_sars_rh850 +#define tcg_gen_gvec_shlv tcg_gen_gvec_shlv_rh850 +#define tcg_gen_gvec_shrv tcg_gen_gvec_shrv_rh850 +#define tcg_gen_gvec_sarv tcg_gen_gvec_sarv_rh850 +#define tcg_gen_gvec_cmp tcg_gen_gvec_cmp_rh850 +#define tcg_gen_gvec_bitsel tcg_gen_gvec_bitsel_rh850 +#define tcg_can_emit_vecop_list tcg_can_emit_vecop_list_rh850 +#define vec_gen_2 vec_gen_2_rh850 +#define vec_gen_3 vec_gen_3_rh850 +#define vec_gen_4 vec_gen_4_rh850 +#define tcg_gen_mov_vec tcg_gen_mov_vec_rh850 +#define tcg_const_zeros_vec tcg_const_zeros_vec_rh850 +#define tcg_const_ones_vec tcg_const_ones_vec_rh850 +#define tcg_const_zeros_vec_matching tcg_const_zeros_vec_matching_rh850 +#define tcg_const_ones_vec_matching tcg_const_ones_vec_matching_rh850 +#define tcg_gen_dup64i_vec tcg_gen_dup64i_vec_rh850 +#define tcg_gen_dup32i_vec tcg_gen_dup32i_vec_rh850 +#define tcg_gen_dup16i_vec tcg_gen_dup16i_vec_rh850 +#define tcg_gen_dup8i_vec tcg_gen_dup8i_vec_rh850 +#define tcg_gen_dupi_vec tcg_gen_dupi_vec_rh850 +#define tcg_gen_dup_i64_vec tcg_gen_dup_i64_vec_rh850 +#define tcg_gen_dup_i32_vec tcg_gen_dup_i32_vec_rh850 +#define tcg_gen_dup_mem_vec tcg_gen_dup_mem_vec_rh850 +#define tcg_gen_ld_vec tcg_gen_ld_vec_rh850 +#define tcg_gen_st_vec tcg_gen_st_vec_rh850 +#define tcg_gen_stl_vec tcg_gen_stl_vec_rh850 +#define tcg_gen_and_vec tcg_gen_and_vec_rh850 +#define tcg_gen_or_vec tcg_gen_or_vec_rh850 +#define tcg_gen_xor_vec tcg_gen_xor_vec_rh850 +#define tcg_gen_andc_vec tcg_gen_andc_vec_rh850 +#define tcg_gen_orc_vec tcg_gen_orc_vec_rh850 +#define tcg_gen_nand_vec tcg_gen_nand_vec_rh850 +#define tcg_gen_nor_vec tcg_gen_nor_vec_rh850 +#define tcg_gen_eqv_vec tcg_gen_eqv_vec_rh850 +#define tcg_gen_not_vec tcg_gen_not_vec_rh850 +#define tcg_gen_neg_vec tcg_gen_neg_vec_rh850 +#define tcg_gen_abs_vec tcg_gen_abs_vec_rh850 +#define tcg_gen_shli_vec tcg_gen_shli_vec_rh850 +#define 
tcg_gen_shri_vec tcg_gen_shri_vec_rh850 +#define tcg_gen_sari_vec tcg_gen_sari_vec_rh850 +#define tcg_gen_cmp_vec tcg_gen_cmp_vec_rh850 +#define tcg_gen_add_vec tcg_gen_add_vec_rh850 +#define tcg_gen_sub_vec tcg_gen_sub_vec_rh850 +#define tcg_gen_mul_vec tcg_gen_mul_vec_rh850 +#define tcg_gen_ssadd_vec tcg_gen_ssadd_vec_rh850 +#define tcg_gen_usadd_vec tcg_gen_usadd_vec_rh850 +#define tcg_gen_sssub_vec tcg_gen_sssub_vec_rh850 +#define tcg_gen_ussub_vec tcg_gen_ussub_vec_rh850 +#define tcg_gen_smin_vec tcg_gen_smin_vec_rh850 +#define tcg_gen_umin_vec tcg_gen_umin_vec_rh850 +#define tcg_gen_smax_vec tcg_gen_smax_vec_rh850 +#define tcg_gen_umax_vec tcg_gen_umax_vec_rh850 +#define tcg_gen_shlv_vec tcg_gen_shlv_vec_rh850 +#define tcg_gen_shrv_vec tcg_gen_shrv_vec_rh850 +#define tcg_gen_sarv_vec tcg_gen_sarv_vec_rh850 +#define tcg_gen_shls_vec tcg_gen_shls_vec_rh850 +#define tcg_gen_shrs_vec tcg_gen_shrs_vec_rh850 +#define tcg_gen_sars_vec tcg_gen_sars_vec_rh850 +#define tcg_gen_bitsel_vec tcg_gen_bitsel_vec_rh850 +#define tcg_gen_cmpsel_vec tcg_gen_cmpsel_vec_rh850 +#define tb_htable_lookup tb_htable_lookup_rh850 +#define tb_set_jmp_target tb_set_jmp_target_rh850 +#define cpu_exec cpu_exec_rh850 +#define cpu_loop_exit_noexc cpu_loop_exit_noexc_rh850 +#define cpu_reloading_memory_map cpu_reloading_memory_map_rh850 +#define cpu_loop_exit cpu_loop_exit_rh850 +#define cpu_loop_exit_restore cpu_loop_exit_restore_rh850 +#define cpu_loop_exit_atomic cpu_loop_exit_atomic_rh850 +#define tlb_init tlb_init_rh850 +#define tlb_flush_by_mmuidx tlb_flush_by_mmuidx_rh850 +#define tlb_flush tlb_flush_rh850 +#define tlb_flush_by_mmuidx_all_cpus tlb_flush_by_mmuidx_all_cpus_rh850 +#define tlb_flush_all_cpus tlb_flush_all_cpus_rh850 +#define tlb_flush_by_mmuidx_all_cpus_synced tlb_flush_by_mmuidx_all_cpus_synced_rh850 +#define tlb_flush_all_cpus_synced tlb_flush_all_cpus_synced_rh850 +#define tlb_flush_page_by_mmuidx tlb_flush_page_by_mmuidx_rh850 +#define tlb_flush_page tlb_flush_page_rh850 +#define tlb_flush_page_by_mmuidx_all_cpus tlb_flush_page_by_mmuidx_all_cpus_rh850 +#define tlb_flush_page_all_cpus tlb_flush_page_all_cpus_rh850 +#define tlb_flush_page_by_mmuidx_all_cpus_synced tlb_flush_page_by_mmuidx_all_cpus_synced_rh850 +#define tlb_flush_page_all_cpus_synced tlb_flush_page_all_cpus_synced_rh850 +#define tlb_protect_code tlb_protect_code_rh850 +#define tlb_unprotect_code tlb_unprotect_code_rh850 +#define tlb_reset_dirty tlb_reset_dirty_rh850 +#define tlb_set_dirty tlb_set_dirty_rh850 +#define tlb_set_page_with_attrs tlb_set_page_with_attrs_rh850 +#define tlb_set_page tlb_set_page_rh850 +#define get_page_addr_code_hostp get_page_addr_code_hostp_rh850 +#define get_page_addr_code get_page_addr_code_rh850 +#define probe_access probe_access_rh850 +#define tlb_vaddr_to_host tlb_vaddr_to_host_rh850 +#define helper_ret_ldub_mmu helper_ret_ldub_mmu_rh850 +#define helper_le_lduw_mmu helper_le_lduw_mmu_rh850 +#define helper_be_lduw_mmu helper_be_lduw_mmu_rh850 +#define helper_le_ldul_mmu helper_le_ldul_mmu_rh850 +#define helper_be_ldul_mmu helper_be_ldul_mmu_rh850 +#define helper_le_ldq_mmu helper_le_ldq_mmu_rh850 +#define helper_be_ldq_mmu helper_be_ldq_mmu_rh850 +#define helper_ret_ldsb_mmu helper_ret_ldsb_mmu_rh850 +#define helper_le_ldsw_mmu helper_le_ldsw_mmu_rh850 +#define helper_be_ldsw_mmu helper_be_ldsw_mmu_rh850 +#define helper_le_ldsl_mmu helper_le_ldsl_mmu_rh850 +#define helper_be_ldsl_mmu helper_be_ldsl_mmu_rh850 +#define cpu_ldub_mmuidx_ra cpu_ldub_mmuidx_ra_rh850 +#define cpu_ldsb_mmuidx_ra 
cpu_ldsb_mmuidx_ra_rh850 +#define cpu_lduw_mmuidx_ra cpu_lduw_mmuidx_ra_rh850 +#define cpu_ldsw_mmuidx_ra cpu_ldsw_mmuidx_ra_rh850 +#define cpu_ldl_mmuidx_ra cpu_ldl_mmuidx_ra_rh850 +#define cpu_ldq_mmuidx_ra cpu_ldq_mmuidx_ra_rh850 +#define cpu_ldub_data_ra cpu_ldub_data_ra_rh850 +#define cpu_ldsb_data_ra cpu_ldsb_data_ra_rh850 +#define cpu_lduw_data_ra cpu_lduw_data_ra_rh850 +#define cpu_ldsw_data_ra cpu_ldsw_data_ra_rh850 +#define cpu_ldl_data_ra cpu_ldl_data_ra_rh850 +#define cpu_ldq_data_ra cpu_ldq_data_ra_rh850 +#define cpu_ldub_data cpu_ldub_data_rh850 +#define cpu_ldsb_data cpu_ldsb_data_rh850 +#define cpu_lduw_data cpu_lduw_data_rh850 +#define cpu_ldsw_data cpu_ldsw_data_rh850 +#define cpu_ldl_data cpu_ldl_data_rh850 +#define cpu_ldq_data cpu_ldq_data_rh850 +#define helper_ret_stb_mmu helper_ret_stb_mmu_rh850 +#define helper_le_stw_mmu helper_le_stw_mmu_rh850 +#define helper_be_stw_mmu helper_be_stw_mmu_rh850 +#define helper_le_stl_mmu helper_le_stl_mmu_rh850 +#define helper_be_stl_mmu helper_be_stl_mmu_rh850 +#define helper_le_stq_mmu helper_le_stq_mmu_rh850 +#define helper_be_stq_mmu helper_be_stq_mmu_rh850 +#define cpu_stb_mmuidx_ra cpu_stb_mmuidx_ra_rh850 +#define cpu_stw_mmuidx_ra cpu_stw_mmuidx_ra_rh850 +#define cpu_stl_mmuidx_ra cpu_stl_mmuidx_ra_rh850 +#define cpu_stq_mmuidx_ra cpu_stq_mmuidx_ra_rh850 +#define cpu_stb_data_ra cpu_stb_data_ra_rh850 +#define cpu_stw_data_ra cpu_stw_data_ra_rh850 +#define cpu_stl_data_ra cpu_stl_data_ra_rh850 +#define cpu_stq_data_ra cpu_stq_data_ra_rh850 +#define cpu_stb_data cpu_stb_data_rh850 +#define cpu_stw_data cpu_stw_data_rh850 +#define cpu_stl_data cpu_stl_data_rh850 +#define cpu_stq_data cpu_stq_data_rh850 +#define helper_atomic_cmpxchgb_mmu helper_atomic_cmpxchgb_mmu_rh850 +#define helper_atomic_xchgb_mmu helper_atomic_xchgb_mmu_rh850 +#define helper_atomic_fetch_addb_mmu helper_atomic_fetch_addb_mmu_rh850 +#define helper_atomic_fetch_andb_mmu helper_atomic_fetch_andb_mmu_rh850 +#define helper_atomic_fetch_orb_mmu helper_atomic_fetch_orb_mmu_rh850 +#define helper_atomic_fetch_xorb_mmu helper_atomic_fetch_xorb_mmu_rh850 +#define helper_atomic_add_fetchb_mmu helper_atomic_add_fetchb_mmu_rh850 +#define helper_atomic_and_fetchb_mmu helper_atomic_and_fetchb_mmu_rh850 +#define helper_atomic_or_fetchb_mmu helper_atomic_or_fetchb_mmu_rh850 +#define helper_atomic_xor_fetchb_mmu helper_atomic_xor_fetchb_mmu_rh850 +#define helper_atomic_fetch_sminb_mmu helper_atomic_fetch_sminb_mmu_rh850 +#define helper_atomic_fetch_uminb_mmu helper_atomic_fetch_uminb_mmu_rh850 +#define helper_atomic_fetch_smaxb_mmu helper_atomic_fetch_smaxb_mmu_rh850 +#define helper_atomic_fetch_umaxb_mmu helper_atomic_fetch_umaxb_mmu_rh850 +#define helper_atomic_smin_fetchb_mmu helper_atomic_smin_fetchb_mmu_rh850 +#define helper_atomic_umin_fetchb_mmu helper_atomic_umin_fetchb_mmu_rh850 +#define helper_atomic_smax_fetchb_mmu helper_atomic_smax_fetchb_mmu_rh850 +#define helper_atomic_umax_fetchb_mmu helper_atomic_umax_fetchb_mmu_rh850 +#define helper_atomic_cmpxchgw_le_mmu helper_atomic_cmpxchgw_le_mmu_rh850 +#define helper_atomic_xchgw_le_mmu helper_atomic_xchgw_le_mmu_rh850 +#define helper_atomic_fetch_addw_le_mmu helper_atomic_fetch_addw_le_mmu_rh850 +#define helper_atomic_fetch_andw_le_mmu helper_atomic_fetch_andw_le_mmu_rh850 +#define helper_atomic_fetch_orw_le_mmu helper_atomic_fetch_orw_le_mmu_rh850 +#define helper_atomic_fetch_xorw_le_mmu helper_atomic_fetch_xorw_le_mmu_rh850 +#define helper_atomic_add_fetchw_le_mmu helper_atomic_add_fetchw_le_mmu_rh850 +#define 
helper_atomic_and_fetchw_le_mmu helper_atomic_and_fetchw_le_mmu_rh850 +#define helper_atomic_or_fetchw_le_mmu helper_atomic_or_fetchw_le_mmu_rh850 +#define helper_atomic_xor_fetchw_le_mmu helper_atomic_xor_fetchw_le_mmu_rh850 +#define helper_atomic_fetch_sminw_le_mmu helper_atomic_fetch_sminw_le_mmu_rh850 +#define helper_atomic_fetch_uminw_le_mmu helper_atomic_fetch_uminw_le_mmu_rh850 +#define helper_atomic_fetch_smaxw_le_mmu helper_atomic_fetch_smaxw_le_mmu_rh850 +#define helper_atomic_fetch_umaxw_le_mmu helper_atomic_fetch_umaxw_le_mmu_rh850 +#define helper_atomic_smin_fetchw_le_mmu helper_atomic_smin_fetchw_le_mmu_rh850 +#define helper_atomic_umin_fetchw_le_mmu helper_atomic_umin_fetchw_le_mmu_rh850 +#define helper_atomic_smax_fetchw_le_mmu helper_atomic_smax_fetchw_le_mmu_rh850 +#define helper_atomic_umax_fetchw_le_mmu helper_atomic_umax_fetchw_le_mmu_rh850 +#define helper_atomic_cmpxchgw_be_mmu helper_atomic_cmpxchgw_be_mmu_rh850 +#define helper_atomic_xchgw_be_mmu helper_atomic_xchgw_be_mmu_rh850 +#define helper_atomic_fetch_andw_be_mmu helper_atomic_fetch_andw_be_mmu_rh850 +#define helper_atomic_fetch_orw_be_mmu helper_atomic_fetch_orw_be_mmu_rh850 +#define helper_atomic_fetch_xorw_be_mmu helper_atomic_fetch_xorw_be_mmu_rh850 +#define helper_atomic_and_fetchw_be_mmu helper_atomic_and_fetchw_be_mmu_rh850 +#define helper_atomic_or_fetchw_be_mmu helper_atomic_or_fetchw_be_mmu_rh850 +#define helper_atomic_xor_fetchw_be_mmu helper_atomic_xor_fetchw_be_mmu_rh850 +#define helper_atomic_fetch_sminw_be_mmu helper_atomic_fetch_sminw_be_mmu_rh850 +#define helper_atomic_fetch_uminw_be_mmu helper_atomic_fetch_uminw_be_mmu_rh850 +#define helper_atomic_fetch_smaxw_be_mmu helper_atomic_fetch_smaxw_be_mmu_rh850 +#define helper_atomic_fetch_umaxw_be_mmu helper_atomic_fetch_umaxw_be_mmu_rh850 +#define helper_atomic_smin_fetchw_be_mmu helper_atomic_smin_fetchw_be_mmu_rh850 +#define helper_atomic_umin_fetchw_be_mmu helper_atomic_umin_fetchw_be_mmu_rh850 +#define helper_atomic_smax_fetchw_be_mmu helper_atomic_smax_fetchw_be_mmu_rh850 +#define helper_atomic_umax_fetchw_be_mmu helper_atomic_umax_fetchw_be_mmu_rh850 +#define helper_atomic_fetch_addw_be_mmu helper_atomic_fetch_addw_be_mmu_rh850 +#define helper_atomic_add_fetchw_be_mmu helper_atomic_add_fetchw_be_mmu_rh850 +#define helper_atomic_cmpxchgl_le_mmu helper_atomic_cmpxchgl_le_mmu_rh850 +#define helper_atomic_xchgl_le_mmu helper_atomic_xchgl_le_mmu_rh850 +#define helper_atomic_fetch_addl_le_mmu helper_atomic_fetch_addl_le_mmu_rh850 +#define helper_atomic_fetch_andl_le_mmu helper_atomic_fetch_andl_le_mmu_rh850 +#define helper_atomic_fetch_orl_le_mmu helper_atomic_fetch_orl_le_mmu_rh850 +#define helper_atomic_fetch_xorl_le_mmu helper_atomic_fetch_xorl_le_mmu_rh850 +#define helper_atomic_add_fetchl_le_mmu helper_atomic_add_fetchl_le_mmu_rh850 +#define helper_atomic_and_fetchl_le_mmu helper_atomic_and_fetchl_le_mmu_rh850 +#define helper_atomic_or_fetchl_le_mmu helper_atomic_or_fetchl_le_mmu_rh850 +#define helper_atomic_xor_fetchl_le_mmu helper_atomic_xor_fetchl_le_mmu_rh850 +#define helper_atomic_fetch_sminl_le_mmu helper_atomic_fetch_sminl_le_mmu_rh850 +#define helper_atomic_fetch_uminl_le_mmu helper_atomic_fetch_uminl_le_mmu_rh850 +#define helper_atomic_fetch_smaxl_le_mmu helper_atomic_fetch_smaxl_le_mmu_rh850 +#define helper_atomic_fetch_umaxl_le_mmu helper_atomic_fetch_umaxl_le_mmu_rh850 +#define helper_atomic_smin_fetchl_le_mmu helper_atomic_smin_fetchl_le_mmu_rh850 +#define helper_atomic_umin_fetchl_le_mmu helper_atomic_umin_fetchl_le_mmu_rh850 
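The cpu_ld*/cpu_st* renames in this run follow QEMU's softmmu accessor naming: ld/st, u or s for zero- or sign-extension, b/w/l/q for 8/16/32/64-bit width, _data versus _mmuidx_ra for the implicit data MMU index versus an explicitly supplied one, and _ra for variants that carry the host return address used to unwind a faulting access. A hedged usage sketch, assuming the usual QEMU includes; demo_read_u16() and guest_addr are illustrative names, not part of the patch:

    static inline uint32_t demo_read_u16(CPUArchState *env, target_ulong guest_addr)
    {
        /* Zero-extending 16-bit load from the guest data address space,
         * using the current data MMU index. */
        return cpu_lduw_data(env, guest_addr);
    }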
+#define helper_atomic_smax_fetchl_le_mmu helper_atomic_smax_fetchl_le_mmu_rh850 +#define helper_atomic_umax_fetchl_le_mmu helper_atomic_umax_fetchl_le_mmu_rh850 +#define helper_atomic_cmpxchgl_be_mmu helper_atomic_cmpxchgl_be_mmu_rh850 +#define helper_atomic_xchgl_be_mmu helper_atomic_xchgl_be_mmu_rh850 +#define helper_atomic_fetch_andl_be_mmu helper_atomic_fetch_andl_be_mmu_rh850 +#define helper_atomic_fetch_orl_be_mmu helper_atomic_fetch_orl_be_mmu_rh850 +#define helper_atomic_fetch_xorl_be_mmu helper_atomic_fetch_xorl_be_mmu_rh850 +#define helper_atomic_and_fetchl_be_mmu helper_atomic_and_fetchl_be_mmu_rh850 +#define helper_atomic_or_fetchl_be_mmu helper_atomic_or_fetchl_be_mmu_rh850 +#define helper_atomic_xor_fetchl_be_mmu helper_atomic_xor_fetchl_be_mmu_rh850 +#define helper_atomic_fetch_sminl_be_mmu helper_atomic_fetch_sminl_be_mmu_rh850 +#define helper_atomic_fetch_uminl_be_mmu helper_atomic_fetch_uminl_be_mmu_rh850 +#define helper_atomic_fetch_smaxl_be_mmu helper_atomic_fetch_smaxl_be_mmu_rh850 +#define helper_atomic_fetch_umaxl_be_mmu helper_atomic_fetch_umaxl_be_mmu_rh850 +#define helper_atomic_smin_fetchl_be_mmu helper_atomic_smin_fetchl_be_mmu_rh850 +#define helper_atomic_umin_fetchl_be_mmu helper_atomic_umin_fetchl_be_mmu_rh850 +#define helper_atomic_smax_fetchl_be_mmu helper_atomic_smax_fetchl_be_mmu_rh850 +#define helper_atomic_umax_fetchl_be_mmu helper_atomic_umax_fetchl_be_mmu_rh850 +#define helper_atomic_fetch_addl_be_mmu helper_atomic_fetch_addl_be_mmu_rh850 +#define helper_atomic_add_fetchl_be_mmu helper_atomic_add_fetchl_be_mmu_rh850 +#define helper_atomic_cmpxchgq_le_mmu helper_atomic_cmpxchgq_le_mmu_rh850 +#define helper_atomic_xchgq_le_mmu helper_atomic_xchgq_le_mmu_rh850 +#define helper_atomic_fetch_addq_le_mmu helper_atomic_fetch_addq_le_mmu_rh850 +#define helper_atomic_fetch_andq_le_mmu helper_atomic_fetch_andq_le_mmu_rh850 +#define helper_atomic_fetch_orq_le_mmu helper_atomic_fetch_orq_le_mmu_rh850 +#define helper_atomic_fetch_xorq_le_mmu helper_atomic_fetch_xorq_le_mmu_rh850 +#define helper_atomic_add_fetchq_le_mmu helper_atomic_add_fetchq_le_mmu_rh850 +#define helper_atomic_and_fetchq_le_mmu helper_atomic_and_fetchq_le_mmu_rh850 +#define helper_atomic_or_fetchq_le_mmu helper_atomic_or_fetchq_le_mmu_rh850 +#define helper_atomic_xor_fetchq_le_mmu helper_atomic_xor_fetchq_le_mmu_rh850 +#define helper_atomic_fetch_sminq_le_mmu helper_atomic_fetch_sminq_le_mmu_rh850 +#define helper_atomic_fetch_uminq_le_mmu helper_atomic_fetch_uminq_le_mmu_rh850 +#define helper_atomic_fetch_smaxq_le_mmu helper_atomic_fetch_smaxq_le_mmu_rh850 +#define helper_atomic_fetch_umaxq_le_mmu helper_atomic_fetch_umaxq_le_mmu_rh850 +#define helper_atomic_smin_fetchq_le_mmu helper_atomic_smin_fetchq_le_mmu_rh850 +#define helper_atomic_umin_fetchq_le_mmu helper_atomic_umin_fetchq_le_mmu_rh850 +#define helper_atomic_smax_fetchq_le_mmu helper_atomic_smax_fetchq_le_mmu_rh850 +#define helper_atomic_umax_fetchq_le_mmu helper_atomic_umax_fetchq_le_mmu_rh850 +#define helper_atomic_cmpxchgq_be_mmu helper_atomic_cmpxchgq_be_mmu_rh850 +#define helper_atomic_xchgq_be_mmu helper_atomic_xchgq_be_mmu_rh850 +#define helper_atomic_fetch_andq_be_mmu helper_atomic_fetch_andq_be_mmu_rh850 +#define helper_atomic_fetch_orq_be_mmu helper_atomic_fetch_orq_be_mmu_rh850 +#define helper_atomic_fetch_xorq_be_mmu helper_atomic_fetch_xorq_be_mmu_rh850 +#define helper_atomic_and_fetchq_be_mmu helper_atomic_and_fetchq_be_mmu_rh850 +#define helper_atomic_or_fetchq_be_mmu helper_atomic_or_fetchq_be_mmu_rh850 +#define 
helper_atomic_xor_fetchq_be_mmu helper_atomic_xor_fetchq_be_mmu_rh850 +#define helper_atomic_fetch_sminq_be_mmu helper_atomic_fetch_sminq_be_mmu_rh850 +#define helper_atomic_fetch_uminq_be_mmu helper_atomic_fetch_uminq_be_mmu_rh850 +#define helper_atomic_fetch_smaxq_be_mmu helper_atomic_fetch_smaxq_be_mmu_rh850 +#define helper_atomic_fetch_umaxq_be_mmu helper_atomic_fetch_umaxq_be_mmu_rh850 +#define helper_atomic_smin_fetchq_be_mmu helper_atomic_smin_fetchq_be_mmu_rh850 +#define helper_atomic_umin_fetchq_be_mmu helper_atomic_umin_fetchq_be_mmu_rh850 +#define helper_atomic_smax_fetchq_be_mmu helper_atomic_smax_fetchq_be_mmu_rh850 +#define helper_atomic_umax_fetchq_be_mmu helper_atomic_umax_fetchq_be_mmu_rh850 +#define helper_atomic_fetch_addq_be_mmu helper_atomic_fetch_addq_be_mmu_rh850 +#define helper_atomic_add_fetchq_be_mmu helper_atomic_add_fetchq_be_mmu_rh850 +#define helper_atomic_cmpxchgb helper_atomic_cmpxchgb_rh850 +#define helper_atomic_xchgb helper_atomic_xchgb_rh850 +#define helper_atomic_fetch_addb helper_atomic_fetch_addb_rh850 +#define helper_atomic_fetch_andb helper_atomic_fetch_andb_rh850 +#define helper_atomic_fetch_orb helper_atomic_fetch_orb_rh850 +#define helper_atomic_fetch_xorb helper_atomic_fetch_xorb_rh850 +#define helper_atomic_add_fetchb helper_atomic_add_fetchb_rh850 +#define helper_atomic_and_fetchb helper_atomic_and_fetchb_rh850 +#define helper_atomic_or_fetchb helper_atomic_or_fetchb_rh850 +#define helper_atomic_xor_fetchb helper_atomic_xor_fetchb_rh850 +#define helper_atomic_fetch_sminb helper_atomic_fetch_sminb_rh850 +#define helper_atomic_fetch_uminb helper_atomic_fetch_uminb_rh850 +#define helper_atomic_fetch_smaxb helper_atomic_fetch_smaxb_rh850 +#define helper_atomic_fetch_umaxb helper_atomic_fetch_umaxb_rh850 +#define helper_atomic_smin_fetchb helper_atomic_smin_fetchb_rh850 +#define helper_atomic_umin_fetchb helper_atomic_umin_fetchb_rh850 +#define helper_atomic_smax_fetchb helper_atomic_smax_fetchb_rh850 +#define helper_atomic_umax_fetchb helper_atomic_umax_fetchb_rh850 +#define helper_atomic_cmpxchgw_le helper_atomic_cmpxchgw_le_rh850 +#define helper_atomic_xchgw_le helper_atomic_xchgw_le_rh850 +#define helper_atomic_fetch_addw_le helper_atomic_fetch_addw_le_rh850 +#define helper_atomic_fetch_andw_le helper_atomic_fetch_andw_le_rh850 +#define helper_atomic_fetch_orw_le helper_atomic_fetch_orw_le_rh850 +#define helper_atomic_fetch_xorw_le helper_atomic_fetch_xorw_le_rh850 +#define helper_atomic_add_fetchw_le helper_atomic_add_fetchw_le_rh850 +#define helper_atomic_and_fetchw_le helper_atomic_and_fetchw_le_rh850 +#define helper_atomic_or_fetchw_le helper_atomic_or_fetchw_le_rh850 +#define helper_atomic_xor_fetchw_le helper_atomic_xor_fetchw_le_rh850 +#define helper_atomic_fetch_sminw_le helper_atomic_fetch_sminw_le_rh850 +#define helper_atomic_fetch_uminw_le helper_atomic_fetch_uminw_le_rh850 +#define helper_atomic_fetch_smaxw_le helper_atomic_fetch_smaxw_le_rh850 +#define helper_atomic_fetch_umaxw_le helper_atomic_fetch_umaxw_le_rh850 +#define helper_atomic_smin_fetchw_le helper_atomic_smin_fetchw_le_rh850 +#define helper_atomic_umin_fetchw_le helper_atomic_umin_fetchw_le_rh850 +#define helper_atomic_smax_fetchw_le helper_atomic_smax_fetchw_le_rh850 +#define helper_atomic_umax_fetchw_le helper_atomic_umax_fetchw_le_rh850 +#define helper_atomic_cmpxchgw_be helper_atomic_cmpxchgw_be_rh850 +#define helper_atomic_xchgw_be helper_atomic_xchgw_be_rh850 +#define helper_atomic_fetch_andw_be helper_atomic_fetch_andw_be_rh850 +#define helper_atomic_fetch_orw_be 
helper_atomic_fetch_orw_be_rh850 +#define helper_atomic_fetch_xorw_be helper_atomic_fetch_xorw_be_rh850 +#define helper_atomic_and_fetchw_be helper_atomic_and_fetchw_be_rh850 +#define helper_atomic_or_fetchw_be helper_atomic_or_fetchw_be_rh850 +#define helper_atomic_xor_fetchw_be helper_atomic_xor_fetchw_be_rh850 +#define helper_atomic_fetch_sminw_be helper_atomic_fetch_sminw_be_rh850 +#define helper_atomic_fetch_uminw_be helper_atomic_fetch_uminw_be_rh850 +#define helper_atomic_fetch_smaxw_be helper_atomic_fetch_smaxw_be_rh850 +#define helper_atomic_fetch_umaxw_be helper_atomic_fetch_umaxw_be_rh850 +#define helper_atomic_smin_fetchw_be helper_atomic_smin_fetchw_be_rh850 +#define helper_atomic_umin_fetchw_be helper_atomic_umin_fetchw_be_rh850 +#define helper_atomic_smax_fetchw_be helper_atomic_smax_fetchw_be_rh850 +#define helper_atomic_umax_fetchw_be helper_atomic_umax_fetchw_be_rh850 +#define helper_atomic_fetch_addw_be helper_atomic_fetch_addw_be_rh850 +#define helper_atomic_add_fetchw_be helper_atomic_add_fetchw_be_rh850 +#define helper_atomic_cmpxchgl_le helper_atomic_cmpxchgl_le_rh850 +#define helper_atomic_xchgl_le helper_atomic_xchgl_le_rh850 +#define helper_atomic_fetch_addl_le helper_atomic_fetch_addl_le_rh850 +#define helper_atomic_fetch_andl_le helper_atomic_fetch_andl_le_rh850 +#define helper_atomic_fetch_orl_le helper_atomic_fetch_orl_le_rh850 +#define helper_atomic_fetch_xorl_le helper_atomic_fetch_xorl_le_rh850 +#define helper_atomic_add_fetchl_le helper_atomic_add_fetchl_le_rh850 +#define helper_atomic_and_fetchl_le helper_atomic_and_fetchl_le_rh850 +#define helper_atomic_or_fetchl_le helper_atomic_or_fetchl_le_rh850 +#define helper_atomic_xor_fetchl_le helper_atomic_xor_fetchl_le_rh850 +#define helper_atomic_fetch_sminl_le helper_atomic_fetch_sminl_le_rh850 +#define helper_atomic_fetch_uminl_le helper_atomic_fetch_uminl_le_rh850 +#define helper_atomic_fetch_smaxl_le helper_atomic_fetch_smaxl_le_rh850 +#define helper_atomic_fetch_umaxl_le helper_atomic_fetch_umaxl_le_rh850 +#define helper_atomic_smin_fetchl_le helper_atomic_smin_fetchl_le_rh850 +#define helper_atomic_umin_fetchl_le helper_atomic_umin_fetchl_le_rh850 +#define helper_atomic_smax_fetchl_le helper_atomic_smax_fetchl_le_rh850 +#define helper_atomic_umax_fetchl_le helper_atomic_umax_fetchl_le_rh850 +#define helper_atomic_cmpxchgl_be helper_atomic_cmpxchgl_be_rh850 +#define helper_atomic_xchgl_be helper_atomic_xchgl_be_rh850 +#define helper_atomic_fetch_andl_be helper_atomic_fetch_andl_be_rh850 +#define helper_atomic_fetch_orl_be helper_atomic_fetch_orl_be_rh850 +#define helper_atomic_fetch_xorl_be helper_atomic_fetch_xorl_be_rh850 +#define helper_atomic_and_fetchl_be helper_atomic_and_fetchl_be_rh850 +#define helper_atomic_or_fetchl_be helper_atomic_or_fetchl_be_rh850 +#define helper_atomic_xor_fetchl_be helper_atomic_xor_fetchl_be_rh850 +#define helper_atomic_fetch_sminl_be helper_atomic_fetch_sminl_be_rh850 +#define helper_atomic_fetch_uminl_be helper_atomic_fetch_uminl_be_rh850 +#define helper_atomic_fetch_smaxl_be helper_atomic_fetch_smaxl_be_rh850 +#define helper_atomic_fetch_umaxl_be helper_atomic_fetch_umaxl_be_rh850 +#define helper_atomic_smin_fetchl_be helper_atomic_smin_fetchl_be_rh850 +#define helper_atomic_umin_fetchl_be helper_atomic_umin_fetchl_be_rh850 +#define helper_atomic_smax_fetchl_be helper_atomic_smax_fetchl_be_rh850 +#define helper_atomic_umax_fetchl_be helper_atomic_umax_fetchl_be_rh850 +#define helper_atomic_fetch_addl_be helper_atomic_fetch_addl_be_rh850 +#define 
helper_atomic_add_fetchl_be helper_atomic_add_fetchl_be_rh850 +#define helper_atomic_cmpxchgq_le helper_atomic_cmpxchgq_le_rh850 +#define helper_atomic_xchgq_le helper_atomic_xchgq_le_rh850 +#define helper_atomic_fetch_addq_le helper_atomic_fetch_addq_le_rh850 +#define helper_atomic_fetch_andq_le helper_atomic_fetch_andq_le_rh850 +#define helper_atomic_fetch_orq_le helper_atomic_fetch_orq_le_rh850 +#define helper_atomic_fetch_xorq_le helper_atomic_fetch_xorq_le_rh850 +#define helper_atomic_add_fetchq_le helper_atomic_add_fetchq_le_rh850 +#define helper_atomic_and_fetchq_le helper_atomic_and_fetchq_le_rh850 +#define helper_atomic_or_fetchq_le helper_atomic_or_fetchq_le_rh850 +#define helper_atomic_xor_fetchq_le helper_atomic_xor_fetchq_le_rh850 +#define helper_atomic_fetch_sminq_le helper_atomic_fetch_sminq_le_rh850 +#define helper_atomic_fetch_uminq_le helper_atomic_fetch_uminq_le_rh850 +#define helper_atomic_fetch_smaxq_le helper_atomic_fetch_smaxq_le_rh850 +#define helper_atomic_fetch_umaxq_le helper_atomic_fetch_umaxq_le_rh850 +#define helper_atomic_smin_fetchq_le helper_atomic_smin_fetchq_le_rh850 +#define helper_atomic_umin_fetchq_le helper_atomic_umin_fetchq_le_rh850 +#define helper_atomic_smax_fetchq_le helper_atomic_smax_fetchq_le_rh850 +#define helper_atomic_umax_fetchq_le helper_atomic_umax_fetchq_le_rh850 +#define helper_atomic_cmpxchgq_be helper_atomic_cmpxchgq_be_rh850 +#define helper_atomic_xchgq_be helper_atomic_xchgq_be_rh850 +#define helper_atomic_fetch_andq_be helper_atomic_fetch_andq_be_rh850 +#define helper_atomic_fetch_orq_be helper_atomic_fetch_orq_be_rh850 +#define helper_atomic_fetch_xorq_be helper_atomic_fetch_xorq_be_rh850 +#define helper_atomic_and_fetchq_be helper_atomic_and_fetchq_be_rh850 +#define helper_atomic_or_fetchq_be helper_atomic_or_fetchq_be_rh850 +#define helper_atomic_xor_fetchq_be helper_atomic_xor_fetchq_be_rh850 +#define helper_atomic_fetch_sminq_be helper_atomic_fetch_sminq_be_rh850 +#define helper_atomic_fetch_uminq_be helper_atomic_fetch_uminq_be_rh850 +#define helper_atomic_fetch_smaxq_be helper_atomic_fetch_smaxq_be_rh850 +#define helper_atomic_fetch_umaxq_be helper_atomic_fetch_umaxq_be_rh850 +#define helper_atomic_smin_fetchq_be helper_atomic_smin_fetchq_be_rh850 +#define helper_atomic_umin_fetchq_be helper_atomic_umin_fetchq_be_rh850 +#define helper_atomic_smax_fetchq_be helper_atomic_smax_fetchq_be_rh850 +#define helper_atomic_umax_fetchq_be helper_atomic_umax_fetchq_be_rh850 +#define helper_atomic_fetch_addq_be helper_atomic_fetch_addq_be_rh850 +#define helper_atomic_add_fetchq_be helper_atomic_add_fetchq_be_rh850 +#define cpu_ldub_code cpu_ldub_code_rh850 +#define cpu_lduw_code cpu_lduw_code_rh850 +#define cpu_ldl_code cpu_ldl_code_rh850 +#define cpu_ldq_code cpu_ldq_code_rh850 +#define helper_div_i32 helper_div_i32_rh850 +#define helper_rem_i32 helper_rem_i32_rh850 +#define helper_divu_i32 helper_divu_i32_rh850 +#define helper_remu_i32 helper_remu_i32_rh850 +#define helper_shl_i64 helper_shl_i64_rh850 +#define helper_shr_i64 helper_shr_i64_rh850 +#define helper_sar_i64 helper_sar_i64_rh850 +#define helper_div_i64 helper_div_i64_rh850 +#define helper_rem_i64 helper_rem_i64_rh850 +#define helper_divu_i64 helper_divu_i64_rh850 +#define helper_remu_i64 helper_remu_i64_rh850 +#define helper_muluh_i64 helper_muluh_i64_rh850 +#define helper_mulsh_i64 helper_mulsh_i64_rh850 +#define helper_clz_i32 helper_clz_i32_rh850 +#define helper_ctz_i32 helper_ctz_i32_rh850 +#define helper_clz_i64 helper_clz_i64_rh850 +#define helper_ctz_i64 
helper_ctz_i64_rh850 +#define helper_clrsb_i32 helper_clrsb_i32_rh850 +#define helper_clrsb_i64 helper_clrsb_i64_rh850 +#define helper_ctpop_i32 helper_ctpop_i32_rh850 +#define helper_ctpop_i64 helper_ctpop_i64_rh850 +#define helper_lookup_tb_ptr helper_lookup_tb_ptr_rh850 +#define helper_exit_atomic helper_exit_atomic_rh850 +#define helper_gvec_add8 helper_gvec_add8_rh850 +#define helper_gvec_add16 helper_gvec_add16_rh850 +#define helper_gvec_add32 helper_gvec_add32_rh850 +#define helper_gvec_add64 helper_gvec_add64_rh850 +#define helper_gvec_adds8 helper_gvec_adds8_rh850 +#define helper_gvec_adds16 helper_gvec_adds16_rh850 +#define helper_gvec_adds32 helper_gvec_adds32_rh850 +#define helper_gvec_adds64 helper_gvec_adds64_rh850 +#define helper_gvec_sub8 helper_gvec_sub8_rh850 +#define helper_gvec_sub16 helper_gvec_sub16_rh850 +#define helper_gvec_sub32 helper_gvec_sub32_rh850 +#define helper_gvec_sub64 helper_gvec_sub64_rh850 +#define helper_gvec_subs8 helper_gvec_subs8_rh850 +#define helper_gvec_subs16 helper_gvec_subs16_rh850 +#define helper_gvec_subs32 helper_gvec_subs32_rh850 +#define helper_gvec_subs64 helper_gvec_subs64_rh850 +#define helper_gvec_mul8 helper_gvec_mul8_rh850 +#define helper_gvec_mul16 helper_gvec_mul16_rh850 +#define helper_gvec_mul32 helper_gvec_mul32_rh850 +#define helper_gvec_mul64 helper_gvec_mul64_rh850 +#define helper_gvec_muls8 helper_gvec_muls8_rh850 +#define helper_gvec_muls16 helper_gvec_muls16_rh850 +#define helper_gvec_muls32 helper_gvec_muls32_rh850 +#define helper_gvec_muls64 helper_gvec_muls64_rh850 +#define helper_gvec_neg8 helper_gvec_neg8_rh850 +#define helper_gvec_neg16 helper_gvec_neg16_rh850 +#define helper_gvec_neg32 helper_gvec_neg32_rh850 +#define helper_gvec_neg64 helper_gvec_neg64_rh850 +#define helper_gvec_abs8 helper_gvec_abs8_rh850 +#define helper_gvec_abs16 helper_gvec_abs16_rh850 +#define helper_gvec_abs32 helper_gvec_abs32_rh850 +#define helper_gvec_abs64 helper_gvec_abs64_rh850 +#define helper_gvec_mov helper_gvec_mov_rh850 +#define helper_gvec_dup64 helper_gvec_dup64_rh850 +#define helper_gvec_dup32 helper_gvec_dup32_rh850 +#define helper_gvec_dup16 helper_gvec_dup16_rh850 +#define helper_gvec_dup8 helper_gvec_dup8_rh850 +#define helper_gvec_not helper_gvec_not_rh850 +#define helper_gvec_and helper_gvec_and_rh850 +#define helper_gvec_or helper_gvec_or_rh850 +#define helper_gvec_xor helper_gvec_xor_rh850 +#define helper_gvec_andc helper_gvec_andc_rh850 +#define helper_gvec_orc helper_gvec_orc_rh850 +#define helper_gvec_nand helper_gvec_nand_rh850 +#define helper_gvec_nor helper_gvec_nor_rh850 +#define helper_gvec_eqv helper_gvec_eqv_rh850 +#define helper_gvec_ands helper_gvec_ands_rh850 +#define helper_gvec_xors helper_gvec_xors_rh850 +#define helper_gvec_ors helper_gvec_ors_rh850 +#define helper_gvec_shl8i helper_gvec_shl8i_rh850 +#define helper_gvec_shl16i helper_gvec_shl16i_rh850 +#define helper_gvec_shl32i helper_gvec_shl32i_rh850 +#define helper_gvec_shl64i helper_gvec_shl64i_rh850 +#define helper_gvec_shr8i helper_gvec_shr8i_rh850 +#define helper_gvec_shr16i helper_gvec_shr16i_rh850 +#define helper_gvec_shr32i helper_gvec_shr32i_rh850 +#define helper_gvec_shr64i helper_gvec_shr64i_rh850 +#define helper_gvec_sar8i helper_gvec_sar8i_rh850 +#define helper_gvec_sar16i helper_gvec_sar16i_rh850 +#define helper_gvec_sar32i helper_gvec_sar32i_rh850 +#define helper_gvec_sar64i helper_gvec_sar64i_rh850 +#define helper_gvec_shl8v helper_gvec_shl8v_rh850 +#define helper_gvec_shl16v helper_gvec_shl16v_rh850 +#define helper_gvec_shl32v 
helper_gvec_shl32v_rh850 +#define helper_gvec_shl64v helper_gvec_shl64v_rh850 +#define helper_gvec_shr8v helper_gvec_shr8v_rh850 +#define helper_gvec_shr16v helper_gvec_shr16v_rh850 +#define helper_gvec_shr32v helper_gvec_shr32v_rh850 +#define helper_gvec_shr64v helper_gvec_shr64v_rh850 +#define helper_gvec_sar8v helper_gvec_sar8v_rh850 +#define helper_gvec_sar16v helper_gvec_sar16v_rh850 +#define helper_gvec_sar32v helper_gvec_sar32v_rh850 +#define helper_gvec_sar64v helper_gvec_sar64v_rh850 +#define helper_gvec_eq8 helper_gvec_eq8_rh850 +#define helper_gvec_ne8 helper_gvec_ne8_rh850 +#define helper_gvec_lt8 helper_gvec_lt8_rh850 +#define helper_gvec_le8 helper_gvec_le8_rh850 +#define helper_gvec_ltu8 helper_gvec_ltu8_rh850 +#define helper_gvec_leu8 helper_gvec_leu8_rh850 +#define helper_gvec_eq16 helper_gvec_eq16_rh850 +#define helper_gvec_ne16 helper_gvec_ne16_rh850 +#define helper_gvec_lt16 helper_gvec_lt16_rh850 +#define helper_gvec_le16 helper_gvec_le16_rh850 +#define helper_gvec_ltu16 helper_gvec_ltu16_rh850 +#define helper_gvec_leu16 helper_gvec_leu16_rh850 +#define helper_gvec_eq32 helper_gvec_eq32_rh850 +#define helper_gvec_ne32 helper_gvec_ne32_rh850 +#define helper_gvec_lt32 helper_gvec_lt32_rh850 +#define helper_gvec_le32 helper_gvec_le32_rh850 +#define helper_gvec_ltu32 helper_gvec_ltu32_rh850 +#define helper_gvec_leu32 helper_gvec_leu32_rh850 +#define helper_gvec_eq64 helper_gvec_eq64_rh850 +#define helper_gvec_ne64 helper_gvec_ne64_rh850 +#define helper_gvec_lt64 helper_gvec_lt64_rh850 +#define helper_gvec_le64 helper_gvec_le64_rh850 +#define helper_gvec_ltu64 helper_gvec_ltu64_rh850 +#define helper_gvec_leu64 helper_gvec_leu64_rh850 +#define helper_gvec_ssadd8 helper_gvec_ssadd8_rh850 +#define helper_gvec_ssadd16 helper_gvec_ssadd16_rh850 +#define helper_gvec_ssadd32 helper_gvec_ssadd32_rh850 +#define helper_gvec_ssadd64 helper_gvec_ssadd64_rh850 +#define helper_gvec_sssub8 helper_gvec_sssub8_rh850 +#define helper_gvec_sssub16 helper_gvec_sssub16_rh850 +#define helper_gvec_sssub32 helper_gvec_sssub32_rh850 +#define helper_gvec_sssub64 helper_gvec_sssub64_rh850 +#define helper_gvec_usadd8 helper_gvec_usadd8_rh850 +#define helper_gvec_usadd16 helper_gvec_usadd16_rh850 +#define helper_gvec_usadd32 helper_gvec_usadd32_rh850 +#define helper_gvec_usadd64 helper_gvec_usadd64_rh850 +#define helper_gvec_ussub8 helper_gvec_ussub8_rh850 +#define helper_gvec_ussub16 helper_gvec_ussub16_rh850 +#define helper_gvec_ussub32 helper_gvec_ussub32_rh850 +#define helper_gvec_ussub64 helper_gvec_ussub64_rh850 +#define helper_gvec_smin8 helper_gvec_smin8_rh850 +#define helper_gvec_smin16 helper_gvec_smin16_rh850 +#define helper_gvec_smin32 helper_gvec_smin32_rh850 +#define helper_gvec_smin64 helper_gvec_smin64_rh850 +#define helper_gvec_smax8 helper_gvec_smax8_rh850 +#define helper_gvec_smax16 helper_gvec_smax16_rh850 +#define helper_gvec_smax32 helper_gvec_smax32_rh850 +#define helper_gvec_smax64 helper_gvec_smax64_rh850 +#define helper_gvec_umin8 helper_gvec_umin8_rh850 +#define helper_gvec_umin16 helper_gvec_umin16_rh850 +#define helper_gvec_umin32 helper_gvec_umin32_rh850 +#define helper_gvec_umin64 helper_gvec_umin64_rh850 +#define helper_gvec_umax8 helper_gvec_umax8_rh850 +#define helper_gvec_umax16 helper_gvec_umax16_rh850 +#define helper_gvec_umax32 helper_gvec_umax32_rh850 +#define helper_gvec_umax64 helper_gvec_umax64_rh850 +#define helper_gvec_bitsel helper_gvec_bitsel_rh850 +#define cpu_restore_state cpu_restore_state_rh850 +#define page_collection_lock page_collection_lock_rh850 
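The helper_gvec_* renames cover QEMU's generic out-of-line vector helpers; the numeric suffix is the element width in bits (8/16/32/64) and the "s" variants take a scalar operand. A rough sketch of the shape of such a helper, assuming the usual QEMU includes; demo_gvec_add32() is an illustrative stand-in, and the real helpers additionally clear the destination tail up to the maximum vector size encoded in desc:

    void demo_gvec_add32(void *d, void *a, void *b, uint32_t desc)
    {
        intptr_t oprsz = simd_oprsz(desc);   /* operation size in bytes */
        intptr_t i;

        /* Element-wise 32-bit addition over the operation size. */
        for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
            *(uint32_t *)((char *)d + i) =
                *(uint32_t *)((char *)a + i) + *(uint32_t *)((char *)b + i);
        }
    }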
+#define page_collection_unlock page_collection_unlock_rh850 +#define free_code_gen_buffer free_code_gen_buffer_rh850 +#define tcg_exec_init tcg_exec_init_rh850 +#define tb_cleanup tb_cleanup_rh850 +#define tb_flush tb_flush_rh850 +#define tb_phys_invalidate tb_phys_invalidate_rh850 +#define tb_gen_code tb_gen_code_rh850 +#define tb_exec_lock tb_exec_lock_rh850 +#define tb_exec_unlock tb_exec_unlock_rh850 +#define tb_invalidate_phys_page_range tb_invalidate_phys_page_range_rh850 +#define tb_invalidate_phys_range tb_invalidate_phys_range_rh850 +#define tb_invalidate_phys_page_fast tb_invalidate_phys_page_fast_rh850 +#define tb_check_watchpoint tb_check_watchpoint_rh850 +#define cpu_io_recompile cpu_io_recompile_rh850 +#define tb_flush_jmp_cache tb_flush_jmp_cache_rh850 +#define tcg_flush_softmmu_tlb tcg_flush_softmmu_tlb_rh850 +#define translator_loop_temp_check translator_loop_temp_check_rh850 +#define translator_loop translator_loop_rh850 +#define helper_atomic_cmpxchgo_le_mmu helper_atomic_cmpxchgo_le_mmu_rh850 +#define helper_atomic_cmpxchgo_be_mmu helper_atomic_cmpxchgo_be_mmu_rh850 +#define helper_atomic_ldo_le_mmu helper_atomic_ldo_le_mmu_rh850 +#define helper_atomic_ldo_be_mmu helper_atomic_ldo_be_mmu_rh850 +#define helper_atomic_sto_le_mmu helper_atomic_sto_le_mmu_rh850 +#define helper_atomic_sto_be_mmu helper_atomic_sto_be_mmu_rh850 +#define unassigned_mem_ops unassigned_mem_ops_rh850 +#define floatx80_infinity floatx80_infinity_rh850 +#define dup_const_func dup_const_func_rh850 +#define gen_helper_raise_exception gen_helper_raise_exception_rh850 +#define gen_helper_raise_interrupt gen_helper_raise_interrupt_rh850 +#define gen_helper_vfp_get_fpscr gen_helper_vfp_get_fpscr_rh850 +#define gen_helper_vfp_set_fpscr gen_helper_vfp_set_fpscr_rh850 +#define gen_helper_cpsr_read gen_helper_cpsr_read_rh850 +#define gen_helper_cpsr_write gen_helper_cpsr_write_rh850 +#define restore_state_to_opc restore_state_to_opc_rh850 +#define helper_tlb_flush helper_tlb_flush_rh850 +#define helper_uc_rh850_exit helper_uc_rh850_exit_rh850 +#define gen_intermediate_code gen_intermediate_code_rh850 +#endif diff --git a/qemu/target/avr/cpu-param.h b/qemu/target/avr/cpu-param.h new file mode 100644 index 0000000000..7ef4e7c679 --- /dev/null +++ b/qemu/target/avr/cpu-param.h @@ -0,0 +1,36 @@ +/* + * QEMU AVR CPU + * + * Copyright (c) 2016-2020 Michael Rolnik + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see + * + */ + +#ifndef AVR_CPU_PARAM_H +#define AVR_CPU_PARAM_H + +#define TARGET_LONG_BITS 32 +/* + * TARGET_PAGE_BITS cannot be more than 8 bits because + * 1. all IO registers occupy [0x0000 .. 0x00ff] address range, and they + * should be implemented as a device and not memory + * 2. 
SRAM starts at the address 0x0100 + */ +#define TARGET_PAGE_BITS 8 +#define TARGET_PHYS_ADDR_SPACE_BITS 24 +#define TARGET_VIRT_ADDR_SPACE_BITS 24 +#define NB_MMU_MODES 2 + +#endif diff --git a/qemu/target/avr/cpu-qom.h b/qemu/target/avr/cpu-qom.h new file mode 100644 index 0000000000..9ba1ea1b37 --- /dev/null +++ b/qemu/target/avr/cpu-qom.h @@ -0,0 +1,56 @@ +/* + * QEMU AVR CPU + * + * Copyright (c) 2016-2020 Michael Rolnik + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see + * + */ + +#ifndef QEMU_AVR_QOM_H +#define QEMU_AVR_QOM_H + +#include "hw/core/cpu.h" + +typedef void Object; +typedef void ObjectClass; + +typedef void DeviceState; +typedef void (*DeviceRealize)(DeviceState *ds); +typedef void (*DeviceReset)(DeviceState *ds); + +#define TYPE_AVR_CPU "avr-cpu" + +#define AVR_CPU(obj) ((AVRCPU *)obj) +#define AVR_CPU_CLASS(klass) ((AVRCPUClass *)klass) +#define AVR_CPU_GET_CLASS(obj) (&((AVRCPU *)obj)->cc) + +/** + * AVRCPUClass: + * @parent_realize: The parent class' realize handler. + * @parent_reset: The parent class' reset handler. + * @vr: Version Register value. + * + * A AVR CPU model. + */ +typedef struct AVRCPUClass { + /*< private >*/ + CPUClass parent_class; + /*< public >*/ + DeviceRealize parent_realize; + DeviceReset parent_reset; +} AVRCPUClass; + + +#endif /* !defined (QEMU_AVR_CPU_QOM_H) */ diff --git a/qemu/target/avr/cpu.c b/qemu/target/avr/cpu.c new file mode 100644 index 0000000000..c062723814 --- /dev/null +++ b/qemu/target/avr/cpu.c @@ -0,0 +1,459 @@ +/* + * QEMU AVR CPU + * + * Copyright (c) 2019-2020 Michael Rolnik + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see + * + */ + +#include "qemu/osdep.h" +#include "exec/exec-all.h" +#include "cpu.h" +#include "unicorn_helper.h" + +static void avr_cpu_set_pc(CPUState *cs, vaddr value) +{ + AVRCPU *cpu = AVR_CPU(cs); + + cpu->env.pc_w = value / 2; /* internally PC points to words */ +} + +static bool avr_cpu_has_work(CPUState *cs) +{ + AVRCPU *cpu = AVR_CPU(cs); + CPUAVRState *env = &cpu->env; + + return (cs->interrupt_request & (CPU_INTERRUPT_HARD | CPU_INTERRUPT_RESET)) + && cpu_interrupts_enabled(env); +} + +static void avr_cpu_synchronize_from_tb(CPUState *cs, TranslationBlock *tb) +{ + AVRCPU *cpu = AVR_CPU(cs); + CPUAVRState *env = &cpu->env; + + env->pc_w = tb->pc / 2; /* internally PC points to words */ +} + +static void avr_cpu_reset(CPUState *cs) +{ + AVRCPU *cpu = AVR_CPU(cs); + AVRCPUClass *mcc = AVR_CPU_GET_CLASS(cpu); + CPUAVRState *env = &cpu->env; + + if (mcc->parent_reset) + mcc->parent_reset(cs); + + env->pc_w = 0; + env->sregI = 1; + env->sregC = 0; + env->sregZ = 0; + env->sregN = 0; + env->sregV = 0; + env->sregS = 0; + env->sregH = 0; + env->sregT = 0; + + env->rampD = 0; + env->rampX = 0; + env->rampY = 0; + env->rampZ = 0; + env->eind = 0; + env->sp = 0; + + env->skip = 0; + + memset(env->r, 0, sizeof(env->r)); +} + +#if 0 +static void avr_cpu_disas_set_info(CPUState *cpu, disassemble_info *info) +{ + info->mach = bfd_arch_avr; + info->print_insn = avr_print_insn; +} +#endif + +static void avr_cpu_realizefn(DeviceState *dev) +{ + CPUState *cs = CPU(dev); + AVRCPUClass *mcc = AVR_CPU_GET_CLASS(dev); + + cpu_exec_realizefn(cs); + qemu_init_vcpu(cs); + cpu_reset(cs); + + if (mcc->parent_realize) + mcc->parent_realize(dev); +} + +#if 0 +static void avr_cpu_set_int(void *opaque, int irq, int level) +{ + AVRCPU *cpu = opaque; + CPUAVRState *env = &cpu->env; + CPUState *cs = CPU(cpu); + uint64_t mask = (1ull << irq); + + if (level) { + env->intsrc |= mask; + cpu_interrupt(cs, CPU_INTERRUPT_HARD); + } else { + env->intsrc &= ~mask; + if (env->intsrc == 0) { + cpu_reset_interrupt(cs, CPU_INTERRUPT_HARD); + } + } +} +#endif + +static void avr_cpu_initfn(Object *obj, struct uc_struct *uc) +{ + AVRCPU *cpu = AVR_CPU(obj); + CPUAVRState *const env = &cpu->env; + + env->uc = uc; + cpu_set_cpustate_pointers(cpu); + +#if 0 + /* Set the number of interrupts supported by the CPU. 
*/ + qdev_init_gpio_in(DEVICE(cpu), avr_cpu_set_int, + sizeof(cpu->env.intsrc) * 8); +#endif +} + +#if 0 +static ObjectClass *avr_cpu_class_by_name(const char *cpu_model) +{ + ObjectClass *oc; + + oc = object_class_by_name(cpu_model); + if (object_class_dynamic_cast(oc, TYPE_AVR_CPU) == NULL || + object_class_is_abstract(oc)) { + oc = NULL; + } + return oc; +} +#endif + +#if 0 +static void avr_cpu_dump_state(CPUState *cs, FILE *f, int flags) +{ + AVRCPU *cpu = AVR_CPU(cs); + CPUAVRState *env = &cpu->env; + int i; + + qemu_fprintf(f, "\n"); + qemu_fprintf(f, "PC: %06x\n", env->pc_w * 2); /* PC points to words */ + qemu_fprintf(f, "SP: %04x\n", env->sp); + qemu_fprintf(f, "rampD: %02x\n", env->rampD >> 16); + qemu_fprintf(f, "rampX: %02x\n", env->rampX >> 16); + qemu_fprintf(f, "rampY: %02x\n", env->rampY >> 16); + qemu_fprintf(f, "rampZ: %02x\n", env->rampZ >> 16); + qemu_fprintf(f, "EIND: %02x\n", env->eind >> 16); + qemu_fprintf(f, "X: %02x%02x\n", env->r[27], env->r[26]); + qemu_fprintf(f, "Y: %02x%02x\n", env->r[29], env->r[28]); + qemu_fprintf(f, "Z: %02x%02x\n", env->r[31], env->r[30]); + qemu_fprintf(f, "SREG: [ %c %c %c %c %c %c %c %c ]\n", + env->sregI ? 'I' : '-', + env->sregT ? 'T' : '-', + env->sregH ? 'H' : '-', + env->sregS ? 'S' : '-', + env->sregV ? 'V' : '-', + env->sregN ? '-' : 'N', /* Zf has negative logic */ + env->sregZ ? 'Z' : '-', + env->sregC ? 'I' : '-'); + qemu_fprintf(f, "SKIP: %02x\n", env->skip); + + qemu_fprintf(f, "\n"); + for (i = 0; i < ARRAY_SIZE(env->r); i++) { + qemu_fprintf(f, "R[%02d]: %02x ", i, env->r[i]); + + if ((i % 8) == 7) { + qemu_fprintf(f, "\n"); + } + } + qemu_fprintf(f, "\n"); +} +#endif + +static void avr_cpu_class_init(ObjectClass *oc, void *data) +{ + CPUClass *cc = CPU_CLASS(oc); + AVRCPUClass *mcc = AVR_CPU_CLASS(oc); + + mcc->parent_realize = NULL; + mcc->parent_reset = NULL; + +#if 0 + cc->class_by_name = avr_cpu_class_by_name; +#endif + + cc->reset = avr_cpu_reset; + cc->has_work = avr_cpu_has_work; + cc->do_interrupt = avr_cpu_do_interrupt; + cc->cpu_exec_interrupt = avr_cpu_exec_interrupt; +#if 0 + cc->dump_state = avr_cpu_dump_state; +#endif + cc->set_pc = avr_cpu_set_pc; +#if 0 + cc->memory_rw_debug = avr_cpu_memory_rw_debug; +#endif + cc->get_phys_page_debug = avr_cpu_get_phys_page_debug; + cc->tlb_fill = avr_cpu_tlb_fill; +#if 0 + cc->vmsd = &vms_avr_cpu; + cc->disas_set_info = avr_cpu_disas_set_info; +#endif + cc->tcg_initialize = avr_cpu_tcg_init; + cc->synchronize_from_tb = avr_cpu_synchronize_from_tb; +#if 0 + cc->gdb_read_register = avr_cpu_gdb_read_register; + cc->gdb_write_register = avr_cpu_gdb_write_register; + cc->gdb_num_core_regs = 35; + cc->gdb_core_xml_file = "avr-cpu.xml"; +#endif +} + +/* + * Setting features of AVR core type avr5 + * -------------------------------------- + * + * This type of AVR core is present in the following AVR MCUs: + * + * ata5702m322, ata5782, ata5790, ata5790n, ata5791, ata5795, ata5831, ata6613c, + * ata6614q, ata8210, ata8510, atmega16, atmega16a, atmega161, atmega162, + * atmega163, atmega164a, atmega164p, atmega164pa, atmega165, atmega165a, + * atmega165p, atmega165pa, atmega168, atmega168a, atmega168p, atmega168pa, + * atmega168pb, atmega169, atmega169a, atmega169p, atmega169pa, atmega16hvb, + * atmega16hvbrevb, atmega16m1, atmega16u4, atmega32a, atmega32, atmega323, + * atmega324a, atmega324p, atmega324pa, atmega325, atmega325a, atmega325p, + * atmega325pa, atmega3250, atmega3250a, atmega3250p, atmega3250pa, atmega328, + * atmega328p, atmega328pb, atmega329, atmega329a, 
atmega329p, atmega329pa, + * atmega3290, atmega3290a, atmega3290p, atmega3290pa, atmega32c1, atmega32m1, + * atmega32u4, atmega32u6, atmega406, atmega64, atmega64a, atmega640, atmega644, + * atmega644a, atmega644p, atmega644pa, atmega645, atmega645a, atmega645p, + * atmega6450, atmega6450a, atmega6450p, atmega649, atmega649a, atmega649p, + * atmega6490, atmega16hva, atmega16hva2, atmega32hvb, atmega6490a, atmega6490p, + * atmega64c1, atmega64m1, atmega64hve, atmega64hve2, atmega64rfr2, + * atmega644rfr2, atmega32hvbrevb, at90can32, at90can64, at90pwm161, at90pwm216, + * at90pwm316, at90scr100, at90usb646, at90usb647, at94k, m3000 + */ +static void avr_avr5_initfn(Object *obj) +{ + AVRCPU *cpu = AVR_CPU(obj); + CPUAVRState *env = &cpu->env; + + set_avr_feature(env, AVR_FEATURE_LPM); + set_avr_feature(env, AVR_FEATURE_IJMP_ICALL); + set_avr_feature(env, AVR_FEATURE_ADIW_SBIW); + set_avr_feature(env, AVR_FEATURE_SRAM); + set_avr_feature(env, AVR_FEATURE_BREAK); + + set_avr_feature(env, AVR_FEATURE_2_BYTE_PC); + set_avr_feature(env, AVR_FEATURE_2_BYTE_SP); + set_avr_feature(env, AVR_FEATURE_JMP_CALL); + set_avr_feature(env, AVR_FEATURE_LPMX); + set_avr_feature(env, AVR_FEATURE_MOVW); + set_avr_feature(env, AVR_FEATURE_MUL); +} + +/* + * Setting features of AVR core type avr51 + * -------------------------------------- + * + * This type of AVR core is present in the following AVR MCUs: + * + * atmega128, atmega128a, atmega1280, atmega1281, atmega1284, atmega1284p, + * atmega128rfa1, atmega128rfr2, atmega1284rfr2, at90can128, at90usb1286, + * at90usb1287 + */ +static void avr_avr51_initfn(Object *obj) +{ + AVRCPU *cpu = AVR_CPU(obj); + CPUAVRState *env = &cpu->env; + + set_avr_feature(env, AVR_FEATURE_LPM); + set_avr_feature(env, AVR_FEATURE_IJMP_ICALL); + set_avr_feature(env, AVR_FEATURE_ADIW_SBIW); + set_avr_feature(env, AVR_FEATURE_SRAM); + set_avr_feature(env, AVR_FEATURE_BREAK); + + set_avr_feature(env, AVR_FEATURE_2_BYTE_PC); + set_avr_feature(env, AVR_FEATURE_2_BYTE_SP); + set_avr_feature(env, AVR_FEATURE_RAMPZ); + set_avr_feature(env, AVR_FEATURE_ELPMX); + set_avr_feature(env, AVR_FEATURE_ELPM); + set_avr_feature(env, AVR_FEATURE_JMP_CALL); + set_avr_feature(env, AVR_FEATURE_LPMX); + set_avr_feature(env, AVR_FEATURE_MOVW); + set_avr_feature(env, AVR_FEATURE_MUL); +} + +/* + * Setting features of AVR core type avr6 + * -------------------------------------- + * + * This type of AVR core is present in the following AVR MCUs: + * + * atmega2560, atmega2561, atmega256rfr2, atmega2564rfr2 + */ +static void avr_avr6_initfn(Object *obj) +{ + AVRCPU *cpu = AVR_CPU(obj); + CPUAVRState *env = &cpu->env; + + set_avr_feature(env, AVR_FEATURE_LPM); + set_avr_feature(env, AVR_FEATURE_IJMP_ICALL); + set_avr_feature(env, AVR_FEATURE_ADIW_SBIW); + set_avr_feature(env, AVR_FEATURE_SRAM); + set_avr_feature(env, AVR_FEATURE_BREAK); + + set_avr_feature(env, AVR_FEATURE_3_BYTE_PC); + set_avr_feature(env, AVR_FEATURE_2_BYTE_SP); + set_avr_feature(env, AVR_FEATURE_RAMPZ); + set_avr_feature(env, AVR_FEATURE_EIJMP_EICALL); + set_avr_feature(env, AVR_FEATURE_ELPMX); + set_avr_feature(env, AVR_FEATURE_ELPM); + set_avr_feature(env, AVR_FEATURE_JMP_CALL); + set_avr_feature(env, AVR_FEATURE_LPMX); + set_avr_feature(env, AVR_FEATURE_MOVW); + set_avr_feature(env, AVR_FEATURE_MUL); +} + +typedef struct AVRCPUInfo { + int model; + const char *name; + void (*initfn)(Object *obj); +} AVRCPUInfo; + +static const AVRCPUInfo avr_cpu_info[] ={ + {UC_CPU_AVR_ATMEGA16, "arch:avr5", avr_avr5_initfn}, + {UC_CPU_AVR_ATMEGA16, 
"atmega16", avr_avr5_initfn}, + {UC_CPU_AVR_ATMEGA32, "atmega32", avr_avr5_initfn}, + {UC_CPU_AVR_ATMEGA64, "atmega64", avr_avr5_initfn}, + + {UC_CPU_AVR_ATMEGA128, "arch:avr51", avr_avr51_initfn}, + {UC_CPU_AVR_ATMEGA128, "atmega128", avr_avr51_initfn}, + {UC_CPU_AVR_ATMEGA128RFR2, "atmega128rfr2", avr_avr51_initfn}, + {UC_CPU_AVR_ATMEGA1280, "atmega1280", avr_avr51_initfn}, + + {UC_CPU_AVR_ATMEGA256, "arch:avr6", avr_avr6_initfn}, + {UC_CPU_AVR_ATMEGA256RFR2, "atmega256rfr2", avr_avr6_initfn}, + {UC_CPU_AVR_ATMEGA2560, "atmega2560", avr_avr6_initfn}, +}; + +static const AVRCPUInfo *avr_cpu_info_get(int cpu_model) +{ + for (int i = 0; i < ARRAY_SIZE(avr_cpu_info); i++) { + const AVRCPUInfo *const cip = &avr_cpu_info[i]; + if (cpu_model == cip->model) + return cip; + } + return NULL; +} + +DEFAULT_VISIBILITY +int avr_cpu_model_valid(int cpu_model) +{ + return avr_cpu_info_get(cpu_model) != NULL; +} + +#if 0 +static void avr_cpu_list_entry(gpointer data, gpointer user_data) +{ + const char *typename = object_class_get_name(OBJECT_CLASS(data)); + + qemu_printf("%s\n", typename); +} + +void avr_cpu_list(void) +{ + GSList *list; + list = object_class_get_list_sorted(TYPE_AVR_CPU, false); + g_slist_foreach(list, avr_cpu_list_entry, NULL); + g_slist_free(list); +} + +#define DEFINE_AVR_CPU_TYPE(model, initfn) \ + { \ + .parent = TYPE_AVR_CPU, \ + .instance_init = initfn, \ + .name = AVR_CPU_TYPE_NAME(model), \ + } + +static const TypeInfo avr_cpu_type_info[] = { + { + .name = TYPE_AVR_CPU, + .parent = TYPE_CPU, + .instance_size = sizeof(AVRCPU), + .instance_init = avr_cpu_initfn, + .class_size = sizeof(AVRCPUClass), + .class_init = avr_cpu_class_init, + .abstract = true, + }, + DEFINE_AVR_CPU_TYPE("avr5", avr_avr5_initfn), + DEFINE_AVR_CPU_TYPE("avr51", avr_avr51_initfn), + DEFINE_AVR_CPU_TYPE("avr6", avr_avr6_initfn), +}; + +DEFINE_TYPES(avr_cpu_type_info) +#endif + +AVRCPU *cpu_avr_init(struct uc_struct *uc) +{ + AVRCPU *cpu; + CPUState *cs; + CPUClass *cc; + ObjectClass *oc; + + cpu = qemu_memalign(8, sizeof(*cpu)); + if (cpu == NULL) { + return NULL; + } + memset((void *)cpu, 0, sizeof(*cpu)); + + if (uc->cpu_model == INT_MAX) + uc->cpu_model = UC_CPU_AVR_ATMEGA128; + const AVRCPUInfo *const cip = avr_cpu_info_get(uc->cpu_model); + if (!cip) { + qemu_vfree(cpu); + return NULL; + } + + cs = &cpu->parent_obj; + cc = &AVR_CPU_GET_CLASS(cpu)->parent_class; + oc = (ObjectClass *)cc; + cs->cc = cc; + cs->uc = uc; + uc->cpu = cs; + + cpu_class_init(uc, cc); + avr_cpu_class_init(oc, NULL); + + cpu_common_initfn(uc, cs); + avr_cpu_initfn(cs, uc); + cip->initfn(cs); + + avr_cpu_realizefn(cs); + + // init address space + cpu_address_space_init(cs, 0, cs->memory); + + qemu_init_vcpu(cs); + + return cpu; +} diff --git a/qemu/target/avr/cpu.h b/qemu/target/avr/cpu.h new file mode 100644 index 0000000000..f7781c7ffe --- /dev/null +++ b/qemu/target/avr/cpu.h @@ -0,0 +1,274 @@ +/* + * QEMU AVR CPU + * + * Copyright (c) 2016-2020 Michael Rolnik + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see
+ *
+ */
+
+#ifndef QEMU_AVR_CPU_H
+#define QEMU_AVR_CPU_H
+
+#include "cpu-qom.h"
+#include "exec/cpu-defs.h"
+
+#ifdef CONFIG_USER_ONLY
+#error "AVR 8-bit does not support user mode"
+#endif
+
+#define AVR_CPU_TYPE_SUFFIX "-" TYPE_AVR_CPU
+#define AVR_CPU_TYPE_NAME(name) (name AVR_CPU_TYPE_SUFFIX)
+#define CPU_RESOLVING_TYPE TYPE_AVR_CPU
+
+#define TCG_GUEST_DEFAULT_MO 0
+
+/*
+ * AVR has two memory spaces, data & code.
+ * Both start at address 0:
+ * ST/LD instructions access the data space;
+ * LPM/SPM and instruction fetching access the code memory space.
+ */
+#define MMU_CODE_IDX 0
+#define MMU_DATA_IDX 1
+
+#define EXCP_RESET 1
+#define EXCP_INT(n) (EXCP_RESET + (n) + 1)
+
+/* Number of CPU registers */
+#define NUMBER_OF_CPU_REGISTERS 32
+/* Number of IO registers accessible by ld/st/in/out */
+#define NUMBER_OF_IO_REGISTERS 64
+
+/*
+ * Offsets of AVR memory regions in host memory space.
+ *
+ * This is needed because the AVR has separate code and data address
+ * spaces that both start from zero but have to go somewhere in
+ * host memory.
+ *
+ * It's also useful to know where some things are, like the IO registers.
+ */
+#if 1
+// Unicorn:
+#define OFFSET_CODE 0x08000000 /* UC_AVR_MEM_FLASH */
+#define OFFSET_DATA 0x00000000
+#else
+/* Flash program memory */
+#define OFFSET_CODE 0x00000000
+/* CPU registers, IO registers, and SRAM */
+#define OFFSET_DATA 0x00800000
+#endif
+/* CPU registers specifically, these are mapped at the start of data */
+#define OFFSET_CPU_REGISTERS OFFSET_DATA
+/*
+ * IO registers, including status register, stack pointer, and memory
+ * mapped peripherals, mapped just after CPU registers
+ */
+#define OFFSET_IO_REGISTERS (OFFSET_DATA + NUMBER_OF_CPU_REGISTERS)
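+
+/*
+ * For example, with the Unicorn layout above a guest data/SRAM access at
+ * 0x0100 is backed by host physical address OFFSET_DATA | 0x0100, I/O port n
+ * lives at OFFSET_IO_REGISTERS + n, and (while the flash mapping is active)
+ * instruction fetches read from OFFSET_CODE + pc_w * 2; see avr_code_base()
+ * below and avr_cpu_tlb_fill() in helper.c.  The addresses in this example
+ * are illustrative only.
+ */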
+
+typedef enum AVRFeature {
+    AVR_FEATURE_SRAM,
+
+    AVR_FEATURE_1_BYTE_PC,
+    AVR_FEATURE_2_BYTE_PC,
+    AVR_FEATURE_3_BYTE_PC,
+
+    AVR_FEATURE_1_BYTE_SP,
+    AVR_FEATURE_2_BYTE_SP,
+
+    AVR_FEATURE_BREAK,
+    AVR_FEATURE_DES,
+    AVR_FEATURE_RMW, /* Read Modify Write - XCH LAC LAS LAT */
+
+    AVR_FEATURE_EIJMP_EICALL,
+    AVR_FEATURE_IJMP_ICALL,
+    AVR_FEATURE_JMP_CALL,
+
+    AVR_FEATURE_ADIW_SBIW,
+
+    AVR_FEATURE_SPM,
+    AVR_FEATURE_SPMX,
+
+    AVR_FEATURE_ELPMX,
+    AVR_FEATURE_ELPM,
+    AVR_FEATURE_LPMX,
+    AVR_FEATURE_LPM,
+
+    AVR_FEATURE_MOVW,
+    AVR_FEATURE_MUL,
+    AVR_FEATURE_RAMPD,
+    AVR_FEATURE_RAMPX,
+    AVR_FEATURE_RAMPY,
+    AVR_FEATURE_RAMPZ,
+
+    AVR_FEATURE_FLASH, /* Unicorn: was Flash program memory mapped? */
+} AVRFeature;
+
+typedef struct CPUAVRState CPUAVRState;
+
+struct CPUAVRState {
+    uint32_t pc_w; /* 0x003fffff up to 22 bits */
+
+    uint32_t sregC; /* 0x00000001 1 bit */
+    uint32_t sregZ; /* 0x00000001 1 bit */
+    uint32_t sregN; /* 0x00000001 1 bit */
+    uint32_t sregV; /* 0x00000001 1 bit */
+    uint32_t sregS; /* 0x00000001 1 bit */
+    uint32_t sregH; /* 0x00000001 1 bit */
+    uint32_t sregT; /* 0x00000001 1 bit */
+    uint32_t sregI; /* 0x00000001 1 bit */
+
+    uint32_t rampD; /* 0x00ff0000 8 bits */
+    uint32_t rampX; /* 0x00ff0000 8 bits */
+    uint32_t rampY; /* 0x00ff0000 8 bits */
+    uint32_t rampZ; /* 0x00ff0000 8 bits */
+    uint32_t eind; /* 0x00ff0000 8 bits */
+
+    uint32_t r[NUMBER_OF_CPU_REGISTERS]; /* 8 bits each */
+    uint32_t sp; /* 16 bits */
+
+    uint32_t skip; /* if set, skip instruction */
+
+    uint64_t intsrc; /* interrupt sources */
+    bool fullacc; /* if true, loads/stores go through the full-access helpers
+                     (CPU/IO registers); plain memory access otherwise */
+
+    uint64_t features;
+
+    // Unicorn engine
+    struct uc_struct *uc;
+};
+
+/**
+ * AVRCPU:
+ * @env: #CPUAVRState
+ *
+ * An AVR CPU.
+ */
+typedef struct AVRCPU {
+    /*< private >*/
+    CPUState parent_obj;
+    /*< public >*/
+
+    CPUNegativeOffsetState neg;
+    CPUAVRState env;
+
+    AVRCPUClass cc;
+} AVRCPU;
+
+extern const struct VMStateDescription vms_avr_cpu;
+
+void avr_cpu_do_interrupt(CPUState *cpu);
+bool avr_cpu_exec_interrupt(CPUState *cpu, int int_req);
+hwaddr avr_cpu_get_phys_page_debug(CPUState *cpu, vaddr addr);
+int avr_cpu_gdb_read_register(CPUState *cpu, GByteArray *buf, int reg);
+int avr_cpu_gdb_write_register(CPUState *cpu, uint8_t *buf, int reg);
+
+static inline int avr_feature(CPUAVRState *env, AVRFeature feature)
+{
+    return (env->features & (1U << feature)) != 0;
+}
+
+static inline void set_avr_feature(CPUAVRState *env, int feature)
+{
+    env->features |= (1U << feature);
+}
+
+#define cpu_list avr_cpu_list
+#define cpu_signal_handler cpu_avr_signal_handler
+#define cpu_mmu_index avr_cpu_mmu_index
+
+static inline int avr_cpu_mmu_index(CPUAVRState *env, bool ifetch)
+{
+    return ifetch ? MMU_CODE_IDX : MMU_DATA_IDX;
+}
+
+static inline uint32_t avr_code_base(CPUAVRState *env)
+{
+    return OFFSET_CODE && avr_feature(env, AVR_FEATURE_FLASH) ?
+ OFFSET_CODE : 0; +} + +void avr_cpu_tcg_init(struct uc_struct *uc); + +void avr_cpu_list(void); +int cpu_avr_exec(CPUState *cpu); +int cpu_avr_signal_handler(int host_signum, void *pinfo, void *puc); +int avr_cpu_memory_rw_debug(CPUState *cs, vaddr address, uint8_t *buf, + int len, bool is_write); + +enum { + TB_FLAGS_FULL_ACCESS = 1, + TB_FLAGS_SKIP = 2, +}; + +static inline void cpu_get_tb_cpu_state(CPUAVRState *env, target_ulong *pc, + target_ulong *cs_base, uint32_t *pflags) +{ + uint32_t flags = 0; + + *pc = env->pc_w * 2; + *cs_base = 0; + + if (env->fullacc) { + flags |= TB_FLAGS_FULL_ACCESS; + } + if (env->skip) { + flags |= TB_FLAGS_SKIP; + } + + *pflags = flags; +} + +static inline int cpu_interrupts_enabled(CPUAVRState *env) +{ + return env->sregI != 0; +} + +static inline uint8_t cpu_get_sreg(CPUAVRState *env) +{ + uint8_t sreg; + sreg = (env->sregC) << 0 + | (env->sregZ) << 1 + | (env->sregN) << 2 + | (env->sregV) << 3 + | (env->sregS) << 4 + | (env->sregH) << 5 + | (env->sregT) << 6 + | (env->sregI) << 7; + return sreg; +} + +static inline void cpu_set_sreg(CPUAVRState *env, uint8_t sreg) +{ + env->sregC = (sreg >> 0) & 0x01; + env->sregZ = (sreg >> 1) & 0x01; + env->sregN = (sreg >> 2) & 0x01; + env->sregV = (sreg >> 3) & 0x01; + env->sregS = (sreg >> 4) & 0x01; + env->sregH = (sreg >> 5) & 0x01; + env->sregT = (sreg >> 6) & 0x01; + env->sregI = (sreg >> 7) & 0x01; +} + +bool avr_cpu_tlb_fill(CPUState *cs, vaddr address, int size, + MMUAccessType access_type, int mmu_idx, + bool probe, uintptr_t retaddr); + +typedef CPUAVRState CPUArchState; +typedef AVRCPU ArchCPU; + +#include "exec/cpu-all.h" + +#endif /* !defined (QEMU_AVR_CPU_H) */ diff --git a/qemu/target/avr/decode-insn.c.inc b/qemu/target/avr/decode-insn.c.inc new file mode 100644 index 0000000000..0e96565474 --- /dev/null +++ b/qemu/target/avr/decode-insn.c.inc @@ -0,0 +1,1097 @@ +/* This file is autogenerated by scripts/decodetree.py. 
*/ + +typedef struct { + int bit; + int rd; +} arg_decode_insn10; + +typedef struct { + int rd; +} arg_decode_insn2; + +typedef struct { + int imm; +} arg_decode_insn3; + +typedef struct { + int bit; +} arg_decode_insn4; + +typedef struct { + int bit; + int imm; +} arg_decode_insn5; + +typedef struct { + int noarg_; +} arg_decode_insn6; + +typedef struct { + int bit; + int rr; +} arg_decode_insn7; + +typedef struct { + int bit; + int reg; +} arg_decode_insn8; + +typedef struct { + int rr; +} arg_decode_insn9; + +typedef struct { + int imm; + int rd; +} arg_rd_imm; + +typedef struct { + int rd; + int rr; +} arg_rd_rr; + +typedef arg_rd_rr arg_ADD; +static bool trans_ADD(DisasContext *ctx, arg_ADD *a); +typedef arg_rd_rr arg_ADC; +static bool trans_ADC(DisasContext *ctx, arg_ADC *a); +typedef arg_rd_imm arg_ADIW; +static bool trans_ADIW(DisasContext *ctx, arg_ADIW *a); +typedef arg_rd_rr arg_SUB; +static bool trans_SUB(DisasContext *ctx, arg_SUB *a); +typedef arg_rd_imm arg_SUBI; +static bool trans_SUBI(DisasContext *ctx, arg_SUBI *a); +typedef arg_rd_rr arg_SBC; +static bool trans_SBC(DisasContext *ctx, arg_SBC *a); +typedef arg_rd_imm arg_SBCI; +static bool trans_SBCI(DisasContext *ctx, arg_SBCI *a); +typedef arg_rd_imm arg_SBIW; +static bool trans_SBIW(DisasContext *ctx, arg_SBIW *a); +typedef arg_rd_rr arg_AND; +static bool trans_AND(DisasContext *ctx, arg_AND *a); +typedef arg_rd_imm arg_ANDI; +static bool trans_ANDI(DisasContext *ctx, arg_ANDI *a); +typedef arg_rd_rr arg_OR; +static bool trans_OR(DisasContext *ctx, arg_OR *a); +typedef arg_rd_imm arg_ORI; +static bool trans_ORI(DisasContext *ctx, arg_ORI *a); +typedef arg_rd_rr arg_EOR; +static bool trans_EOR(DisasContext *ctx, arg_EOR *a); +typedef arg_decode_insn2 arg_COM; +static bool trans_COM(DisasContext *ctx, arg_COM *a); +typedef arg_decode_insn2 arg_NEG; +static bool trans_NEG(DisasContext *ctx, arg_NEG *a); +typedef arg_decode_insn2 arg_INC; +static bool trans_INC(DisasContext *ctx, arg_INC *a); +typedef arg_decode_insn2 arg_DEC; +static bool trans_DEC(DisasContext *ctx, arg_DEC *a); +typedef arg_rd_rr arg_MUL; +static bool trans_MUL(DisasContext *ctx, arg_MUL *a); +typedef arg_rd_rr arg_MULS; +static bool trans_MULS(DisasContext *ctx, arg_MULS *a); +typedef arg_rd_rr arg_MULSU; +static bool trans_MULSU(DisasContext *ctx, arg_MULSU *a); +typedef arg_rd_rr arg_FMUL; +static bool trans_FMUL(DisasContext *ctx, arg_FMUL *a); +typedef arg_rd_rr arg_FMULS; +static bool trans_FMULS(DisasContext *ctx, arg_FMULS *a); +typedef arg_rd_rr arg_FMULSU; +static bool trans_FMULSU(DisasContext *ctx, arg_FMULSU *a); +typedef arg_decode_insn3 arg_DES; +static bool trans_DES(DisasContext *ctx, arg_DES *a); +typedef arg_decode_insn3 arg_RJMP; +static bool trans_RJMP(DisasContext *ctx, arg_RJMP *a); +typedef arg_decode_insn6 arg_IJMP; +static bool trans_IJMP(DisasContext *ctx, arg_IJMP *a); +typedef arg_decode_insn6 arg_EIJMP; +static bool trans_EIJMP(DisasContext *ctx, arg_EIJMP *a); +typedef arg_decode_insn3 arg_JMP; +static bool trans_JMP(DisasContext *ctx, arg_JMP *a); +typedef arg_decode_insn3 arg_RCALL; +static bool trans_RCALL(DisasContext *ctx, arg_RCALL *a); +typedef arg_decode_insn6 arg_ICALL; +static bool trans_ICALL(DisasContext *ctx, arg_ICALL *a); +typedef arg_decode_insn6 arg_EICALL; +static bool trans_EICALL(DisasContext *ctx, arg_EICALL *a); +typedef arg_decode_insn3 arg_CALL; +static bool trans_CALL(DisasContext *ctx, arg_CALL *a); +typedef arg_decode_insn6 arg_RET; +static bool trans_RET(DisasContext *ctx, arg_RET *a); +typedef 
arg_decode_insn6 arg_RETI; +static bool trans_RETI(DisasContext *ctx, arg_RETI *a); +typedef arg_rd_rr arg_CPSE; +static bool trans_CPSE(DisasContext *ctx, arg_CPSE *a); +typedef arg_rd_rr arg_CP; +static bool trans_CP(DisasContext *ctx, arg_CP *a); +typedef arg_rd_rr arg_CPC; +static bool trans_CPC(DisasContext *ctx, arg_CPC *a); +typedef arg_rd_imm arg_CPI; +static bool trans_CPI(DisasContext *ctx, arg_CPI *a); +typedef arg_decode_insn7 arg_SBRC; +static bool trans_SBRC(DisasContext *ctx, arg_SBRC *a); +typedef arg_decode_insn7 arg_SBRS; +static bool trans_SBRS(DisasContext *ctx, arg_SBRS *a); +typedef arg_decode_insn8 arg_SBIC; +static bool trans_SBIC(DisasContext *ctx, arg_SBIC *a); +typedef arg_decode_insn8 arg_SBIS; +static bool trans_SBIS(DisasContext *ctx, arg_SBIS *a); +typedef arg_decode_insn5 arg_BRBS; +static bool trans_BRBS(DisasContext *ctx, arg_BRBS *a); +typedef arg_decode_insn5 arg_BRBC; +static bool trans_BRBC(DisasContext *ctx, arg_BRBC *a); +typedef arg_rd_rr arg_MOV; +static bool trans_MOV(DisasContext *ctx, arg_MOV *a); +typedef arg_rd_rr arg_MOVW; +static bool trans_MOVW(DisasContext *ctx, arg_MOVW *a); +typedef arg_rd_imm arg_LDI; +static bool trans_LDI(DisasContext *ctx, arg_LDI *a); +typedef arg_rd_imm arg_LDS; +static bool trans_LDS(DisasContext *ctx, arg_LDS *a); +typedef arg_decode_insn2 arg_LDX1; +static bool trans_LDX1(DisasContext *ctx, arg_LDX1 *a); +typedef arg_decode_insn2 arg_LDX2; +static bool trans_LDX2(DisasContext *ctx, arg_LDX2 *a); +typedef arg_decode_insn2 arg_LDX3; +static bool trans_LDX3(DisasContext *ctx, arg_LDX3 *a); +typedef arg_decode_insn2 arg_LDY2; +static bool trans_LDY2(DisasContext *ctx, arg_LDY2 *a); +typedef arg_decode_insn2 arg_LDY3; +static bool trans_LDY3(DisasContext *ctx, arg_LDY3 *a); +typedef arg_decode_insn2 arg_LDZ2; +static bool trans_LDZ2(DisasContext *ctx, arg_LDZ2 *a); +typedef arg_decode_insn2 arg_LDZ3; +static bool trans_LDZ3(DisasContext *ctx, arg_LDZ3 *a); +typedef arg_rd_imm arg_LDDY; +static bool trans_LDDY(DisasContext *ctx, arg_LDDY *a); +typedef arg_rd_imm arg_LDDZ; +static bool trans_LDDZ(DisasContext *ctx, arg_LDDZ *a); +typedef arg_rd_imm arg_STS; +static bool trans_STS(DisasContext *ctx, arg_STS *a); +typedef arg_decode_insn9 arg_STX1; +static bool trans_STX1(DisasContext *ctx, arg_STX1 *a); +typedef arg_decode_insn9 arg_STX2; +static bool trans_STX2(DisasContext *ctx, arg_STX2 *a); +typedef arg_decode_insn9 arg_STX3; +static bool trans_STX3(DisasContext *ctx, arg_STX3 *a); +typedef arg_decode_insn2 arg_STY2; +static bool trans_STY2(DisasContext *ctx, arg_STY2 *a); +typedef arg_decode_insn2 arg_STY3; +static bool trans_STY3(DisasContext *ctx, arg_STY3 *a); +typedef arg_decode_insn2 arg_STZ2; +static bool trans_STZ2(DisasContext *ctx, arg_STZ2 *a); +typedef arg_decode_insn2 arg_STZ3; +static bool trans_STZ3(DisasContext *ctx, arg_STZ3 *a); +typedef arg_rd_imm arg_STDY; +static bool trans_STDY(DisasContext *ctx, arg_STDY *a); +typedef arg_rd_imm arg_STDZ; +static bool trans_STDZ(DisasContext *ctx, arg_STDZ *a); +typedef arg_decode_insn6 arg_LPM1; +static bool trans_LPM1(DisasContext *ctx, arg_LPM1 *a); +typedef arg_decode_insn2 arg_LPM2; +static bool trans_LPM2(DisasContext *ctx, arg_LPM2 *a); +typedef arg_decode_insn2 arg_LPMX; +static bool trans_LPMX(DisasContext *ctx, arg_LPMX *a); +typedef arg_decode_insn6 arg_ELPM1; +static bool trans_ELPM1(DisasContext *ctx, arg_ELPM1 *a); +typedef arg_decode_insn2 arg_ELPM2; +static bool trans_ELPM2(DisasContext *ctx, arg_ELPM2 *a); +typedef arg_decode_insn2 arg_ELPMX; 
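+/*
+ * Generated interface, in brief: each pattern in insn.decode yields an
+ * argument struct (arg_*), a field extractor and a trans_* hook that the
+ * hand-written translate.c implements; decode_insn() at the bottom of this
+ * file extracts the fields and dispatches to the matching hook, returning
+ * false when no pattern matches.  A caller can therefore be sketched as
+ * (hypothetical names, for orientation only):
+ *
+ *     if (!decode_insn(ctx, opcode)) {
+ *         gen_helper_unsupported(cpu_env);   <- assumed error path
+ *     }
+ */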
+static bool trans_ELPMX(DisasContext *ctx, arg_ELPMX *a); +typedef arg_decode_insn6 arg_SPM; +static bool trans_SPM(DisasContext *ctx, arg_SPM *a); +typedef arg_decode_insn6 arg_SPMX; +static bool trans_SPMX(DisasContext *ctx, arg_SPMX *a); +typedef arg_rd_imm arg_IN; +static bool trans_IN(DisasContext *ctx, arg_IN *a); +typedef arg_rd_imm arg_OUT; +static bool trans_OUT(DisasContext *ctx, arg_OUT *a); +typedef arg_decode_insn2 arg_PUSH; +static bool trans_PUSH(DisasContext *ctx, arg_PUSH *a); +typedef arg_decode_insn2 arg_POP; +static bool trans_POP(DisasContext *ctx, arg_POP *a); +typedef arg_decode_insn2 arg_XCH; +static bool trans_XCH(DisasContext *ctx, arg_XCH *a); +typedef arg_decode_insn2 arg_LAC; +static bool trans_LAC(DisasContext *ctx, arg_LAC *a); +typedef arg_decode_insn2 arg_LAS; +static bool trans_LAS(DisasContext *ctx, arg_LAS *a); +typedef arg_decode_insn2 arg_LAT; +static bool trans_LAT(DisasContext *ctx, arg_LAT *a); +typedef arg_decode_insn2 arg_LSR; +static bool trans_LSR(DisasContext *ctx, arg_LSR *a); +typedef arg_decode_insn2 arg_ROR; +static bool trans_ROR(DisasContext *ctx, arg_ROR *a); +typedef arg_decode_insn2 arg_ASR; +static bool trans_ASR(DisasContext *ctx, arg_ASR *a); +typedef arg_decode_insn2 arg_SWAP; +static bool trans_SWAP(DisasContext *ctx, arg_SWAP *a); +typedef arg_decode_insn8 arg_SBI; +static bool trans_SBI(DisasContext *ctx, arg_SBI *a); +typedef arg_decode_insn8 arg_CBI; +static bool trans_CBI(DisasContext *ctx, arg_CBI *a); +typedef arg_decode_insn10 arg_BST; +static bool trans_BST(DisasContext *ctx, arg_BST *a); +typedef arg_decode_insn10 arg_BLD; +static bool trans_BLD(DisasContext *ctx, arg_BLD *a); +typedef arg_decode_insn4 arg_BSET; +static bool trans_BSET(DisasContext *ctx, arg_BSET *a); +typedef arg_decode_insn4 arg_BCLR; +static bool trans_BCLR(DisasContext *ctx, arg_BCLR *a); +typedef arg_decode_insn6 arg_BREAK; +static bool trans_BREAK(DisasContext *ctx, arg_BREAK *a); +typedef arg_decode_insn6 arg_NOP; +static bool trans_NOP(DisasContext *ctx, arg_NOP *a); +typedef arg_decode_insn6 arg_SLEEP; +static bool trans_SLEEP(DisasContext *ctx, arg_SLEEP *a); +typedef arg_decode_insn6 arg_WDR; +static bool trans_WDR(DisasContext *ctx, arg_WDR *a); + +static void decode_insn_extract_decode_insn_Fmt_10(DisasContext *ctx, arg_decode_insn6 *a, uint16_t insn) +{ +} + +static void decode_insn_extract_decode_insn_Fmt_11(DisasContext *ctx, arg_decode_insn3 *a, uint16_t insn) +{ + a->imm = append_16(ctx, deposit32(extract32(insn, 0, 1), 1, 31, extract32(insn, 4, 5))); +} + +static void decode_insn_extract_decode_insn_Fmt_12(DisasContext *ctx, arg_decode_insn7 *a, uint16_t insn) +{ + a->rr = extract32(insn, 4, 5); + a->bit = extract32(insn, 0, 3); +} + +static void decode_insn_extract_decode_insn_Fmt_13(DisasContext *ctx, arg_decode_insn8 *a, uint16_t insn) +{ + a->reg = extract32(insn, 3, 5); + a->bit = extract32(insn, 0, 3); +} + +static void decode_insn_extract_decode_insn_Fmt_17(DisasContext *ctx, arg_rd_rr *a, uint16_t insn) +{ + a->rd = to_regs_00_30_by_two(ctx, extract32(insn, 4, 4)); + a->rr = to_regs_00_30_by_two(ctx, extract32(insn, 0, 4)); +} + +static void decode_insn_extract_decode_insn_Fmt_18(DisasContext *ctx, arg_decode_insn9 *a, uint16_t insn) +{ + a->rr = extract32(insn, 4, 5); +} + +static void decode_insn_extract_decode_insn_Fmt_19(DisasContext *ctx, arg_decode_insn10 *a, uint16_t insn) +{ + a->rd = extract32(insn, 4, 5); + a->bit = extract32(insn, 0, 3); +} + +static void decode_insn_extract_decode_insn_Fmt_4(DisasContext *ctx, 
arg_decode_insn2 *a, uint16_t insn) +{ + a->rd = extract32(insn, 4, 5); +} + +static void decode_insn_extract_decode_insn_Fmt_5(DisasContext *ctx, arg_rd_rr *a, uint16_t insn) +{ + a->rd = to_regs_16_31_by_one(ctx, extract32(insn, 4, 4)); + a->rr = to_regs_16_31_by_one(ctx, extract32(insn, 0, 4)); +} + +static void decode_insn_extract_decode_insn_Fmt_6(DisasContext *ctx, arg_decode_insn3 *a, uint16_t insn) +{ + a->imm = extract32(insn, 4, 4); +} + +static void decode_insn_extract_decode_insn_Fmt_9(DisasContext *ctx, arg_decode_insn3 *a, uint16_t insn) +{ + a->imm = sextract32(insn, 0, 12); +} + +static void decode_insn_extract_fmul(DisasContext *ctx, arg_rd_rr *a, uint16_t insn) +{ + a->rd = to_regs_16_23_by_one(ctx, extract32(insn, 4, 3)); + a->rr = to_regs_16_23_by_one(ctx, extract32(insn, 0, 3)); +} + +static void decode_insn_extract_io_rd_imm(DisasContext *ctx, arg_rd_imm *a, uint16_t insn) +{ + a->rd = extract32(insn, 4, 5); + a->imm = deposit32(extract32(insn, 0, 4), 4, 28, extract32(insn, 9, 2)); +} + +static void decode_insn_extract_ldst_d(DisasContext *ctx, arg_rd_imm *a, uint16_t insn) +{ + a->rd = extract32(insn, 4, 5); + a->imm = deposit32(deposit32(extract32(insn, 0, 3), 3, 29, extract32(insn, 10, 2)), 5, 27, extract32(insn, 13, 1)); +} + +static void decode_insn_extract_ldst_s(DisasContext *ctx, arg_rd_imm *a, uint16_t insn) +{ + a->rd = extract32(insn, 4, 5); + a->imm = 0; +} + +static void decode_insn_extract_op_bit(DisasContext *ctx, arg_decode_insn4 *a, uint16_t insn) +{ + a->bit = extract32(insn, 4, 3); +} + +static void decode_insn_extract_op_bit_imm(DisasContext *ctx, arg_decode_insn5 *a, uint16_t insn) +{ + a->imm = sextract32(insn, 3, 7); + a->bit = extract32(insn, 0, 3); +} + +static void decode_insn_extract_op_rd_imm6(DisasContext *ctx, arg_rd_imm *a, uint16_t insn) +{ + a->rd = to_regs_24_30_by_two(ctx, extract32(insn, 4, 2)); + a->imm = deposit32(extract32(insn, 0, 4), 4, 28, extract32(insn, 6, 2)); +} + +static void decode_insn_extract_op_rd_imm8(DisasContext *ctx, arg_rd_imm *a, uint16_t insn) +{ + a->rd = to_regs_16_31_by_one(ctx, extract32(insn, 4, 4)); + a->imm = deposit32(extract32(insn, 0, 4), 4, 28, extract32(insn, 8, 4)); +} + +static void decode_insn_extract_op_rd_rr(DisasContext *ctx, arg_rd_rr *a, uint16_t insn) +{ + a->rd = extract32(insn, 4, 5); + a->rr = deposit32(extract32(insn, 0, 4), 4, 28, extract32(insn, 9, 1)); +} + +bool decode_insn(DisasContext *ctx, uint16_t insn) +{ + union { + arg_decode_insn10 f_decode_insn10; + arg_decode_insn2 f_decode_insn2; + arg_decode_insn3 f_decode_insn3; + arg_decode_insn4 f_decode_insn4; + arg_decode_insn5 f_decode_insn5; + arg_decode_insn6 f_decode_insn6; + arg_decode_insn7 f_decode_insn7; + arg_decode_insn8 f_decode_insn8; + arg_decode_insn9 f_decode_insn9; + arg_rd_imm f_rd_imm; + arg_rd_rr f_rd_rr; + } u; + + switch (insn & 0x0000d000) { + case 0x00000000: + /* 00.0.... ........ */ + switch (insn & 0x00002c00) { + case 0x00000000: + /* 000000.. ........ */ + switch ((insn >> 8) & 0x3) { + case 0x0: + /* 00000000 ........ */ + decode_insn_extract_decode_insn_Fmt_10(ctx, &u.f_decode_insn6, insn); + switch (insn & 0x000000ff) { + case 0x00000000: + /* 00000000 00000000 */ + /* insn.decode:185 */ + if (trans_NOP(ctx, &u.f_decode_insn6)) return true; + break; + } + break; + case 0x1: + /* 00000001 ........ */ + /* insn.decode:128 */ + decode_insn_extract_decode_insn_Fmt_17(ctx, &u.f_rd_rr, insn); + if (trans_MOVW(ctx, &u.f_rd_rr)) return true; + break; + case 0x2: + /* 00000010 ........ 
*/ + /* insn.decode:71 */ + decode_insn_extract_decode_insn_Fmt_5(ctx, &u.f_rd_rr, insn); + if (trans_MULS(ctx, &u.f_rd_rr)) return true; + break; + case 0x3: + /* 00000011 ........ */ + decode_insn_extract_fmul(ctx, &u.f_rd_rr, insn); + switch (insn & 0x00000088) { + case 0x00000000: + /* 00000011 0...0... */ + /* insn.decode:72 */ + if (trans_MULSU(ctx, &u.f_rd_rr)) return true; + break; + case 0x00000008: + /* 00000011 0...1... */ + /* insn.decode:73 */ + if (trans_FMUL(ctx, &u.f_rd_rr)) return true; + break; + case 0x00000080: + /* 00000011 1...0... */ + /* insn.decode:74 */ + if (trans_FMULS(ctx, &u.f_rd_rr)) return true; + break; + case 0x00000088: + /* 00000011 1...1... */ + /* insn.decode:75 */ + if (trans_FMULSU(ctx, &u.f_rd_rr)) return true; + break; + } + break; + } + break; + case 0x00000400: + /* 000001.. ........ */ + /* insn.decode:102 */ + decode_insn_extract_op_rd_rr(ctx, &u.f_rd_rr, insn); + if (trans_CPC(ctx, &u.f_rd_rr)) return true; + break; + case 0x00000800: + /* 000010.. ........ */ + /* insn.decode:58 */ + decode_insn_extract_op_rd_rr(ctx, &u.f_rd_rr, insn); + if (trans_SBC(ctx, &u.f_rd_rr)) return true; + break; + case 0x00000c00: + /* 000011.. ........ */ + /* insn.decode:53 */ + decode_insn_extract_op_rd_rr(ctx, &u.f_rd_rr, insn); + if (trans_ADD(ctx, &u.f_rd_rr)) return true; + break; + case 0x00002000: + /* 001000.. ........ */ + /* insn.decode:61 */ + decode_insn_extract_op_rd_rr(ctx, &u.f_rd_rr, insn); + if (trans_AND(ctx, &u.f_rd_rr)) return true; + break; + case 0x00002400: + /* 001001.. ........ */ + /* insn.decode:65 */ + decode_insn_extract_op_rd_rr(ctx, &u.f_rd_rr, insn); + if (trans_EOR(ctx, &u.f_rd_rr)) return true; + break; + case 0x00002800: + /* 001010.. ........ */ + /* insn.decode:63 */ + decode_insn_extract_op_rd_rr(ctx, &u.f_rd_rr, insn); + if (trans_OR(ctx, &u.f_rd_rr)) return true; + break; + case 0x00002c00: + /* 001011.. ........ */ + /* insn.decode:127 */ + decode_insn_extract_op_rd_rr(ctx, &u.f_rd_rr, insn); + if (trans_MOV(ctx, &u.f_rd_rr)) return true; + break; + } + break; + case 0x00001000: + /* 00.1.... ........ */ + switch ((insn >> 13) & 0x1) { + case 0x0: + /* 0001.... ........ */ + decode_insn_extract_op_rd_rr(ctx, &u.f_rd_rr, insn); + switch ((insn >> 10) & 0x3) { + case 0x0: + /* 000100.. ........ */ + /* insn.decode:100 */ + if (trans_CPSE(ctx, &u.f_rd_rr)) return true; + break; + case 0x1: + /* 000101.. ........ */ + /* insn.decode:101 */ + if (trans_CP(ctx, &u.f_rd_rr)) return true; + break; + case 0x2: + /* 000110.. ........ */ + /* insn.decode:56 */ + if (trans_SUB(ctx, &u.f_rd_rr)) return true; + break; + case 0x3: + /* 000111.. ........ */ + /* insn.decode:54 */ + if (trans_ADC(ctx, &u.f_rd_rr)) return true; + break; + } + break; + case 0x1: + /* 0011.... ........ */ + /* insn.decode:103 */ + decode_insn_extract_op_rd_imm8(ctx, &u.f_rd_imm, insn); + if (trans_CPI(ctx, &u.f_rd_imm)) return true; + break; + } + break; + case 0x00004000: + /* 01.0.... ........ */ + decode_insn_extract_op_rd_imm8(ctx, &u.f_rd_imm, insn); + switch ((insn >> 13) & 0x1) { + case 0x0: + /* 0100.... ........ */ + /* insn.decode:59 */ + if (trans_SBCI(ctx, &u.f_rd_imm)) return true; + break; + case 0x1: + /* 0110.... ........ */ + /* insn.decode:64 */ + if (trans_ORI(ctx, &u.f_rd_imm)) return true; + break; + } + break; + case 0x00005000: + /* 01.1.... ........ */ + decode_insn_extract_op_rd_imm8(ctx, &u.f_rd_imm, insn); + switch ((insn >> 13) & 0x1) { + case 0x0: + /* 0101.... ........ 
*/ + /* insn.decode:57 */ + if (trans_SUBI(ctx, &u.f_rd_imm)) return true; + break; + case 0x1: + /* 0111.... ........ */ + /* insn.decode:62 */ + if (trans_ANDI(ctx, &u.f_rd_imm)) return true; + break; + } + break; + case 0x00008000: + /* 10.0.... ........ */ + decode_insn_extract_ldst_d(ctx, &u.f_rd_imm, insn); + switch (insn & 0x00000208) { + case 0x00000000: + /* 10.0..0. ....0... */ + /* insn.decode:139 */ + if (trans_LDDZ(ctx, &u.f_rd_imm)) return true; + break; + case 0x00000008: + /* 10.0..0. ....1... */ + /* insn.decode:138 */ + if (trans_LDDY(ctx, &u.f_rd_imm)) return true; + break; + case 0x00000200: + /* 10.0..1. ....0... */ + /* insn.decode:149 */ + if (trans_STDZ(ctx, &u.f_rd_imm)) return true; + break; + case 0x00000208: + /* 10.0..1. ....1... */ + /* insn.decode:148 */ + if (trans_STDY(ctx, &u.f_rd_imm)) return true; + break; + } + break; + case 0x00009000: + /* 10.1.... ........ */ + switch (insn & 0x00002800) { + case 0x00000000: + /* 10010... ........ */ + switch ((insn >> 9) & 0x3) { + case 0x0: + /* 1001000. ........ */ + switch (insn & 0x0000000f) { + case 0x00000000: + /* 1001000. ....0000 */ + /* insn.decode:130 */ + decode_insn_extract_ldst_s(ctx, &u.f_rd_imm, insn); + if (trans_LDS(ctx, &u.f_rd_imm)) return true; + break; + case 0x00000001: + /* 1001000. ....0001 */ + /* insn.decode:136 */ + decode_insn_extract_decode_insn_Fmt_4(ctx, &u.f_decode_insn2, insn); + if (trans_LDZ2(ctx, &u.f_decode_insn2)) return true; + break; + case 0x00000002: + /* 1001000. ....0010 */ + /* insn.decode:137 */ + decode_insn_extract_decode_insn_Fmt_4(ctx, &u.f_decode_insn2, insn); + if (trans_LDZ3(ctx, &u.f_decode_insn2)) return true; + break; + case 0x00000004: + /* 1001000. ....0100 */ + /* insn.decode:151 */ + decode_insn_extract_decode_insn_Fmt_4(ctx, &u.f_decode_insn2, insn); + if (trans_LPM2(ctx, &u.f_decode_insn2)) return true; + break; + case 0x00000005: + /* 1001000. ....0101 */ + /* insn.decode:152 */ + decode_insn_extract_decode_insn_Fmt_4(ctx, &u.f_decode_insn2, insn); + if (trans_LPMX(ctx, &u.f_decode_insn2)) return true; + break; + case 0x00000006: + /* 1001000. ....0110 */ + /* insn.decode:154 */ + decode_insn_extract_decode_insn_Fmt_4(ctx, &u.f_decode_insn2, insn); + if (trans_ELPM2(ctx, &u.f_decode_insn2)) return true; + break; + case 0x00000007: + /* 1001000. ....0111 */ + /* insn.decode:155 */ + decode_insn_extract_decode_insn_Fmt_4(ctx, &u.f_decode_insn2, insn); + if (trans_ELPMX(ctx, &u.f_decode_insn2)) return true; + break; + case 0x00000009: + /* 1001000. ....1001 */ + /* insn.decode:134 */ + decode_insn_extract_decode_insn_Fmt_4(ctx, &u.f_decode_insn2, insn); + if (trans_LDY2(ctx, &u.f_decode_insn2)) return true; + break; + case 0x0000000a: + /* 1001000. ....1010 */ + /* insn.decode:135 */ + decode_insn_extract_decode_insn_Fmt_4(ctx, &u.f_decode_insn2, insn); + if (trans_LDY3(ctx, &u.f_decode_insn2)) return true; + break; + case 0x0000000c: + /* 1001000. ....1100 */ + /* insn.decode:131 */ + decode_insn_extract_decode_insn_Fmt_4(ctx, &u.f_decode_insn2, insn); + if (trans_LDX1(ctx, &u.f_decode_insn2)) return true; + break; + case 0x0000000d: + /* 1001000. ....1101 */ + /* insn.decode:132 */ + decode_insn_extract_decode_insn_Fmt_4(ctx, &u.f_decode_insn2, insn); + if (trans_LDX2(ctx, &u.f_decode_insn2)) return true; + break; + case 0x0000000e: + /* 1001000. ....1110 */ + /* insn.decode:133 */ + decode_insn_extract_decode_insn_Fmt_4(ctx, &u.f_decode_insn2, insn); + if (trans_LDX3(ctx, &u.f_decode_insn2)) return true; + break; + case 0x0000000f: + /* 1001000. 
....1111 */ + /* insn.decode:161 */ + decode_insn_extract_decode_insn_Fmt_4(ctx, &u.f_decode_insn2, insn); + if (trans_POP(ctx, &u.f_decode_insn2)) return true; + break; + } + break; + case 0x1: + /* 1001001. ........ */ + switch (insn & 0x0000000f) { + case 0x00000000: + /* 1001001. ....0000 */ + /* insn.decode:140 */ + decode_insn_extract_ldst_s(ctx, &u.f_rd_imm, insn); + if (trans_STS(ctx, &u.f_rd_imm)) return true; + break; + case 0x00000001: + /* 1001001. ....0001 */ + /* insn.decode:146 */ + decode_insn_extract_decode_insn_Fmt_4(ctx, &u.f_decode_insn2, insn); + if (trans_STZ2(ctx, &u.f_decode_insn2)) return true; + break; + case 0x00000002: + /* 1001001. ....0010 */ + /* insn.decode:147 */ + decode_insn_extract_decode_insn_Fmt_4(ctx, &u.f_decode_insn2, insn); + if (trans_STZ3(ctx, &u.f_decode_insn2)) return true; + break; + case 0x00000004: + /* 1001001. ....0100 */ + /* insn.decode:162 */ + decode_insn_extract_decode_insn_Fmt_4(ctx, &u.f_decode_insn2, insn); + if (trans_XCH(ctx, &u.f_decode_insn2)) return true; + break; + case 0x00000005: + /* 1001001. ....0101 */ + /* insn.decode:164 */ + decode_insn_extract_decode_insn_Fmt_4(ctx, &u.f_decode_insn2, insn); + if (trans_LAS(ctx, &u.f_decode_insn2)) return true; + break; + case 0x00000006: + /* 1001001. ....0110 */ + /* insn.decode:163 */ + decode_insn_extract_decode_insn_Fmt_4(ctx, &u.f_decode_insn2, insn); + if (trans_LAC(ctx, &u.f_decode_insn2)) return true; + break; + case 0x00000007: + /* 1001001. ....0111 */ + /* insn.decode:165 */ + decode_insn_extract_decode_insn_Fmt_4(ctx, &u.f_decode_insn2, insn); + if (trans_LAT(ctx, &u.f_decode_insn2)) return true; + break; + case 0x00000009: + /* 1001001. ....1001 */ + /* insn.decode:144 */ + decode_insn_extract_decode_insn_Fmt_4(ctx, &u.f_decode_insn2, insn); + if (trans_STY2(ctx, &u.f_decode_insn2)) return true; + break; + case 0x0000000a: + /* 1001001. ....1010 */ + /* insn.decode:145 */ + decode_insn_extract_decode_insn_Fmt_4(ctx, &u.f_decode_insn2, insn); + if (trans_STY3(ctx, &u.f_decode_insn2)) return true; + break; + case 0x0000000c: + /* 1001001. ....1100 */ + /* insn.decode:141 */ + decode_insn_extract_decode_insn_Fmt_18(ctx, &u.f_decode_insn9, insn); + if (trans_STX1(ctx, &u.f_decode_insn9)) return true; + break; + case 0x0000000d: + /* 1001001. ....1101 */ + /* insn.decode:142 */ + decode_insn_extract_decode_insn_Fmt_18(ctx, &u.f_decode_insn9, insn); + if (trans_STX2(ctx, &u.f_decode_insn9)) return true; + break; + case 0x0000000e: + /* 1001001. ....1110 */ + /* insn.decode:143 */ + decode_insn_extract_decode_insn_Fmt_18(ctx, &u.f_decode_insn9, insn); + if (trans_STX3(ctx, &u.f_decode_insn9)) return true; + break; + case 0x0000000f: + /* 1001001. ....1111 */ + /* insn.decode:160 */ + decode_insn_extract_decode_insn_Fmt_4(ctx, &u.f_decode_insn2, insn); + if (trans_PUSH(ctx, &u.f_decode_insn2)) return true; + break; + } + break; + case 0x2: + /* 1001010. ........ */ + switch ((insn >> 1) & 0x7) { + case 0x0: + /* 1001010. ....000. */ + decode_insn_extract_decode_insn_Fmt_4(ctx, &u.f_decode_insn2, insn); + switch (insn & 0x00000001) { + case 0x00000000: + /* 1001010. ....0000 */ + /* insn.decode:66 */ + if (trans_COM(ctx, &u.f_decode_insn2)) return true; + break; + case 0x00000001: + /* 1001010. ....0001 */ + /* insn.decode:67 */ + if (trans_NEG(ctx, &u.f_decode_insn2)) return true; + break; + } + break; + case 0x1: + /* 1001010. ....001. */ + decode_insn_extract_decode_insn_Fmt_4(ctx, &u.f_decode_insn2, insn); + switch (insn & 0x00000001) { + case 0x00000000: + /* 1001010. 
....0010 */ + /* insn.decode:173 */ + if (trans_SWAP(ctx, &u.f_decode_insn2)) return true; + break; + case 0x00000001: + /* 1001010. ....0011 */ + /* insn.decode:68 */ + if (trans_INC(ctx, &u.f_decode_insn2)) return true; + break; + } + break; + case 0x2: + /* 1001010. ....010. */ + decode_insn_extract_decode_insn_Fmt_4(ctx, &u.f_decode_insn2, insn); + switch (insn & 0x00000001) { + case 0x00000001: + /* 1001010. ....0101 */ + /* insn.decode:172 */ + if (trans_ASR(ctx, &u.f_decode_insn2)) return true; + break; + } + break; + case 0x3: + /* 1001010. ....011. */ + decode_insn_extract_decode_insn_Fmt_4(ctx, &u.f_decode_insn2, insn); + switch (insn & 0x00000001) { + case 0x00000000: + /* 1001010. ....0110 */ + /* insn.decode:170 */ + if (trans_LSR(ctx, &u.f_decode_insn2)) return true; + break; + case 0x00000001: + /* 1001010. ....0111 */ + /* insn.decode:171 */ + if (trans_ROR(ctx, &u.f_decode_insn2)) return true; + break; + } + break; + case 0x4: + /* 1001010. ....100. */ + switch (insn & 0x00000181) { + case 0x00000000: + /* 10010100 0...1000 */ + /* insn.decode:178 */ + decode_insn_extract_op_bit(ctx, &u.f_decode_insn4, insn); + if (trans_BSET(ctx, &u.f_decode_insn4)) return true; + break; + case 0x00000001: + /* 10010100 0...1001 */ + decode_insn_extract_decode_insn_Fmt_10(ctx, &u.f_decode_insn6, insn); + switch ((insn >> 4) & 0x7) { + case 0x0: + /* 10010100 00001001 */ + /* insn.decode:91 */ + if (trans_IJMP(ctx, &u.f_decode_insn6)) return true; + break; + case 0x1: + /* 10010100 00011001 */ + /* insn.decode:92 */ + if (trans_EIJMP(ctx, &u.f_decode_insn6)) return true; + break; + } + break; + case 0x00000080: + /* 10010100 1...1000 */ + /* insn.decode:179 */ + decode_insn_extract_op_bit(ctx, &u.f_decode_insn4, insn); + if (trans_BCLR(ctx, &u.f_decode_insn4)) return true; + break; + case 0x00000100: + /* 10010101 0...1000 */ + decode_insn_extract_decode_insn_Fmt_10(ctx, &u.f_decode_insn6, insn); + switch ((insn >> 4) & 0x7) { + case 0x0: + /* 10010101 00001000 */ + /* insn.decode:98 */ + if (trans_RET(ctx, &u.f_decode_insn6)) return true; + break; + case 0x1: + /* 10010101 00011000 */ + /* insn.decode:99 */ + if (trans_RETI(ctx, &u.f_decode_insn6)) return true; + break; + } + break; + case 0x00000101: + /* 10010101 0...1001 */ + decode_insn_extract_decode_insn_Fmt_10(ctx, &u.f_decode_insn6, insn); + switch ((insn >> 4) & 0x7) { + case 0x0: + /* 10010101 00001001 */ + /* insn.decode:95 */ + if (trans_ICALL(ctx, &u.f_decode_insn6)) return true; + break; + case 0x1: + /* 10010101 00011001 */ + /* insn.decode:96 */ + if (trans_EICALL(ctx, &u.f_decode_insn6)) return true; + break; + } + break; + case 0x00000180: + /* 10010101 1...1000 */ + decode_insn_extract_decode_insn_Fmt_10(ctx, &u.f_decode_insn6, insn); + switch ((insn >> 4) & 0x7) { + case 0x0: + /* 10010101 10001000 */ + /* insn.decode:186 */ + if (trans_SLEEP(ctx, &u.f_decode_insn6)) return true; + break; + case 0x1: + /* 10010101 10011000 */ + /* insn.decode:184 */ + if (trans_BREAK(ctx, &u.f_decode_insn6)) return true; + break; + case 0x2: + /* 10010101 10101000 */ + /* insn.decode:187 */ + if (trans_WDR(ctx, &u.f_decode_insn6)) return true; + break; + case 0x4: + /* 10010101 11001000 */ + /* insn.decode:150 */ + if (trans_LPM1(ctx, &u.f_decode_insn6)) return true; + break; + case 0x5: + /* 10010101 11011000 */ + /* insn.decode:153 */ + if (trans_ELPM1(ctx, &u.f_decode_insn6)) return true; + break; + case 0x6: + /* 10010101 11101000 */ + /* insn.decode:156 */ + if (trans_SPM(ctx, &u.f_decode_insn6)) return true; + break; + case 0x7: 
+ /* 10010101 11111000 */ + /* insn.decode:157 */ + if (trans_SPMX(ctx, &u.f_decode_insn6)) return true; + break; + } + break; + } + break; + case 0x5: + /* 1001010. ....101. */ + switch (insn & 0x00000001) { + case 0x00000000: + /* 1001010. ....1010 */ + /* insn.decode:69 */ + decode_insn_extract_decode_insn_Fmt_4(ctx, &u.f_decode_insn2, insn); + if (trans_DEC(ctx, &u.f_decode_insn2)) return true; + break; + case 0x00000001: + /* 1001010. ....1011 */ + decode_insn_extract_decode_insn_Fmt_6(ctx, &u.f_decode_insn3, insn); + switch ((insn >> 8) & 0x1) { + case 0x0: + /* 10010100 ....1011 */ + /* insn.decode:76 */ + if (trans_DES(ctx, &u.f_decode_insn3)) return true; + break; + } + break; + } + break; + case 0x6: + /* 1001010. ....110. */ + /* insn.decode:93 */ + decode_insn_extract_decode_insn_Fmt_11(ctx, &u.f_decode_insn3, insn); + if (trans_JMP(ctx, &u.f_decode_insn3)) return true; + break; + case 0x7: + /* 1001010. ....111. */ + /* insn.decode:97 */ + decode_insn_extract_decode_insn_Fmt_11(ctx, &u.f_decode_insn3, insn); + if (trans_CALL(ctx, &u.f_decode_insn3)) return true; + break; + } + break; + case 0x3: + /* 1001011. ........ */ + decode_insn_extract_op_rd_imm6(ctx, &u.f_rd_imm, insn); + switch ((insn >> 8) & 0x1) { + case 0x0: + /* 10010110 ........ */ + /* insn.decode:55 */ + if (trans_ADIW(ctx, &u.f_rd_imm)) return true; + break; + case 0x1: + /* 10010111 ........ */ + /* insn.decode:60 */ + if (trans_SBIW(ctx, &u.f_rd_imm)) return true; + break; + } + break; + } + break; + case 0x00000800: + /* 10011... ........ */ + switch ((insn >> 10) & 0x1) { + case 0x0: + /* 100110.. ........ */ + decode_insn_extract_decode_insn_Fmt_13(ctx, &u.f_decode_insn8, insn); + switch ((insn >> 8) & 0x3) { + case 0x0: + /* 10011000 ........ */ + /* insn.decode:175 */ + if (trans_CBI(ctx, &u.f_decode_insn8)) return true; + break; + case 0x1: + /* 10011001 ........ */ + /* insn.decode:106 */ + if (trans_SBIC(ctx, &u.f_decode_insn8)) return true; + break; + case 0x2: + /* 10011010 ........ */ + /* insn.decode:174 */ + if (trans_SBI(ctx, &u.f_decode_insn8)) return true; + break; + case 0x3: + /* 10011011 ........ */ + /* insn.decode:107 */ + if (trans_SBIS(ctx, &u.f_decode_insn8)) return true; + break; + } + break; + case 0x1: + /* 100111.. ........ */ + /* insn.decode:70 */ + decode_insn_extract_op_rd_rr(ctx, &u.f_rd_rr, insn); + if (trans_MUL(ctx, &u.f_rd_rr)) return true; + break; + } + break; + case 0x00002000: + /* 10110... ........ */ + /* insn.decode:158 */ + decode_insn_extract_io_rd_imm(ctx, &u.f_rd_imm, insn); + if (trans_IN(ctx, &u.f_rd_imm)) return true; + break; + case 0x00002800: + /* 10111... ........ */ + /* insn.decode:159 */ + decode_insn_extract_io_rd_imm(ctx, &u.f_rd_imm, insn); + if (trans_OUT(ctx, &u.f_rd_imm)) return true; + break; + } + break; + case 0x0000c000: + /* 11.0.... ........ */ + switch ((insn >> 13) & 0x1) { + case 0x0: + /* 1100.... ........ */ + /* insn.decode:90 */ + decode_insn_extract_decode_insn_Fmt_9(ctx, &u.f_decode_insn3, insn); + if (trans_RJMP(ctx, &u.f_decode_insn3)) return true; + break; + case 0x1: + /* 1110.... ........ */ + /* insn.decode:129 */ + decode_insn_extract_op_rd_imm8(ctx, &u.f_rd_imm, insn); + if (trans_LDI(ctx, &u.f_rd_imm)) return true; + break; + } + break; + case 0x0000d000: + /* 11.1.... ........ */ + switch ((insn >> 13) & 0x1) { + case 0x0: + /* 1101.... ........ 
*/ + /* insn.decode:94 */ + decode_insn_extract_decode_insn_Fmt_9(ctx, &u.f_decode_insn3, insn); + if (trans_RCALL(ctx, &u.f_decode_insn3)) return true; + break; + case 0x1: + /* 1111.... ........ */ + switch ((insn >> 10) & 0x3) { + case 0x0: + /* 111100.. ........ */ + /* insn.decode:108 */ + decode_insn_extract_op_bit_imm(ctx, &u.f_decode_insn5, insn); + if (trans_BRBS(ctx, &u.f_decode_insn5)) return true; + break; + case 0x1: + /* 111101.. ........ */ + /* insn.decode:109 */ + decode_insn_extract_op_bit_imm(ctx, &u.f_decode_insn5, insn); + if (trans_BRBC(ctx, &u.f_decode_insn5)) return true; + break; + case 0x2: + /* 111110.. ........ */ + decode_insn_extract_decode_insn_Fmt_19(ctx, &u.f_decode_insn10, insn); + switch (insn & 0x00000208) { + case 0x00000000: + /* 1111100. ....0... */ + /* insn.decode:177 */ + if (trans_BLD(ctx, &u.f_decode_insn10)) return true; + break; + case 0x00000200: + /* 1111101. ....0... */ + /* insn.decode:176 */ + if (trans_BST(ctx, &u.f_decode_insn10)) return true; + break; + } + break; + case 0x3: + /* 111111.. ........ */ + decode_insn_extract_decode_insn_Fmt_12(ctx, &u.f_decode_insn7, insn); + switch (insn & 0x00000208) { + case 0x00000000: + /* 1111110. ....0... */ + /* insn.decode:104 */ + if (trans_SBRC(ctx, &u.f_decode_insn7)) return true; + break; + case 0x00000200: + /* 1111111. ....0... */ + /* insn.decode:105 */ + if (trans_SBRS(ctx, &u.f_decode_insn7)) return true; + break; + } + break; + } + break; + } + break; + } + return false; +} diff --git a/qemu/target/avr/gdbstub.c b/qemu/target/avr/gdbstub.c new file mode 100644 index 0000000000..c28ed67efe --- /dev/null +++ b/qemu/target/avr/gdbstub.c @@ -0,0 +1,84 @@ +/* + * QEMU AVR gdbstub + * + * Copyright (c) 2016-2020 Michael Rolnik + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see + * + */ + +#include "qemu/osdep.h" +#include "exec/gdbstub.h" + +int avr_cpu_gdb_read_register(CPUState *cs, GByteArray *mem_buf, int n) +{ + AVRCPU *cpu = AVR_CPU(cs); + CPUAVRState *env = &cpu->env; + + /* R */ + if (n < 32) { + return gdb_get_reg8(mem_buf, env->r[n]); + } + + /* SREG */ + if (n == 32) { + uint8_t sreg = cpu_get_sreg(env); + + return gdb_get_reg8(mem_buf, sreg); + } + + /* SP */ + if (n == 33) { + return gdb_get_reg16(mem_buf, env->sp & 0x0000ffff); + } + + /* PC */ + if (n == 34) { + return gdb_get_reg32(mem_buf, env->pc_w * 2); + } + + return 0; +} + +int avr_cpu_gdb_write_register(CPUState *cs, uint8_t *mem_buf, int n) +{ + AVRCPU *cpu = AVR_CPU(cs); + CPUAVRState *env = &cpu->env; + + /* R */ + if (n < 32) { + env->r[n] = *mem_buf; + return 1; + } + + /* SREG */ + if (n == 32) { + cpu_set_sreg(env, *mem_buf); + return 1; + } + + /* SP */ + if (n == 33) { + env->sp = lduw_p(mem_buf); + return 2; + } + + /* PC */ + if (n == 34) { + env->pc_w = ldl_p(mem_buf) / 2; + return 4; + } + + return 0; +} diff --git a/qemu/target/avr/helper.c b/qemu/target/avr/helper.c new file mode 100644 index 0000000000..60d0a648eb --- /dev/null +++ b/qemu/target/avr/helper.c @@ -0,0 +1,373 @@ +/* + * QEMU AVR CPU helpers + * + * Copyright (c) 2016-2020 Michael Rolnik + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see + * + */ + +#include "qemu/osdep.h" +#include "cpu.h" +#include "exec/exec-all.h" +#include "exec/helper-proto.h" +#include "unicorn_helper.h" + +bool avr_cpu_exec_interrupt(CPUState *cs, int interrupt_request) +{ + bool ret = false; + CPUClass *cc = CPU_GET_CLASS(cs); + AVRCPU *cpu = AVR_CPU(cs); + CPUAVRState *env = &cpu->env; + + if (interrupt_request & CPU_INTERRUPT_RESET) { + if (cpu_interrupts_enabled(env)) { + cs->exception_index = EXCP_RESET; + cc->do_interrupt(cs); + + cs->interrupt_request &= ~CPU_INTERRUPT_RESET; + + ret = true; + } + } + if (interrupt_request & CPU_INTERRUPT_HARD) { + if (cpu_interrupts_enabled(env) && env->intsrc != 0) { + int index = ctz32(env->intsrc); + cs->exception_index = EXCP_INT(index); + cc->do_interrupt(cs); + + env->intsrc &= env->intsrc - 1; /* clear the interrupt */ + cs->interrupt_request &= ~CPU_INTERRUPT_HARD; + + ret = true; + } + } + return ret; +} + +void avr_cpu_do_interrupt(CPUState *cs) +{ + AVRCPU *cpu = AVR_CPU(cs); + CPUAVRState *env = &cpu->env; + + uint32_t ret = env->pc_w; + int vector = 0; + int size = avr_feature(env, AVR_FEATURE_JMP_CALL) ? 
2 : 1;
+    int base = 0;
+
+    if (cs->exception_index == EXCP_RESET) {
+        vector = 0;
+    } else if (env->intsrc != 0) {
+        vector = ctz32(env->intsrc) + 1;
+    }
+
+    /* Push the return address onto the stack, least significant byte first */
+    if (avr_feature(env, AVR_FEATURE_3_BYTE_PC)) {
+        cpu_stb_data(env, env->sp--, (ret & 0x0000ff));
+        cpu_stb_data(env, env->sp--, (ret & 0x00ff00) >> 8);
+        cpu_stb_data(env, env->sp--, (ret & 0xff0000) >> 16);
+    } else if (avr_feature(env, AVR_FEATURE_2_BYTE_PC)) {
+        cpu_stb_data(env, env->sp--, (ret & 0x0000ff));
+        cpu_stb_data(env, env->sp--, (ret & 0x00ff00) >> 8);
+    } else {
+        cpu_stb_data(env, env->sp--, (ret & 0x0000ff));
+    }
+
+    env->pc_w = base + vector * size;
+    env->sregI = 0; /* clear Global Interrupt Flag */
+
+    cs->exception_index = -1;
+}
+
+int avr_cpu_memory_rw_debug(CPUState *cs, vaddr addr, uint8_t *buf,
+                            int len, bool is_write)
+{
+    return cpu_memory_rw_debug(cs, addr, buf, len, is_write);
+}
+
+hwaddr avr_cpu_get_phys_page_debug(CPUState *cs, vaddr addr)
+{
+    return addr; /* I assume 1:1 address correspondence */
+}
+
+bool avr_cpu_tlb_fill(CPUState *cs, vaddr address, int size,
+                      MMUAccessType access_type, int mmu_idx,
+                      bool probe, uintptr_t retaddr)
+{
+    int prot = 0;
+    MemTxAttrs attrs = {0};
+    uint32_t paddr;
+
+    address &= TARGET_PAGE_MASK;
+
+    if (mmu_idx == MMU_CODE_IDX) {
+        /* access to code in flash */
+        paddr = avr_code_base(&AVR_CPU(cs)->env) | address;
+        prot = PAGE_READ | PAGE_EXEC;
+#if 0
+        if (paddr + TARGET_PAGE_SIZE > OFFSET_DATA) {
+            error_report("execution left flash memory");
+            abort();
+        }
+#endif
+    } else if (address < NUMBER_OF_CPU_REGISTERS + NUMBER_OF_IO_REGISTERS) {
+        /*
+         * access to CPU registers, exit and rebuild this TB to use full access
+         * in case it touches specially handled registers like SREG or SP
+         */
+        AVRCPU *cpu = AVR_CPU(cs);
+        CPUAVRState *env = &cpu->env;
+        env->fullacc = 1;
+        cpu_loop_exit_restore(cs, retaddr);
+    } else {
+        /* access to memory: nothing special */
+        paddr = OFFSET_DATA | address;
+        prot = PAGE_READ | PAGE_WRITE;
+    }
+
+    tlb_set_page_with_attrs(cs, address, paddr, attrs, prot,
+                            mmu_idx, TARGET_PAGE_SIZE);
+
+    return true;
+}
+
+/*
+ * helpers
+ */
+
+void helper_sleep(CPUAVRState *env)
+{
+    CPUState *cs = env_cpu(env);
+
+    cs->exception_index = EXCP_HLT;
+    cpu_loop_exit(cs);
+}
+
+void helper_unsupported(CPUAVRState *env)
+{
+    CPUState *cs = env_cpu(env);
+
+    /*
+     * I could not find what happens on the real platform, so
+     * it's EXCP_DEBUG for now
+     */
+    cs->exception_index = EXCP_DEBUG;
+#if 0
+    if (qemu_loglevel_mask(LOG_UNIMP)) {
+        qemu_log("UNSUPPORTED\n");
+        cpu_dump_state(cs, stderr, 0);
+    }
+#endif
+    cpu_loop_exit(cs);
+}
+
+void helper_debug(CPUAVRState *env)
+{
+    CPUState *cs = env_cpu(env);
+
+    cs->exception_index = EXCP_DEBUG;
+    cpu_loop_exit(cs);
+}
+
+void helper_break(CPUAVRState *env)
+{
+    CPUState *cs = env_cpu(env);
+
+    cs->exception_index = EXCP_DEBUG;
+    cpu_loop_exit(cs);
+}
+
+void helper_wdr(CPUAVRState *env)
+{
+    CPUState *cs = env_cpu(env);
+
+    /* watchdog (WDR) is not implemented yet, placeholder */
+    cs->exception_index = EXCP_DEBUG;
+    cpu_loop_exit(cs);
+}
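+
+/*
+ * I/O port access
+ *
+ * The two helpers below intercept the CPU-internal ports (0x38..0x3f:
+ * RAMPD, RAMPX, RAMPY, RAMPZ, EIND, SPL, SPH, SREG) and forward every other
+ * port to guest memory at OFFSET_IO_REGISTERS + port.  For example, a guest
+ * "in r24, 0x3f" is expected to end up in helper_inb(env, 0x3f) and return
+ * cpu_get_sreg(env), while "in r24, 0x20" is serviced from memory; the
+ * register/port numbers in this example are illustrative only.
+ */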
+
+/*
+ * This function implements the IN instruction.
+ *
+ * It does the following:
+ * a. if an IO register belongs to the CPU, its value is read and returned
+ * b. otherwise the IO address is translated to a memory address and
+ *    physical memory is read
+ * c. it caches the value for the sake of SBI, SBIC, SBIS & CBI implementation
+ */
+target_ulong helper_inb(CPUAVRState *env, uint32_t port)
+{
+    CPUAVRState *const cpu = env;
+    struct uc_struct *const uc = env->uc;
+
+    target_ulong data = 0;
+
+    switch (port) {
+    case 0x38: /* RAMPD */
+        data = 0xff & (env->rampD >> 16);
+        break;
+    case 0x39: /* RAMPX */
+        data = 0xff & (env->rampX >> 16);
+        break;
+    case 0x3a: /* RAMPY */
+        data = 0xff & (env->rampY >> 16);
+        break;
+    case 0x3b: /* RAMPZ */
+        data = 0xff & (env->rampZ >> 16);
+        break;
+    case 0x3c: /* EIND */
+        data = 0xff & (env->eind >> 16);
+        break;
+    case 0x3d: /* SPL */
+        data = env->sp & 0x00ff;
+        break;
+    case 0x3e: /* SPH */
+        data = env->sp >> 8;
+        break;
+    case 0x3f: /* SREG */
+        data = cpu_get_sreg(env);
+        break;
+    default:
+        /* not a special register, pass to normal memory access */
+        data = address_space_ldub(&address_space_memory,
+                                  OFFSET_IO_REGISTERS + port,
+                                  MEMTXATTRS_UNSPECIFIED, NULL);
+    }
+
+    return data;
+}
+
+/*
+ * This function implements the OUT instruction.
+ *
+ * It does the following:
+ * a. if an IO register belongs to the CPU, its value is written into the
+ *    register
+ * b. otherwise the IO address is translated to a memory address and
+ *    physical memory is written
+ * c. it caches the value for the sake of SBI, SBIC, SBIS & CBI implementation
+ */
+void helper_outb(CPUAVRState *env, uint32_t port, uint32_t data)
+{
+    CPUAVRState *const cpu = env;
+    struct uc_struct *const uc = env->uc;
+
+    data &= 0x000000ff;
+
+    switch (port) {
+    case 0x38: /* RAMPD */
+        if (avr_feature(env, AVR_FEATURE_RAMPD)) {
+            env->rampD = (data & 0xff) << 16;
+        }
+        break;
+    case 0x39: /* RAMPX */
+        if (avr_feature(env, AVR_FEATURE_RAMPX)) {
+            env->rampX = (data & 0xff) << 16;
+        }
+        break;
+    case 0x3a: /* RAMPY */
+        if (avr_feature(env, AVR_FEATURE_RAMPY)) {
+            env->rampY = (data & 0xff) << 16;
+        }
+        break;
+    case 0x3b: /* RAMPZ */
+        if (avr_feature(env, AVR_FEATURE_RAMPZ)) {
+            env->rampZ = (data & 0xff) << 16;
+        }
+        break;
+    case 0x3c: /* EIND */
+        env->eind = (data & 0xff) << 16;
+        break;
+    case 0x3d: /* SPL */
+        env->sp = (env->sp & 0xff00) | (data);
+        break;
+    case 0x3e: /* SPH */
+        if (avr_feature(env, AVR_FEATURE_2_BYTE_SP)) {
+            env->sp = (env->sp & 0x00ff) | (data << 8);
+        }
+        break;
+    case 0x3f: /* SREG */
+        cpu_set_sreg(env, data);
+        break;
+    default:
+        /* not a special register, pass to normal memory access */
+        address_space_stb(&address_space_memory, OFFSET_IO_REGISTERS + port,
+                          data, MEMTXATTRS_UNSPECIFIED, NULL);
+    }
+}
+
+/*
+ * This function implements the LD instruction when there is a possibility
+ * to read from a CPU register
+ */
+target_ulong helper_fullrd(CPUAVRState *env, uint32_t addr)
+{
+    CPUAVRState *const cpu = env;
+    struct uc_struct *const uc = env->uc;
+
+    uint8_t data;
+
+    env->fullacc = false;
+
+    if (addr < NUMBER_OF_CPU_REGISTERS) {
+        /* CPU registers */
+        data = env->r[addr];
+    } else if (addr < NUMBER_OF_CPU_REGISTERS + NUMBER_OF_IO_REGISTERS) {
+        /* IO registers */
+        data = helper_inb(env, addr - NUMBER_OF_CPU_REGISTERS);
+    } else {
+        /* memory */
+        data = address_space_ldub(&address_space_memory, OFFSET_DATA | addr,
+                                  MEMTXATTRS_UNSPECIFIED, NULL);
+    }
+    return data;
+}
+
+/*
+ * This function implements the ST instruction when there is a possibility
+ * to write into a CPU register
+ */
+void helper_fullwr(CPUAVRState *env, uint32_t data, uint32_t addr)
+{
+    CPUAVRState *const cpu = env;
+    struct uc_struct *const uc = env->uc;
+
+    env->fullacc = false;
+
+    /* Following logic assumes this: */
+    assert(OFFSET_CPU_REGISTERS ==
OFFSET_DATA); + assert(OFFSET_IO_REGISTERS == OFFSET_CPU_REGISTERS + + NUMBER_OF_CPU_REGISTERS); + + if (addr < NUMBER_OF_CPU_REGISTERS) { + /* CPU registers */ + env->r[addr] = data; + } else if (addr < NUMBER_OF_CPU_REGISTERS + NUMBER_OF_IO_REGISTERS) { + /* IO registers */ + helper_outb(env, addr - NUMBER_OF_CPU_REGISTERS, data); + } else { + /* memory */ + address_space_stb(&address_space_memory, OFFSET_DATA | addr, data, + MEMTXATTRS_UNSPECIFIED, NULL); + } +} + +void helper_uc_avr_exit(CPUAVRState *env) +{ + CPUState *cs = env_cpu(env); + + cs->exception_index = EXCP_HLT; + cs->halted = 1; + cpu_loop_exit(cs); +} diff --git a/qemu/target/avr/helper.h b/qemu/target/avr/helper.h new file mode 100644 index 0000000000..06fc1d323c --- /dev/null +++ b/qemu/target/avr/helper.h @@ -0,0 +1,37 @@ +/* + * QEMU AVR CPU helpers + * + * Copyright (c) 2016-2020 Michael Rolnik + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see + * + */ + +/* + Modified for Unicorn Engine by Glenn Baker , 2024 +*/ + +DEF_HELPER_4(uc_tracecode, void, i32, i32, ptr, i64) +DEF_HELPER_6(uc_traceopcode, void, ptr, i64, i64, i32, ptr, i64) +DEF_HELPER_1(uc_avr_exit,void, env) + +DEF_HELPER_1(wdr, void, env) +DEF_HELPER_1(debug, void, env) +DEF_HELPER_1(break, void, env) +DEF_HELPER_1(sleep, void, env) +DEF_HELPER_1(unsupported, void, env) +DEF_HELPER_3(outb, void, env, i32, i32) +DEF_HELPER_2(inb, tl, env, i32) +DEF_HELPER_3(fullwr, void, env, i32, i32) +DEF_HELPER_2(fullrd, tl, env, i32) diff --git a/qemu/target/avr/insn.decode b/qemu/target/avr/insn.decode new file mode 100644 index 0000000000..482c23ad0c --- /dev/null +++ b/qemu/target/avr/insn.decode @@ -0,0 +1,187 @@ +# +# AVR instruction decode definitions. +# +# Copyright (c) 2019-2020 Michael Rolnik +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, see . +# + +# +# regs_16_31_by_one = [16 .. 31] +# regs_16_23_by_one = [16 .. 23] +# regs_24_30_by_two = [24, 26, 28, 30] +# regs_00_30_by_two = [0, 2, 4, 6, 8, .. 30] + +%rd 4:5 +%rr 9:1 0:4 + +%rd_a 4:4 !function=to_regs_16_31_by_one +%rd_b 4:3 !function=to_regs_16_23_by_one +%rd_c 4:2 !function=to_regs_24_30_by_two +%rr_a 0:4 !function=to_regs_16_31_by_one +%rr_b 0:3 !function=to_regs_16_23_by_one + +%imm6 6:2 0:4 +%imm8 8:4 0:4 + +%io_imm 9:2 0:4 +%ldst_d_imm 13:1 10:2 0:3 + + +&rd_rr rd rr +&rd_imm rd imm + +@op_rd_rr .... .. . ..... .... 
&rd_rr rd=%rd rr=%rr +@op_rd_imm6 .... .... .. .. .... &rd_imm rd=%rd_c imm=%imm6 +@op_rd_imm8 .... .... .... .... &rd_imm rd=%rd_a imm=%imm8 +@fmul .... .... . ... . ... &rd_rr rd=%rd_b rr=%rr_b + +# +# Arithmetic Instructions +# +ADD 0000 11 . ..... .... @op_rd_rr +ADC 0001 11 . ..... .... @op_rd_rr +ADIW 1001 0110 .. .. .... @op_rd_imm6 +SUB 0001 10 . ..... .... @op_rd_rr +SUBI 0101 .... .... .... @op_rd_imm8 +SBC 0000 10 . ..... .... @op_rd_rr +SBCI 0100 .... .... .... @op_rd_imm8 +SBIW 1001 0111 .. .. .... @op_rd_imm6 +AND 0010 00 . ..... .... @op_rd_rr +ANDI 0111 .... .... .... @op_rd_imm8 +OR 0010 10 . ..... .... @op_rd_rr +ORI 0110 .... .... .... @op_rd_imm8 +EOR 0010 01 . ..... .... @op_rd_rr +COM 1001 010 rd:5 0000 +NEG 1001 010 rd:5 0001 +INC 1001 010 rd:5 0011 +DEC 1001 010 rd:5 1010 +MUL 1001 11 . ..... .... @op_rd_rr +MULS 0000 0010 .... .... &rd_rr rd=%rd_a rr=%rr_a +MULSU 0000 0011 0 ... 0 ... @fmul +FMUL 0000 0011 0 ... 1 ... @fmul +FMULS 0000 0011 1 ... 0 ... @fmul +FMULSU 0000 0011 1 ... 1 ... @fmul +DES 1001 0100 imm:4 1011 + +# +# Branch Instructions +# + +# The 22-bit immediate is partially in the opcode word, +# and partially in the next. Use append_16 to build the +# complete 22-bit value. +%imm_call 4:5 0:1 !function=append_16 + +@op_bit .... .... . bit:3 .... +@op_bit_imm .... .. imm:s7 bit:3 + +RJMP 1100 imm:s12 +IJMP 1001 0100 0000 1001 +EIJMP 1001 0100 0001 1001 +JMP 1001 010 ..... 110 . imm=%imm_call +RCALL 1101 imm:s12 +ICALL 1001 0101 0000 1001 +EICALL 1001 0101 0001 1001 +CALL 1001 010 ..... 111 . imm=%imm_call +RET 1001 0101 0000 1000 +RETI 1001 0101 0001 1000 +CPSE 0001 00 . ..... .... @op_rd_rr +CP 0001 01 . ..... .... @op_rd_rr +CPC 0000 01 . ..... .... @op_rd_rr +CPI 0011 .... .... .... @op_rd_imm8 +SBRC 1111 110 rr:5 0 bit:3 +SBRS 1111 111 rr:5 0 bit:3 +SBIC 1001 1001 reg:5 bit:3 +SBIS 1001 1011 reg:5 bit:3 +BRBS 1111 00 ....... ... @op_bit_imm +BRBC 1111 01 ....... ... @op_bit_imm + +# +# Data Transfer Instructions +# + +%rd_d 4:4 !function=to_regs_00_30_by_two +%rr_d 0:4 !function=to_regs_00_30_by_two + +@io_rd_imm .... . .. ..... .... &rd_imm rd=%rd imm=%io_imm +@ldst_d .. . . .. . rd:5 . ... &rd_imm imm=%ldst_d_imm + +# The 16-bit immediate is completely in the next word. +# Fields cannot be defined with no bits, so we cannot play +# the same trick and append to a zero-bit value. +# Defer reading the immediate until trans_{LDS,STS}. +@ldst_s .... ... rd:5 .... imm=0 + +MOV 0010 11 . ..... .... @op_rd_rr +MOVW 0000 0001 .... .... &rd_rr rd=%rd_d rr=%rr_d +LDI 1110 .... .... .... @op_rd_imm8 +LDS 1001 000 ..... 0000 @ldst_s +LDX1 1001 000 rd:5 1100 +LDX2 1001 000 rd:5 1101 +LDX3 1001 000 rd:5 1110 +LDY2 1001 000 rd:5 1001 +LDY3 1001 000 rd:5 1010 +LDZ2 1001 000 rd:5 0001 +LDZ3 1001 000 rd:5 0010 +LDDY 10 . 0 .. 0 ..... 1 ... @ldst_d +LDDZ 10 . 0 .. 0 ..... 0 ... @ldst_d +STS 1001 001 ..... 0000 @ldst_s +STX1 1001 001 rr:5 1100 +STX2 1001 001 rr:5 1101 +STX3 1001 001 rr:5 1110 +STY2 1001 001 rd:5 1001 +STY3 1001 001 rd:5 1010 +STZ2 1001 001 rd:5 0001 +STZ3 1001 001 rd:5 0010 +STDY 10 . 0 .. 1 ..... 1 ... @ldst_d +STDZ 10 . 0 .. 1 ..... 0 ... @ldst_d +LPM1 1001 0101 1100 1000 +LPM2 1001 000 rd:5 0100 +LPMX 1001 000 rd:5 0101 +ELPM1 1001 0101 1101 1000 +ELPM2 1001 000 rd:5 0110 +ELPMX 1001 000 rd:5 0111 +SPM 1001 0101 1110 1000 +SPMX 1001 0101 1111 1000 +IN 1011 0 .. ..... .... @io_rd_imm +OUT 1011 1 .. ..... .... 
@io_rd_imm +PUSH 1001 001 rd:5 1111 +POP 1001 000 rd:5 1111 +XCH 1001 001 rd:5 0100 +LAC 1001 001 rd:5 0110 +LAS 1001 001 rd:5 0101 +LAT 1001 001 rd:5 0111 + +# +# Bit and Bit-test Instructions +# +LSR 1001 010 rd:5 0110 +ROR 1001 010 rd:5 0111 +ASR 1001 010 rd:5 0101 +SWAP 1001 010 rd:5 0010 +SBI 1001 1010 reg:5 bit:3 +CBI 1001 1000 reg:5 bit:3 +BST 1111 101 rd:5 0 bit:3 +BLD 1111 100 rd:5 0 bit:3 +BSET 1001 0100 0 bit:3 1000 +BCLR 1001 0100 1 bit:3 1000 + +# +# MCU Control Instructions +# +BREAK 1001 0101 1001 1000 +NOP 0000 0000 0000 0000 +SLEEP 1001 0101 1000 1000 +WDR 1001 0101 1010 1000 diff --git a/qemu/target/avr/machine.c b/qemu/target/avr/machine.c new file mode 100644 index 0000000000..e315442787 --- /dev/null +++ b/qemu/target/avr/machine.c @@ -0,0 +1,119 @@ +/* + * QEMU AVR CPU + * + * Copyright (c) 2016-2020 Michael Rolnik + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see + * + */ + +#include "qemu/osdep.h" +#include "cpu.h" +#include "migration/cpu.h" + +static int get_sreg(QEMUFile *f, void *opaque, size_t size, + const VMStateField *field) +{ + CPUAVRState *env = opaque; + uint8_t sreg; + + sreg = qemu_get_byte(f); + cpu_set_sreg(env, sreg); + return 0; +} + +static int put_sreg(QEMUFile *f, void *opaque, size_t size, + const VMStateField *field, QJSON *vmdesc) +{ + CPUAVRState *env = opaque; + uint8_t sreg = cpu_get_sreg(env); + + qemu_put_byte(f, sreg); + return 0; +} + +static const VMStateInfo vms_sreg = { + .name = "sreg", + .get = get_sreg, + .put = put_sreg, +}; + +static int get_segment(QEMUFile *f, void *opaque, size_t size, + const VMStateField *field) +{ + uint32_t *ramp = opaque; + uint8_t temp; + + temp = qemu_get_byte(f); + *ramp = ((uint32_t)temp) << 16; + return 0; +} + +static int put_segment(QEMUFile *f, void *opaque, size_t size, + const VMStateField *field, QJSON *vmdesc) +{ + uint32_t *ramp = opaque; + uint8_t temp = *ramp >> 16; + + qemu_put_byte(f, temp); + return 0; +} + +static const VMStateInfo vms_rampD = { + .name = "rampD", + .get = get_segment, + .put = put_segment, +}; +static const VMStateInfo vms_rampX = { + .name = "rampX", + .get = get_segment, + .put = put_segment, +}; +static const VMStateInfo vms_rampY = { + .name = "rampY", + .get = get_segment, + .put = put_segment, +}; +static const VMStateInfo vms_rampZ = { + .name = "rampZ", + .get = get_segment, + .put = put_segment, +}; +static const VMStateInfo vms_eind = { + .name = "eind", + .get = get_segment, + .put = put_segment, +}; + +const VMStateDescription vms_avr_cpu = { + .name = "cpu", + .version_id = 0, + .minimum_version_id = 0, + .fields = (VMStateField[]) { + VMSTATE_UINT32(env.pc_w, AVRCPU), + VMSTATE_UINT32(env.sp, AVRCPU), + VMSTATE_UINT32(env.skip, AVRCPU), + + VMSTATE_UINT32_ARRAY(env.r, AVRCPU, NUMBER_OF_CPU_REGISTERS), + + VMSTATE_SINGLE(env, AVRCPU, 0, vms_sreg, CPUAVRState), + VMSTATE_SINGLE(env.rampD, AVRCPU, 0, vms_rampD, uint32_t), + VMSTATE_SINGLE(env.rampX, AVRCPU, 0, 
vms_rampX, uint32_t), + VMSTATE_SINGLE(env.rampY, AVRCPU, 0, vms_rampY, uint32_t), + VMSTATE_SINGLE(env.rampZ, AVRCPU, 0, vms_rampZ, uint32_t), + VMSTATE_SINGLE(env.eind, AVRCPU, 0, vms_eind, uint32_t), + + VMSTATE_END_OF_LIST() + } +}; diff --git a/qemu/target/avr/translate.c b/qemu/target/avr/translate.c new file mode 100644 index 0000000000..9ebc7dcf45 --- /dev/null +++ b/qemu/target/avr/translate.c @@ -0,0 +1,3270 @@ +/* + * QEMU AVR CPU + * + * Copyright (c) 2019-2020 Michael Rolnik + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see + * + */ + +#include "qemu/osdep.h" +#include "tcg/tcg.h" +#include "cpu.h" +#include "exec/exec-all.h" +#include "tcg/tcg-op.h" +#include "exec/cpu_ldst.h" +#include "exec/helper-proto.h" +#include "exec/helper-gen.h" +#include "exec/translator.h" +#include "exec/gen-icount.h" +#include "unicorn_helper.h" + +#define gen_decl(func, ...) \ + glue(gen_,func)(TCGContext *tcg_ctx, ## __VA_ARGS__) +#define gen_call(func, ...) \ + glue(gen_,func)(tcg_ctx, ## __VA_ARGS__) + +#define gen_io_end() gen_call(io_end) +#define gen_tb_start(...) gen_call(tb_start, __VA_ARGS__) +#define gen_tb_end(...) gen_call(tb_end, __VA_ARGS__) + +#define gen_helper_call(name, ...) \ + glue(gen_helper_,name)(tcg_ctx, ## __VA_ARGS__) +#define gen_helper_unsupported(...) \ + gen_helper_call(unsupported, __VA_ARGS__) + +#define gen_helper_debug(...) gen_helper_call(debug, __VA_ARGS__) +#define gen_helper_sleep(...) gen_helper_call(sleep, __VA_ARGS__) +#define gen_helper_inb(...) gen_helper_call(inb, __VA_ARGS__) +#define gen_helper_outb(...) gen_helper_call(outb, __VA_ARGS__) +#define gen_helper_fullrd(...) gen_helper_call(fullrd, __VA_ARGS__) +#define gen_helper_fullwr(...) gen_helper_call(fullwr, __VA_ARGS__) +#define gen_helper_wdr(...) 
gen_helper_call(wdr, __VA_ARGS__) + +/* + * Define if you want a BREAK instruction translated to a breakpoint + * Active debugging connection is assumed + * This is for + * https://github.com/seharris/qemu-avr-tests/tree/master/instruction-tests + * tests + */ +#undef BREAKPOINT_ON_BREAK + +#define cpu_pc (tcg_ctx->cpu_pc) +#define cpu_Cf (tcg_ctx->cpu_Cf) +#define cpu_Zf (tcg_ctx->cpu_ZF) +#define cpu_Nf (tcg_ctx->cpu_NF) +#define cpu_Vf (tcg_ctx->cpu_VF) +#define cpu_Sf (tcg_ctx->cpu_Sf) +#define cpu_Hf (tcg_ctx->cpu_Hf) +#define cpu_Tf (tcg_ctx->cpu_Tf) +#define cpu_If (tcg_ctx->cpu_If) +#define cpu_rampD (tcg_ctx->cpu_rampD) +#define cpu_rampX (tcg_ctx->cpu_rampX) +#define cpu_rampY (tcg_ctx->cpu_rampY) +#define cpu_rampZ (tcg_ctx->cpu_rampZ) +#define cpu_r (tcg_ctx->cpu_gpr) +#define cpu_eind (tcg_ctx->cpu_eind) +#define cpu_sp (tcg_ctx->cpu_sp) +#define cpu_skip (tcg_ctx->cpu_skip) + +static const char reg_names[NUMBER_OF_CPU_REGISTERS][8] = { + "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", + "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", + "r16", "r17", "r18", "r19", "r20", "r21", "r22", "r23", + "r24", "r25", "r26", "r27", "r28", "r29", "r30", "r31", +}; +#define REG(x) (cpu_r[x]) + +enum { + DISAS_EXIT = DISAS_TARGET_0, /* We want return to the cpu main loop. */ + DISAS_LOOKUP = DISAS_TARGET_1, /* We have a variable condition exit. */ + DISAS_CHAIN = DISAS_TARGET_2, /* We have a single condition exit. */ + DISAS_UC_EXIT = DISAS_TARGET_3, /* Unicorn: special state for exiting in the middle of tb. */ +}; + +typedef struct DisasContext DisasContext; + +/* This is the state at translation time. */ +struct DisasContext { + TranslationBlock *tb; + + CPUAVRState *env; + CPUState *cs; + + target_long npc; + uint32_t opcode; + + /* Routine used to access memory */ + int memidx; + int bstate; + int singlestep; + + /* + * some AVR instructions can make the following instruction to be skipped + * Let's name those instructions + * A - instruction that can skip the next one + * B - instruction that can be skipped. this depends on execution of A + * there are two scenarios + * 1. A and B belong to the same translation block + * 2. 
A is the last instruction in the translation block and B is the last + * + * following variables are used to simplify the skipping logic, they are + * used in the following manner (sketch) + * + * TCGLabel *skip_label = NULL; + * if (ctx.skip_cond != TCG_COND_NEVER) { + * skip_label = gen_new_label(); + * tcg_gen_brcond_tl(skip_cond, skip_var0, skip_var1, skip_label); + * } + * + * if (free_skip_var0) { + * tcg_temp_free(skip_var0); + * free_skip_var0 = false; + * } + * + * translate(&ctx); + * + * if (skip_label) { + * gen_set_label(skip_label); + * } + */ + TCGv skip_var0; + TCGv skip_var1; + TCGCond skip_cond; + bool free_skip_var0; +}; + +void avr_cpu_tcg_init(struct uc_struct *uc) +{ + int i; + + INIT_TCG_CONTEXT_FROM_UC(uc); + INIT_CPU_ENV_FROM_TCG_CONTEXT(tcg_ctx); + +#define AVR_REG_OFFS(x) offsetof(CPUAVRState, x) + cpu_pc = tcg_global_mem_new_i32(cpu_env, AVR_REG_OFFS(pc_w), "pc"); + cpu_Cf = tcg_global_mem_new_i32(cpu_env, AVR_REG_OFFS(sregC), "Cf"); + cpu_Zf = tcg_global_mem_new_i32(cpu_env, AVR_REG_OFFS(sregZ), "Zf"); + cpu_Nf = tcg_global_mem_new_i32(cpu_env, AVR_REG_OFFS(sregN), "Nf"); + cpu_Vf = tcg_global_mem_new_i32(cpu_env, AVR_REG_OFFS(sregV), "Vf"); + cpu_Sf = tcg_global_mem_new_i32(cpu_env, AVR_REG_OFFS(sregS), "Sf"); + cpu_Hf = tcg_global_mem_new_i32(cpu_env, AVR_REG_OFFS(sregH), "Hf"); + cpu_Tf = tcg_global_mem_new_i32(cpu_env, AVR_REG_OFFS(sregT), "Tf"); + cpu_If = tcg_global_mem_new_i32(cpu_env, AVR_REG_OFFS(sregI), "If"); + cpu_rampD = tcg_global_mem_new_i32(cpu_env, AVR_REG_OFFS(rampD), "rampD"); + cpu_rampX = tcg_global_mem_new_i32(cpu_env, AVR_REG_OFFS(rampX), "rampX"); + cpu_rampY = tcg_global_mem_new_i32(cpu_env, AVR_REG_OFFS(rampY), "rampY"); + cpu_rampZ = tcg_global_mem_new_i32(cpu_env, AVR_REG_OFFS(rampZ), "rampZ"); + cpu_eind = tcg_global_mem_new_i32(cpu_env, AVR_REG_OFFS(eind), "eind"); + cpu_sp = tcg_global_mem_new_i32(cpu_env, AVR_REG_OFFS(sp), "sp"); + cpu_skip = tcg_global_mem_new_i32(cpu_env, AVR_REG_OFFS(skip), "skip"); + + for (i = 0; i < NUMBER_OF_CPU_REGISTERS; i++) { + cpu_r[i] = tcg_global_mem_new_i32(cpu_env, AVR_REG_OFFS(r[i]), + reg_names[i]); + } +#undef AVR_REG_OFFS +} + +static int to_regs_16_31_by_one(DisasContext *ctx, int indx) +{ + return 16 + (indx % 16); +} + +static int to_regs_16_23_by_one(DisasContext *ctx, int indx) +{ + return 16 + (indx % 8); +} + +static int to_regs_24_30_by_two(DisasContext *ctx, int indx) +{ + return 24 + (indx % 4) * 2; +} + +static int to_regs_00_30_by_two(DisasContext *ctx, int indx) +{ + return (indx % 16) * 2; +} + +static uint16_t next_word(DisasContext *ctx) +{ + // Unicorn: + return cpu_lduw_code(ctx->env, avr_code_base(ctx->env) | (ctx->npc++ * 2)); +} + +static int append_16(DisasContext *ctx, int x) +{ + return x << 16 | next_word(ctx); +} + +static bool avr_have_feature(DisasContext *ctx, int feature) +{ + INIT_TCG_CONTEXT_AND_CPU_ENV_FROM_DISAS(ctx); + if (!avr_feature(ctx->env, feature)) { + gen_helper_unsupported(cpu_env); + ctx->bstate = DISAS_NORETURN; + return false; + } + return true; +} + +static bool decode_insn(DisasContext *ctx, uint16_t insn); +#include "decode-insn.c.inc" + +/* + * Arithmetic Instructions + */ + +/* + * Utility functions for updating status registers: + * + * - gen_add_CHf() + * - gen_add_Vf() + * - gen_sub_CHf() + * - gen_sub_Vf() + * - gen_NSf() + * - gen_ZNSf() + * + */ + +static void gen_decl(add_CHf, TCGv R, TCGv Rd, TCGv Rr) +{ + TCGv t1 = tcg_temp_new_i32(); + TCGv t2 = tcg_temp_new_i32(); + TCGv t3 = tcg_temp_new_i32(); + + tcg_gen_and_tl(t1, Rd, Rr); /* 
t1 = Rd & Rr */ + tcg_gen_andc_tl(t2, Rd, R); /* t2 = Rd & ~R */ + tcg_gen_andc_tl(t3, Rr, R); /* t3 = Rr & ~R */ + tcg_gen_or_tl(t1, t1, t2); /* t1 = t1 | t2 | t3 */ + tcg_gen_or_tl(t1, t1, t3); + + tcg_gen_shri_tl(cpu_Cf, t1, 7); /* Cf = t1(7) */ + tcg_gen_shri_tl(cpu_Hf, t1, 3); /* Hf = t1(3) */ + tcg_gen_andi_tl(cpu_Hf, cpu_Hf, 1); + + tcg_temp_free_i32(t3); + tcg_temp_free_i32(t2); + tcg_temp_free_i32(t1); +} + +static void gen_decl(add_Vf, TCGv R, TCGv Rd, TCGv Rr) +{ + TCGv t1 = tcg_temp_new_i32(); + TCGv t2 = tcg_temp_new_i32(); + + /* t1 = Rd & Rr & ~R | ~Rd & ~Rr & R */ + /* = (Rd ^ R) & ~(Rd ^ Rr) */ + tcg_gen_xor_tl(t1, Rd, R); + tcg_gen_xor_tl(t2, Rd, Rr); + tcg_gen_andc_tl(t1, t1, t2); + + tcg_gen_shri_tl(cpu_Vf, t1, 7); /* Vf = t1(7) */ + + tcg_temp_free_i32(t2); + tcg_temp_free_i32(t1); +} + +static void gen_decl(sub_CHf, TCGv R, TCGv Rd, TCGv Rr) +{ + TCGv t1 = tcg_temp_new_i32(); + TCGv t2 = tcg_temp_new_i32(); + TCGv t3 = tcg_temp_new_i32(); + + tcg_gen_not_tl(t1, Rd); /* t1 = ~Rd */ + tcg_gen_and_tl(t2, t1, Rr); /* t2 = ~Rd & Rr */ + tcg_gen_or_tl(t3, t1, Rr); /* t3 = (~Rd | Rr) & R */ + tcg_gen_and_tl(t3, t3, R); + tcg_gen_or_tl(t2, t2, t3); /* t2 = ~Rd & Rr | ~Rd & R | R & Rr */ + + tcg_gen_shri_tl(cpu_Cf, t2, 7); /* Cf = t2(7) */ + tcg_gen_shri_tl(cpu_Hf, t2, 3); /* Hf = t2(3) */ + tcg_gen_andi_tl(cpu_Hf, cpu_Hf, 1); + + tcg_temp_free_i32(t3); + tcg_temp_free_i32(t2); + tcg_temp_free_i32(t1); +} + +static void gen_decl(sub_Vf, TCGv R, TCGv Rd, TCGv Rr) +{ + TCGv t1 = tcg_temp_new_i32(); + TCGv t2 = tcg_temp_new_i32(); + + /* t1 = Rd & ~Rr & ~R | ~Rd & Rr & R */ + /* = (Rd ^ R) & (Rd ^ R) */ + tcg_gen_xor_tl(t1, Rd, R); + tcg_gen_xor_tl(t2, Rd, Rr); + tcg_gen_and_tl(t1, t1, t2); + + tcg_gen_shri_tl(cpu_Vf, t1, 7); /* Vf = t1(7) */ + + tcg_temp_free_i32(t2); + tcg_temp_free_i32(t1); +} + +static void gen_decl(NSf, TCGv R) +{ + tcg_gen_shri_tl(cpu_Nf, R, 7); /* Nf = R(7) */ + tcg_gen_xor_tl(cpu_Sf, cpu_Nf, cpu_Vf); /* Sf = Nf ^ Vf */ +} + +static void gen_decl(ZNSf, TCGv R) +{ + tcg_gen_setcondi_tl(TCG_COND_EQ, cpu_Zf, R, 0); /* Zf = R == 0 */ + + /* update status register */ + tcg_gen_shri_tl(cpu_Nf, R, 7); /* Nf = R(7) */ + tcg_gen_xor_tl(cpu_Sf, cpu_Nf, cpu_Vf); /* Sf = Nf ^ Vf */ +} + +#define gen_add_CHf(...) gen_call(add_CHf, __VA_ARGS__) +#define gen_add_Vf(...) gen_call(add_Vf, __VA_ARGS__) +#define gen_sub_CHf(...) gen_call(sub_CHf, __VA_ARGS__) +#define gen_sub_Vf(...) gen_call(sub_Vf, __VA_ARGS__) +#define gen_NSf(...) gen_call(NSf, __VA_ARGS__) +#define gen_ZNSf(...) gen_call(ZNSf, __VA_ARGS__) + +#define gen_new_label_avr() gen_call(new_label_avr) +#define gen_set_label(...) gen_call(set_label, __VA_ARGS__) + +/* + * Adds two registers without the C Flag and places the result in the + * destination register Rd. + */ +static bool trans_ADD(DisasContext *ctx, arg_ADD *a) +{ + INIT_TCG_CONTEXT_FROM_DISAS(ctx); + TCGv Rd = cpu_r[a->rd]; + TCGv Rr = cpu_r[a->rr]; + TCGv R = tcg_temp_new_i32(); + + tcg_gen_add_tl(R, Rd, Rr); /* Rd = Rd + Rr */ + tcg_gen_andi_tl(R, R, 0xff); /* make it 8 bits */ + + /* update status register */ + gen_add_CHf(R, Rd, Rr); + gen_add_Vf(R, Rd, Rr); + gen_ZNSf(R); + + /* update output registers */ + tcg_gen_mov_tl(Rd, R); + + tcg_temp_free_i32(R); + + return true; +} + +/* + * Adds two registers and the contents of the C Flag and places the result in + * the destination register Rd. 
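For reference, the bitwise identities used by gen_add_CHf() and gen_add_Vf() above can be checked in isolation with plain host C. This is only an illustrative sketch of the 8-bit ADD flag semantics (the avr_add8 name and Flags struct are invented for the example), not code from the patch:

#include <assert.h>
#include <stdint.h>

typedef struct { int C, Z, N, V, S, H; } Flags;

static uint8_t avr_add8(uint8_t rd, uint8_t rr, Flags *f)
{
    uint8_t r = (uint8_t)(rd + rr);
    /* carry out of each bit position: (Rd & Rr) | (Rd & ~R) | (Rr & ~R) */
    uint8_t carries = (rd & rr) | (rd & ~r) | (rr & ~r);

    f->C = (carries >> 7) & 1;                    /* carry out of bit 7 */
    f->H = (carries >> 3) & 1;                    /* carry out of bit 3 */
    f->V = (((rd ^ r) & ~(rd ^ rr)) >> 7) & 1;    /* same-sign operands, different-sign result */
    f->Z = (r == 0);
    f->N = (r >> 7) & 1;
    f->S = f->N ^ f->V;
    return r;
}

int main(void)
{
    Flags f;
    assert(avr_add8(0x78, 0x08, &f) == 0x80);     /* 120 + 8 crosses into the negative range */
    assert(f.V == 1 && f.H == 1 && f.C == 0 && f.N == 1 && f.S == 0);
    return 0;
}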
+ */ +static bool trans_ADC(DisasContext *ctx, arg_ADC *a) +{ + INIT_TCG_CONTEXT_FROM_DISAS(ctx); + TCGv Rd = cpu_r[a->rd]; + TCGv Rr = cpu_r[a->rr]; + TCGv R = tcg_temp_new_i32(); + + tcg_gen_add_tl(R, Rd, Rr); /* R = Rd + Rr + Cf */ + tcg_gen_add_tl(R, R, cpu_Cf); + tcg_gen_andi_tl(R, R, 0xff); /* make it 8 bits */ + + /* update status register */ + gen_add_CHf(R, Rd, Rr); + gen_add_Vf(R, Rd, Rr); + gen_ZNSf(R); + + /* update output registers */ + tcg_gen_mov_tl(Rd, R); + + tcg_temp_free_i32(R); + + return true; +} + +/* + * Adds an immediate value (0 - 63) to a register pair and places the result + * in the register pair. This instruction operates on the upper four register + * pairs, and is well suited for operations on the pointer registers. This + * instruction is not available in all devices. Refer to the device specific + * instruction set summary. + */ +static bool trans_ADIW(DisasContext *ctx, arg_ADIW *a) +{ + if (!avr_have_feature(ctx, AVR_FEATURE_ADIW_SBIW)) { + return true; + } + + INIT_TCG_CONTEXT_FROM_DISAS(ctx); + TCGv RdL = cpu_r[a->rd]; + TCGv RdH = cpu_r[a->rd + 1]; + int Imm = (a->imm); + TCGv R = tcg_temp_new_i32(); + TCGv Rd = tcg_temp_new_i32(); + + tcg_gen_deposit_tl(Rd, RdL, RdH, 8, 8); /* Rd = RdH:RdL */ + tcg_gen_addi_tl(R, Rd, Imm); /* R = Rd + Imm */ + tcg_gen_andi_tl(R, R, 0xffff); /* make it 16 bits */ + + /* update status register */ + tcg_gen_andc_tl(cpu_Cf, Rd, R); /* Cf = Rd & ~R */ + tcg_gen_shri_tl(cpu_Cf, cpu_Cf, 15); + tcg_gen_andc_tl(cpu_Vf, R, Rd); /* Vf = R & ~Rd */ + tcg_gen_shri_tl(cpu_Vf, cpu_Vf, 15); + tcg_gen_setcondi_tl(TCG_COND_EQ, cpu_Zf, R, 0); /* Zf = R == 0 */ + tcg_gen_shri_tl(cpu_Nf, R, 15); /* Nf = R(15) */ + tcg_gen_xor_tl(cpu_Sf, cpu_Nf, cpu_Vf);/* Sf = Nf ^ Vf */ + + /* update output registers */ + tcg_gen_andi_tl(RdL, R, 0xff); + tcg_gen_shri_tl(RdH, R, 8); + + tcg_temp_free_i32(Rd); + tcg_temp_free_i32(R); + + return true; +} + +/* + * Subtracts two registers and places the result in the destination + * register Rd. + */ +static bool trans_SUB(DisasContext *ctx, arg_SUB *a) +{ + INIT_TCG_CONTEXT_FROM_DISAS(ctx); + TCGv Rd = cpu_r[a->rd]; + TCGv Rr = cpu_r[a->rr]; + TCGv R = tcg_temp_new_i32(); + + tcg_gen_sub_tl(R, Rd, Rr); /* R = Rd - Rr */ + tcg_gen_andi_tl(R, R, 0xff); /* make it 8 bits */ + + /* update status register */ + tcg_gen_andc_tl(cpu_Cf, Rd, R); /* Cf = Rd & ~R */ + gen_sub_CHf(R, Rd, Rr); + gen_sub_Vf(R, Rd, Rr); + gen_ZNSf(R); + + /* update output registers */ + tcg_gen_mov_tl(Rd, R); + + tcg_temp_free_i32(R); + + return true; +} + +/* + * Subtracts a register and a constant and places the result in the + * destination register Rd. This instruction is working on Register R16 to R31 + * and is very well suited for operations on the X, Y, and Z-pointers. + */ +static bool trans_SUBI(DisasContext *ctx, arg_SUBI *a) +{ + INIT_TCG_CONTEXT_FROM_DISAS(ctx); + TCGv Rd = cpu_r[a->rd]; + TCGv Rr = tcg_const_i32(a->imm); + TCGv R = tcg_temp_new_i32(); + + tcg_gen_sub_tl(R, Rd, Rr); /* R = Rd - Imm */ + tcg_gen_andi_tl(R, R, 0xff); /* make it 8 bits */ + + /* update status register */ + gen_sub_CHf(R, Rd, Rr); + gen_sub_Vf(R, Rd, Rr); + gen_ZNSf(R); + + /* update output registers */ + tcg_gen_mov_tl(Rd, R); + + tcg_temp_free_i32(R); + tcg_temp_free_i32(Rr); + + return true; +} + +/* + * Subtracts two registers and subtracts with the C Flag and places the + * result in the destination register Rd. 
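The 16-bit flag updates in trans_ADIW() above are easy to misread, so here is the same arithmetic as a stand-alone plain-C sketch; the avr_adiw name and the bare r[32] array are invented for the illustration, and only the Cf/Vf formulas actually shown in the TCG code are assumed:

#include <assert.h>
#include <stdint.h>

/* ADIW Rd+1:Rd, imm -- d is one of 24/26/28/30, imm is 0..63 */
static void avr_adiw(uint8_t r[32], int d, unsigned imm,
                     int *C, int *Z, int *N, int *V, int *S)
{
    uint16_t rd  = (uint16_t)((r[d + 1] << 8) | r[d]);   /* Rd = RdH:RdL */
    uint16_t res = (uint16_t)(rd + imm);

    *C = ((rd & ~res) >> 15) & 1;    /* Cf = Rd(15) & ~R(15) */
    *V = ((res & ~rd) >> 15) & 1;    /* Vf = R(15) & ~Rd(15) */
    *Z = (res == 0);
    *N = (res >> 15) & 1;
    *S = *N ^ *V;

    r[d]     = res & 0xff;
    r[d + 1] = res >> 8;
}

int main(void)
{
    uint8_t r[32] = {0};
    int C, Z, N, V, S;

    r[24] = 0xff; r[25] = 0x7f;                  /* R25:R24 = 0x7fff */
    avr_adiw(r, 24, 1, &C, &Z, &N, &V, &S);      /* ADIW r25:r24, 1 */
    assert(r[25] == 0x80 && r[24] == 0x00);
    assert(C == 0 && V == 1 && N == 1 && S == 0 && Z == 0);
    return 0;
}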
+ */ +static bool trans_SBC(DisasContext *ctx, arg_SBC *a) +{ + INIT_TCG_CONTEXT_FROM_DISAS(ctx); + TCGv Rd = cpu_r[a->rd]; + TCGv Rr = cpu_r[a->rr]; + TCGv R = tcg_temp_new_i32(); + TCGv zero = tcg_const_i32(0); + + tcg_gen_sub_tl(R, Rd, Rr); /* R = Rd - Rr - Cf */ + tcg_gen_sub_tl(R, R, cpu_Cf); + tcg_gen_andi_tl(R, R, 0xff); /* make it 8 bits */ + + /* update status register */ + gen_sub_CHf(R, Rd, Rr); + gen_sub_Vf(R, Rd, Rr); + gen_NSf(R); + + /* + * Previous value remains unchanged when the result is zero; + * cleared otherwise. + */ + tcg_gen_movcond_tl(TCG_COND_EQ, cpu_Zf, R, zero, cpu_Zf, zero); + + /* update output registers */ + tcg_gen_mov_tl(Rd, R); + + tcg_temp_free_i32(zero); + tcg_temp_free_i32(R); + + return true; +} + +/* + * SBCI -- Subtract Immediate with Carry + */ +static bool trans_SBCI(DisasContext *ctx, arg_SBCI *a) +{ + INIT_TCG_CONTEXT_FROM_DISAS(ctx); + TCGv Rd = cpu_r[a->rd]; + TCGv Rr = tcg_const_i32(a->imm); + TCGv R = tcg_temp_new_i32(); + TCGv zero = tcg_const_i32(0); + + tcg_gen_sub_tl(R, Rd, Rr); /* R = Rd - Rr - Cf */ + tcg_gen_sub_tl(R, R, cpu_Cf); + tcg_gen_andi_tl(R, R, 0xff); /* make it 8 bits */ + + /* update status register */ + gen_sub_CHf(R, Rd, Rr); + gen_sub_Vf(R, Rd, Rr); + gen_NSf(R); + + /* + * Previous value remains unchanged when the result is zero; + * cleared otherwise. + */ + tcg_gen_movcond_tl(TCG_COND_EQ, cpu_Zf, R, zero, cpu_Zf, zero); + + /* update output registers */ + tcg_gen_mov_tl(Rd, R); + + tcg_temp_free_i32(zero); + tcg_temp_free_i32(R); + tcg_temp_free_i32(Rr); + + return true; +} + +/* + * Subtracts an immediate value (0-63) from a register pair and places the + * result in the register pair. This instruction operates on the upper four + * register pairs, and is well suited for operations on the Pointer Registers. + * This instruction is not available in all devices. Refer to the device + * specific instruction set summary. + */ +static bool trans_SBIW(DisasContext *ctx, arg_SBIW *a) +{ + if (!avr_have_feature(ctx, AVR_FEATURE_ADIW_SBIW)) { + return true; + } + + INIT_TCG_CONTEXT_FROM_DISAS(ctx); + TCGv RdL = cpu_r[a->rd]; + TCGv RdH = cpu_r[a->rd + 1]; + int Imm = (a->imm); + TCGv R = tcg_temp_new_i32(); + TCGv Rd = tcg_temp_new_i32(); + + tcg_gen_deposit_tl(Rd, RdL, RdH, 8, 8); /* Rd = RdH:RdL */ + tcg_gen_subi_tl(R, Rd, Imm); /* R = Rd - Imm */ + tcg_gen_andi_tl(R, R, 0xffff); /* make it 16 bits */ + + /* update status register */ + tcg_gen_andc_tl(cpu_Cf, R, Rd); + tcg_gen_shri_tl(cpu_Cf, cpu_Cf, 15); /* Cf = R & ~Rd */ + tcg_gen_andc_tl(cpu_Vf, Rd, R); + tcg_gen_shri_tl(cpu_Vf, cpu_Vf, 15); /* Vf = Rd & ~R */ + tcg_gen_setcondi_tl(TCG_COND_EQ, cpu_Zf, R, 0); /* Zf = R == 0 */ + tcg_gen_shri_tl(cpu_Nf, R, 15); /* Nf = R(15) */ + tcg_gen_xor_tl(cpu_Sf, cpu_Nf, cpu_Vf); /* Sf = Nf ^ Vf */ + + /* update output registers */ + tcg_gen_andi_tl(RdL, R, 0xff); + tcg_gen_shri_tl(RdH, R, 8); + + tcg_temp_free_i32(Rd); + tcg_temp_free_i32(R); + + return true; +} + +/* + * Performs the logical AND between the contents of register Rd and register + * Rr and places the result in the destination register Rd. 
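The movcond on cpu_Zf in trans_SBC()/trans_SBCI() above implements the AVR rule that SBC can clear Z but never set it, which is what makes a low-byte SUB followed by a high-byte SBC give a usable zero test for 16-bit values. A minimal plain-C sketch of that behaviour (function names invented; the borrow formula is the one from gen_sub_CHf()):

#include <assert.h>
#include <stdint.h>

static uint8_t avr_sub8(uint8_t rd, uint8_t rr, int *C, int *Z)
{
    uint8_t r = (uint8_t)(rd - rr);
    uint8_t borrows = (~rd & rr) | (~rd & r) | (rr & r);
    *C = (borrows >> 7) & 1;
    *Z = (r == 0);                        /* SUB: Z simply follows the result */
    return r;
}

static uint8_t avr_sbc8(uint8_t rd, uint8_t rr, int *C, int *Z)
{
    uint8_t r = (uint8_t)(rd - rr - *C);
    uint8_t borrows = (~rd & rr) | (~rd & r) | (rr & r);
    *C = (borrows >> 7) & 1;
    *Z = (r == 0) ? *Z : 0;               /* SBC: Z can only stay set, never become set */
    return r;
}

int main(void)
{
    int C, Z;
    /* 16-bit 0x1200 - 0x1200, low bytes first, then high bytes with borrow */
    uint8_t lo = avr_sub8(0x00, 0x00, &C, &Z);
    uint8_t hi = avr_sbc8(0x12, 0x12, &C, &Z);
    assert(lo == 0 && hi == 0 && C == 0 && Z == 1);
    return 0;
}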
+ */ +static bool trans_AND(DisasContext *ctx, arg_AND *a) +{ + INIT_TCG_CONTEXT_FROM_DISAS(ctx); + TCGv Rd = cpu_r[a->rd]; + TCGv Rr = cpu_r[a->rr]; + TCGv R = tcg_temp_new_i32(); + + tcg_gen_and_tl(R, Rd, Rr); /* Rd = Rd and Rr */ + + /* update status register */ + tcg_gen_movi_tl(cpu_Vf, 0); /* Vf = 0 */ + tcg_gen_setcondi_tl(TCG_COND_EQ, cpu_Zf, R, 0); /* Zf = R == 0 */ + gen_ZNSf(R); + + /* update output registers */ + tcg_gen_mov_tl(Rd, R); + + tcg_temp_free_i32(R); + + return true; +} + +/* + * Performs the logical AND between the contents of register Rd and a constant + * and places the result in the destination register Rd. + */ +static bool trans_ANDI(DisasContext *ctx, arg_ANDI *a) +{ + INIT_TCG_CONTEXT_FROM_DISAS(ctx); + TCGv Rd = cpu_r[a->rd]; + int Imm = (a->imm); + + tcg_gen_andi_tl(Rd, Rd, Imm); /* Rd = Rd & Imm */ + + /* update status register */ + tcg_gen_movi_tl(cpu_Vf, 0x00); /* Vf = 0 */ + gen_ZNSf(Rd); + + return true; +} + +/* + * Performs the logical OR between the contents of register Rd and register + * Rr and places the result in the destination register Rd. + */ +static bool trans_OR(DisasContext *ctx, arg_OR *a) +{ + INIT_TCG_CONTEXT_FROM_DISAS(ctx); + TCGv Rd = cpu_r[a->rd]; + TCGv Rr = cpu_r[a->rr]; + TCGv R = tcg_temp_new_i32(); + + tcg_gen_or_tl(R, Rd, Rr); + + /* update status register */ + tcg_gen_movi_tl(cpu_Vf, 0); + gen_ZNSf(R); + + /* update output registers */ + tcg_gen_mov_tl(Rd, R); + + tcg_temp_free_i32(R); + + return true; +} + +/* + * Performs the logical OR between the contents of register Rd and a + * constant and places the result in the destination register Rd. + */ +static bool trans_ORI(DisasContext *ctx, arg_ORI *a) +{ + INIT_TCG_CONTEXT_FROM_DISAS(ctx); + TCGv Rd = cpu_r[a->rd]; + int Imm = (a->imm); + + tcg_gen_ori_tl(Rd, Rd, Imm); /* Rd = Rd | Imm */ + + /* update status register */ + tcg_gen_movi_tl(cpu_Vf, 0x00); /* Vf = 0 */ + gen_ZNSf(Rd); + + return true; +} + +/* + * Performs the logical EOR between the contents of register Rd and + * register Rr and places the result in the destination register Rd. + */ +static bool trans_EOR(DisasContext *ctx, arg_EOR *a) +{ + INIT_TCG_CONTEXT_FROM_DISAS(ctx); + TCGv Rd = cpu_r[a->rd]; + TCGv Rr = cpu_r[a->rr]; + + tcg_gen_xor_tl(Rd, Rd, Rr); + + /* update status register */ + tcg_gen_movi_tl(cpu_Vf, 0); + gen_ZNSf(Rd); + + return true; +} + +/* + * Clears the specified bits in register Rd. Performs the logical AND + * between the contents of register Rd and the complement of the constant mask + * K. The result will be placed in register Rd. + */ +static bool trans_COM(DisasContext *ctx, arg_COM *a) +{ + INIT_TCG_CONTEXT_FROM_DISAS(ctx); + TCGv Rd = cpu_r[a->rd]; + TCGv R = tcg_temp_new_i32(); + + tcg_gen_xori_tl(Rd, Rd, 0xff); + + /* update status register */ + tcg_gen_movi_tl(cpu_Cf, 1); /* Cf = 1 */ + tcg_gen_movi_tl(cpu_Vf, 0); /* Vf = 0 */ + gen_ZNSf(Rd); + + tcg_temp_free_i32(R); + + return true; +} + +/* + * Replaces the contents of register Rd with its two's complement; the + * value $80 is left unchanged. 
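The NEG description above notes that $80 is left unchanged; together with INC and DEC further down, these are the single-operand cases where the V flag has a fixed trigger value rather than a full overflow computation. A small stand-alone check of just those V rules, consistent with the setcondi/gen_sub_Vf logic used in the translations (names invented for the sketch):

#include <assert.h>
#include <stdint.h>

static uint8_t avr_inc(uint8_t rd, int *V)
{
    uint8_t r = (uint8_t)(rd + 1);
    *V = (r == 0x80);                 /* only 0x7f + 1 overflows */
    return r;
}

static uint8_t avr_dec(uint8_t rd, int *V)
{
    uint8_t r = (uint8_t)(rd - 1);
    *V = (r == 0x7f);                 /* only 0x80 - 1 overflows */
    return r;
}

static uint8_t avr_neg(uint8_t rd, int *V)
{
    uint8_t r = (uint8_t)(0 - rd);
    *V = (rd == 0x80);                /* -(-128) is not representable */
    return r;
}

int main(void)
{
    int V;
    assert(avr_inc(0x7f, &V) == 0x80 && V == 1);
    assert(avr_dec(0x80, &V) == 0x7f && V == 1);
    assert(avr_neg(0x80, &V) == 0x80 && V == 1);   /* $80 comes back unchanged, V set */
    assert(avr_neg(0x01, &V) == 0xff && V == 0);
    return 0;
}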
+ */ +static bool trans_NEG(DisasContext *ctx, arg_NEG *a) +{ + INIT_TCG_CONTEXT_FROM_DISAS(ctx); + TCGv Rd = cpu_r[a->rd]; + TCGv t0 = tcg_const_i32(0); + TCGv R = tcg_temp_new_i32(); + + tcg_gen_sub_tl(R, t0, Rd); /* R = 0 - Rd */ + tcg_gen_andi_tl(R, R, 0xff); /* make it 8 bits */ + + /* update status register */ + gen_sub_CHf(R, t0, Rd); + gen_sub_Vf(R, t0, Rd); + gen_ZNSf(R); + + /* update output registers */ + tcg_gen_mov_tl(Rd, R); + + tcg_temp_free_i32(t0); + tcg_temp_free_i32(R); + + return true; +} + +/* + * Adds one -1- to the contents of register Rd and places the result in the + * destination register Rd. The C Flag in SREG is not affected by the + * operation, thus allowing the INC instruction to be used on a loop counter in + * multiple-precision computations. When operating on unsigned numbers, only + * BREQ and BRNE branches can be expected to perform consistently. When + * operating on two's complement values, all signed branches are available. + */ +static bool trans_INC(DisasContext *ctx, arg_INC *a) +{ + INIT_TCG_CONTEXT_FROM_DISAS(ctx); + TCGv Rd = cpu_r[a->rd]; + + tcg_gen_addi_tl(Rd, Rd, 1); + tcg_gen_andi_tl(Rd, Rd, 0xff); + + /* update status register */ + tcg_gen_setcondi_tl(TCG_COND_EQ, cpu_Vf, Rd, 0x80); /* Vf = Rd == 0x80 */ + gen_ZNSf(Rd); + + return true; +} + +/* + * Subtracts one -1- from the contents of register Rd and places the result + * in the destination register Rd. The C Flag in SREG is not affected by the + * operation, thus allowing the DEC instruction to be used on a loop counter in + * multiple-precision computations. When operating on unsigned values, only + * BREQ and BRNE branches can be expected to perform consistently. When + * operating on two's complement values, all signed branches are available. + */ +static bool trans_DEC(DisasContext *ctx, arg_DEC *a) +{ + INIT_TCG_CONTEXT_FROM_DISAS(ctx); + TCGv Rd = cpu_r[a->rd]; + + tcg_gen_subi_tl(Rd, Rd, 1); /* Rd = Rd - 1 */ + tcg_gen_andi_tl(Rd, Rd, 0xff); /* make it 8 bits */ + + /* update status register */ + tcg_gen_setcondi_tl(TCG_COND_EQ, cpu_Vf, Rd, 0x7f); /* Vf = Rd == 0x7f */ + gen_ZNSf(Rd); + + return true; +} + +/* + * This instruction performs 8-bit x 8-bit -> 16-bit unsigned multiplication. + */ +static bool trans_MUL(DisasContext *ctx, arg_MUL *a) +{ + if (!avr_have_feature(ctx, AVR_FEATURE_MUL)) { + return true; + } + + INIT_TCG_CONTEXT_FROM_DISAS(ctx); + TCGv R0 = cpu_r[0]; + TCGv R1 = cpu_r[1]; + TCGv Rd = cpu_r[a->rd]; + TCGv Rr = cpu_r[a->rr]; + TCGv R = tcg_temp_new_i32(); + + tcg_gen_mul_tl(R, Rd, Rr); /* R = Rd * Rr */ + tcg_gen_andi_tl(R0, R, 0xff); + tcg_gen_shri_tl(R1, R, 8); + + /* update status register */ + tcg_gen_shri_tl(cpu_Cf, R, 15); /* Cf = R(15) */ + tcg_gen_setcondi_tl(TCG_COND_EQ, cpu_Zf, R, 0); /* Zf = R == 0 */ + + tcg_temp_free_i32(R); + + return true; +} + +/* + * This instruction performs 8-bit x 8-bit -> 16-bit signed multiplication. 
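As a reference for trans_MUL() above and the signed variant described next, the following plain-C sketch shows where the 16-bit product ends up (R1:R0) and how C and Z are derived from it; the avr_mul name and register array are invented for the example:

#include <assert.h>
#include <stdint.h>

static void avr_mul(uint8_t r[32], int d, int s, int is_signed, int *C, int *Z)
{
    uint16_t prod;

    if (is_signed) {                        /* MULS: both operands signed */
        prod = (uint16_t)((int8_t)r[d] * (int8_t)r[s]);
    } else {                                /* MUL: both operands unsigned */
        prod = (uint16_t)(r[d] * r[s]);
    }
    r[0] = prod & 0xff;                     /* low byte  -> R0 */
    r[1] = prod >> 8;                       /* high byte -> R1 */
    *C = (prod >> 15) & 1;                  /* C = bit 15 of the product */
    *Z = (prod == 0);
}

int main(void)
{
    uint8_t r[32] = {0};
    int C, Z;

    r[16] = 200; r[17] = 200;               /* MUL r16, r17: 40000 = 0x9c40 */
    avr_mul(r, 16, 17, 0, &C, &Z);
    assert(r[1] == 0x9c && r[0] == 0x40 && C == 1 && Z == 0);

    r[16] = (uint8_t)-2; r[17] = 100;       /* MULS r16, r17: -200 = 0xff38 */
    avr_mul(r, 16, 17, 1, &C, &Z);
    assert(r[1] == 0xff && r[0] == 0x38 && C == 1 && Z == 0);
    return 0;
}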
+ */ +static bool trans_MULS(DisasContext *ctx, arg_MULS *a) +{ + if (!avr_have_feature(ctx, AVR_FEATURE_MUL)) { + return true; + } + + INIT_TCG_CONTEXT_FROM_DISAS(ctx); + TCGv R0 = cpu_r[0]; + TCGv R1 = cpu_r[1]; + TCGv Rd = cpu_r[a->rd]; + TCGv Rr = cpu_r[a->rr]; + TCGv R = tcg_temp_new_i32(); + TCGv t0 = tcg_temp_new_i32(); + TCGv t1 = tcg_temp_new_i32(); + + tcg_gen_ext8s_tl(t0, Rd); /* make Rd full 32 bit signed */ + tcg_gen_ext8s_tl(t1, Rr); /* make Rr full 32 bit signed */ + tcg_gen_mul_tl(R, t0, t1); /* R = Rd * Rr */ + tcg_gen_andi_tl(R, R, 0xffff); /* make it 16 bits */ + tcg_gen_andi_tl(R0, R, 0xff); + tcg_gen_shri_tl(R1, R, 8); + + /* update status register */ + tcg_gen_shri_tl(cpu_Cf, R, 15); /* Cf = R(15) */ + tcg_gen_setcondi_tl(TCG_COND_EQ, cpu_Zf, R, 0); /* Zf = R == 0 */ + + tcg_temp_free_i32(t1); + tcg_temp_free_i32(t0); + tcg_temp_free_i32(R); + + return true; +} + +/* + * This instruction performs 8-bit x 8-bit -> 16-bit multiplication of a + * signed and an unsigned number. + */ +static bool trans_MULSU(DisasContext *ctx, arg_MULSU *a) +{ + if (!avr_have_feature(ctx, AVR_FEATURE_MUL)) { + return true; + } + + INIT_TCG_CONTEXT_FROM_DISAS(ctx); + TCGv R0 = cpu_r[0]; + TCGv R1 = cpu_r[1]; + TCGv Rd = cpu_r[a->rd]; + TCGv Rr = cpu_r[a->rr]; + TCGv R = tcg_temp_new_i32(); + TCGv t0 = tcg_temp_new_i32(); + + tcg_gen_ext8s_tl(t0, Rd); /* make Rd full 32 bit signed */ + tcg_gen_mul_tl(R, t0, Rr); /* R = Rd * Rr */ + tcg_gen_andi_tl(R, R, 0xffff); /* make R 16 bits */ + tcg_gen_andi_tl(R0, R, 0xff); + tcg_gen_shri_tl(R1, R, 8); + + /* update status register */ + tcg_gen_shri_tl(cpu_Cf, R, 15); /* Cf = R(15) */ + tcg_gen_setcondi_tl(TCG_COND_EQ, cpu_Zf, R, 0); /* Zf = R == 0 */ + + tcg_temp_free_i32(t0); + tcg_temp_free_i32(R); + + return true; +} + +/* + * This instruction performs 8-bit x 8-bit -> 16-bit unsigned + * multiplication and shifts the result one bit left. + */ +static bool trans_FMUL(DisasContext *ctx, arg_FMUL *a) +{ + if (!avr_have_feature(ctx, AVR_FEATURE_MUL)) { + return true; + } + + INIT_TCG_CONTEXT_FROM_DISAS(ctx); + TCGv R0 = cpu_r[0]; + TCGv R1 = cpu_r[1]; + TCGv Rd = cpu_r[a->rd]; + TCGv Rr = cpu_r[a->rr]; + TCGv R = tcg_temp_new_i32(); + + tcg_gen_mul_tl(R, Rd, Rr); /* R = Rd * Rr */ + + /* update status register */ + tcg_gen_shri_tl(cpu_Cf, R, 15); /* Cf = R(15) */ + tcg_gen_setcondi_tl(TCG_COND_EQ, cpu_Zf, R, 0); /* Zf = R == 0 */ + + /* update output registers */ + tcg_gen_shli_tl(R, R, 1); + tcg_gen_andi_tl(R0, R, 0xff); + tcg_gen_shri_tl(R1, R, 8); + tcg_gen_andi_tl(R1, R1, 0xff); + + + tcg_temp_free_i32(R); + + return true; +} + +/* + * This instruction performs 8-bit x 8-bit -> 16-bit signed multiplication + * and shifts the result one bit left. 
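trans_FMUL() above (and the signed forms that follow) read C and Z from the raw 16-bit product and only then shift it left by one before writing R1:R0; the shift is what realigns a (1.7) x (1.7) fractional multiply into a (1.15) result. A hedged plain-C illustration of the unsigned case, with names invented for the sketch:

#include <assert.h>
#include <stdint.h>

/* FMUL: unsigned (1.7) * (1.7) -> (1.15), result in R1:R0 */
static void avr_fmul(uint8_t r[32], int d, int s, int *C, int *Z)
{
    uint16_t prod = (uint16_t)(r[d] * r[s]);

    *C = (prod >> 15) & 1;        /* flags come from the unshifted product */
    *Z = (prod == 0);

    prod <<= 1;                   /* realign the radix point */
    r[0] = prod & 0xff;
    r[1] = prod >> 8;
}

int main(void)
{
    uint8_t r[32] = {0};
    int C, Z;

    /* 0.5 * 0.5 = 0.25: 0x40 * 0x40 = 0x1000, shifted left -> 0x2000 */
    r[20] = 0x40; r[21] = 0x40;
    avr_fmul(r, 20, 21, &C, &Z);
    assert(r[1] == 0x20 && r[0] == 0x00 && C == 0 && Z == 0);
    return 0;
}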
+ */ +static bool trans_FMULS(DisasContext *ctx, arg_FMULS *a) +{ + if (!avr_have_feature(ctx, AVR_FEATURE_MUL)) { + return true; + } + + INIT_TCG_CONTEXT_FROM_DISAS(ctx); + TCGv R0 = cpu_r[0]; + TCGv R1 = cpu_r[1]; + TCGv Rd = cpu_r[a->rd]; + TCGv Rr = cpu_r[a->rr]; + TCGv R = tcg_temp_new_i32(); + TCGv t0 = tcg_temp_new_i32(); + TCGv t1 = tcg_temp_new_i32(); + + tcg_gen_ext8s_tl(t0, Rd); /* make Rd full 32 bit signed */ + tcg_gen_ext8s_tl(t1, Rr); /* make Rr full 32 bit signed */ + tcg_gen_mul_tl(R, t0, t1); /* R = Rd * Rr */ + tcg_gen_andi_tl(R, R, 0xffff); /* make it 16 bits */ + + /* update status register */ + tcg_gen_shri_tl(cpu_Cf, R, 15); /* Cf = R(15) */ + tcg_gen_setcondi_tl(TCG_COND_EQ, cpu_Zf, R, 0); /* Zf = R == 0 */ + + /* update output registers */ + tcg_gen_shli_tl(R, R, 1); + tcg_gen_andi_tl(R0, R, 0xff); + tcg_gen_shri_tl(R1, R, 8); + tcg_gen_andi_tl(R1, R1, 0xff); + + tcg_temp_free_i32(t1); + tcg_temp_free_i32(t0); + tcg_temp_free_i32(R); + + return true; +} + +/* + * This instruction performs 8-bit x 8-bit -> 16-bit signed multiplication + * and shifts the result one bit left. + */ +static bool trans_FMULSU(DisasContext *ctx, arg_FMULSU *a) +{ + if (!avr_have_feature(ctx, AVR_FEATURE_MUL)) { + return true; + } + + INIT_TCG_CONTEXT_FROM_DISAS(ctx); + TCGv R0 = cpu_r[0]; + TCGv R1 = cpu_r[1]; + TCGv Rd = cpu_r[a->rd]; + TCGv Rr = cpu_r[a->rr]; + TCGv R = tcg_temp_new_i32(); + TCGv t0 = tcg_temp_new_i32(); + + tcg_gen_ext8s_tl(t0, Rd); /* make Rd full 32 bit signed */ + tcg_gen_mul_tl(R, t0, Rr); /* R = Rd * Rr */ + tcg_gen_andi_tl(R, R, 0xffff); /* make it 16 bits */ + + /* update status register */ + tcg_gen_shri_tl(cpu_Cf, R, 15); /* Cf = R(15) */ + tcg_gen_setcondi_tl(TCG_COND_EQ, cpu_Zf, R, 0); /* Zf = R == 0 */ + + /* update output registers */ + tcg_gen_shli_tl(R, R, 1); + tcg_gen_andi_tl(R0, R, 0xff); + tcg_gen_shri_tl(R1, R, 8); + tcg_gen_andi_tl(R1, R1, 0xff); + + tcg_temp_free_i32(t0); + tcg_temp_free_i32(R); + + return true; +} + +/* + * The module is an instruction set extension to the AVR CPU, performing + * DES iterations. The 64-bit data block (plaintext or ciphertext) is placed in + * the CPU register file, registers R0-R7, where LSB of data is placed in LSB + * of R0 and MSB of data is placed in MSB of R7. The full 64-bit key (including + * parity bits) is placed in registers R8- R15, organized in the register file + * with LSB of key in LSB of R8 and MSB of key in MSB of R15. Executing one DES + * instruction performs one round in the DES algorithm. Sixteen rounds must be + * executed in increasing order to form the correct DES ciphertext or + * plaintext. Intermediate results are stored in the register file (R0-R15) + * after each DES instruction. The instruction's operand (K) determines which + * round is executed, and the half carry flag (H) determines whether encryption + * or decryption is performed. The DES algorithm is described in + * "Specifications for the Data Encryption Standard" (Federal Information + * Processing Standards Publication 46). Intermediate results in this + * implementation differ from the standard because the initial permutation and + * the inverse initial permutation are performed each iteration. This does not + * affect the result in the final ciphertext or plaintext, but reduces + * execution time. 
+ */ +static bool trans_DES(DisasContext *ctx, arg_DES *a) +{ + /* TODO */ + if (!avr_have_feature(ctx, AVR_FEATURE_DES)) { + return true; + } + + qemu_log_mask(LOG_UNIMP, "%s: not implemented\n", __func__); + + return true; +} + +/* + * Branch Instructions + */ +static void gen_jmp_ez(DisasContext *ctx) +{ + INIT_TCG_CONTEXT_FROM_DISAS(ctx); + tcg_gen_deposit_tl(cpu_pc, cpu_r[30], cpu_r[31], 8, 8); + tcg_gen_or_tl(cpu_pc, cpu_pc, cpu_eind); + ctx->bstate = DISAS_LOOKUP; +} + +static void gen_jmp_z(DisasContext *ctx) +{ + INIT_TCG_CONTEXT_FROM_DISAS(ctx); + tcg_gen_deposit_tl(cpu_pc, cpu_r[30], cpu_r[31], 8, 8); + ctx->bstate = DISAS_LOOKUP; +} + +static void gen_push_ret(DisasContext *ctx, int ret) +{ + INIT_TCG_CONTEXT_FROM_DISAS(ctx); + if (avr_feature(ctx->env, AVR_FEATURE_1_BYTE_PC)) { + + TCGv t0 = tcg_const_i32((ret & 0x0000ff)); + + tcg_gen_qemu_st_tl(t0, cpu_sp, MMU_DATA_IDX, MO_UB); + tcg_gen_subi_tl(cpu_sp, cpu_sp, 1); + + tcg_temp_free_i32(t0); + } else if (avr_feature(ctx->env, AVR_FEATURE_2_BYTE_PC)) { + + TCGv t0 = tcg_const_i32((ret & 0x00ffff)); + + tcg_gen_subi_tl(cpu_sp, cpu_sp, 1); + tcg_gen_qemu_st_tl(t0, cpu_sp, MMU_DATA_IDX, MO_BEUW); + tcg_gen_subi_tl(cpu_sp, cpu_sp, 1); + + tcg_temp_free_i32(t0); + + } else if (avr_feature(ctx->env, AVR_FEATURE_3_BYTE_PC)) { + + TCGv lo = tcg_const_i32((ret & 0x0000ff)); + TCGv hi = tcg_const_i32((ret & 0xffff00) >> 8); + + tcg_gen_qemu_st_tl(lo, cpu_sp, MMU_DATA_IDX, MO_UB); + tcg_gen_subi_tl(cpu_sp, cpu_sp, 2); + tcg_gen_qemu_st_tl(hi, cpu_sp, MMU_DATA_IDX, MO_BEUW); + tcg_gen_subi_tl(cpu_sp, cpu_sp, 1); + + tcg_temp_free_i32(lo); + tcg_temp_free_i32(hi); + } +} + +static void gen_pop_ret(DisasContext *ctx, TCGv ret) +{ + INIT_TCG_CONTEXT_FROM_DISAS(ctx); + if (avr_feature(ctx->env, AVR_FEATURE_1_BYTE_PC)) { + tcg_gen_addi_tl(cpu_sp, cpu_sp, 1); + tcg_gen_qemu_ld_tl(ret, cpu_sp, MMU_DATA_IDX, MO_UB); + } else if (avr_feature(ctx->env, AVR_FEATURE_2_BYTE_PC)) { + tcg_gen_addi_tl(cpu_sp, cpu_sp, 1); + tcg_gen_qemu_ld_tl(ret, cpu_sp, MMU_DATA_IDX, MO_BEUW); + tcg_gen_addi_tl(cpu_sp, cpu_sp, 1); + } else if (avr_feature(ctx->env, AVR_FEATURE_3_BYTE_PC)) { + TCGv lo = tcg_temp_new_i32(); + TCGv hi = tcg_temp_new_i32(); + + tcg_gen_addi_tl(cpu_sp, cpu_sp, 1); + tcg_gen_qemu_ld_tl(hi, cpu_sp, MMU_DATA_IDX, MO_BEUW); + + tcg_gen_addi_tl(cpu_sp, cpu_sp, 2); + tcg_gen_qemu_ld_tl(lo, cpu_sp, MMU_DATA_IDX, MO_UB); + + tcg_gen_deposit_tl(ret, lo, hi, 8, 16); + + tcg_temp_free_i32(lo); + tcg_temp_free_i32(hi); + } +} + +static void gen_goto_tb(DisasContext *ctx, int n, target_ulong dest) +{ + INIT_TCG_CONTEXT_AND_CPU_ENV_FROM_DISAS(ctx); + TranslationBlock *tb = ctx->tb; + + if (ctx->singlestep == 0) { + tcg_gen_goto_tb(n); + tcg_gen_movi_i32(cpu_pc, dest); + tcg_gen_exit_tb(tb, n); + } else { + tcg_gen_movi_i32(cpu_pc, dest); + gen_helper_debug(cpu_env); + tcg_gen_exit_tb(NULL, 0); + } + ctx->bstate = DISAS_NORETURN; +} + +/* + * Relative jump to an address within PC - 2K +1 and PC + 2K (words). For + * AVR microcontrollers with Program memory not exceeding 4K words (8KB) this + * instruction can address the entire memory from every address location. See + * also JMP. + */ +static bool trans_RJMP(DisasContext *ctx, arg_RJMP *a) +{ + int dst = ctx->npc + a->imm; + + gen_goto_tb(ctx, 0, dst); + + return true; +} + +/* + * Indirect jump to the address pointed to by the Z (16 bits) Pointer + * Register in the Register File. The Z-pointer Register is 16 bits wide and + * allows jump within the lowest 64K words (128KB) section of Program memory. 
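Looking back at trans_RJMP() above: the decoded s12 field is a signed word offset added to ctx->npc, which already points at the word after the jump, so an offset of -1 branches to the instruction itself. A stand-alone sketch of that offset arithmetic (the sign extension is spelled out by hand here, since no decodetree is involved):

#include <assert.h>
#include <stdint.h>

/* RJMP/RCALL offset: low 12 bits of the opcode, sign-extended, counted in words */
static int rjmp_offset(uint16_t insn)
{
    int k = insn & 0xfff;
    return (k ^ 0x800) - 0x800;       /* sign-extend the 12-bit field */
}

int main(void)
{
    /* 0xcfff is the classic "jump to self": RJMP with offset -1 */
    assert(rjmp_offset(0xcfff) == -1);

    /* the target is formed against npc, the word address after the RJMP */
    uint32_t npc = 0x0100;
    assert(npc + rjmp_offset(0xcfff) == 0x00ff);
    return 0;
}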
+ * This instruction is not available in all devices. Refer to the device + * specific instruction set summary. + */ +static bool trans_IJMP(DisasContext *ctx, arg_IJMP *a) +{ + if (!avr_have_feature(ctx, AVR_FEATURE_IJMP_ICALL)) { + return true; + } + + gen_jmp_z(ctx); + + return true; +} + +/* + * Indirect jump to the address pointed to by the Z (16 bits) Pointer + * Register in the Register File and the EIND Register in the I/O space. This + * instruction allows for indirect jumps to the entire 4M (words) Program + * memory space. See also IJMP. This instruction is not available in all + * devices. Refer to the device specific instruction set summary. + */ +static bool trans_EIJMP(DisasContext *ctx, arg_EIJMP *a) +{ + if (!avr_have_feature(ctx, AVR_FEATURE_EIJMP_EICALL)) { + return true; + } + + gen_jmp_ez(ctx); + return true; +} + +/* + * Jump to an address within the entire 4M (words) Program memory. See also + * RJMP. This instruction is not available in all devices. Refer to the device + * specific instruction set summary.0 + */ +static bool trans_JMP(DisasContext *ctx, arg_JMP *a) +{ + if (!avr_have_feature(ctx, AVR_FEATURE_JMP_CALL)) { + return true; + } + + gen_goto_tb(ctx, 0, a->imm); + + return true; +} + +/* + * Relative call to an address within PC - 2K + 1 and PC + 2K (words). The + * return address (the instruction after the RCALL) is stored onto the Stack. + * See also CALL. For AVR microcontrollers with Program memory not exceeding 4K + * words (8KB) this instruction can address the entire memory from every + * address location. The Stack Pointer uses a post-decrement scheme during + * RCALL. + */ +static bool trans_RCALL(DisasContext *ctx, arg_RCALL *a) +{ + int ret = ctx->npc; + int dst = ctx->npc + a->imm; + + gen_push_ret(ctx, ret); + gen_goto_tb(ctx, 0, dst); + + return true; +} + +/* + * Calls to a subroutine within the entire 4M (words) Program memory. The + * return address (to the instruction after the CALL) will be stored onto the + * Stack. See also RCALL. The Stack Pointer uses a post-decrement scheme during + * CALL. This instruction is not available in all devices. Refer to the device + * specific instruction set summary. + */ +static bool trans_ICALL(DisasContext *ctx, arg_ICALL *a) +{ + if (!avr_have_feature(ctx, AVR_FEATURE_IJMP_ICALL)) { + return true; + } + + int ret = ctx->npc; + + gen_push_ret(ctx, ret); + gen_jmp_z(ctx); + + return true; +} + +/* + * Indirect call of a subroutine pointed to by the Z (16 bits) Pointer + * Register in the Register File and the EIND Register in the I/O space. This + * instruction allows for indirect calls to the entire 4M (words) Program + * memory space. See also ICALL. The Stack Pointer uses a post-decrement scheme + * during EICALL. This instruction is not available in all devices. Refer to + * the device specific instruction set summary. + */ +static bool trans_EICALL(DisasContext *ctx, arg_EICALL *a) +{ + if (!avr_have_feature(ctx, AVR_FEATURE_EIJMP_EICALL)) { + return true; + } + + int ret = ctx->npc; + + gen_push_ret(ctx, ret); + gen_jmp_ez(ctx); + return true; +} + +/* + * Calls to a subroutine within the entire Program memory. The return + * address (to the instruction after the CALL) will be stored onto the Stack. + * (See also RCALL). The Stack Pointer uses a post-decrement scheme during + * CALL. This instruction is not available in all devices. Refer to the device + * specific instruction set summary. 
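The CALL/RCALL/ICALL family above all funnel through gen_push_ret(), and the matching RET/RETI go through gen_pop_ret(); the least obvious case is the 3-byte-PC layout, where the most significant byte of the return address ends up at the lowest stack address and SP finishes three bytes lower. A plain-C model of just that byte layout (the mem[] array and function names are invented; data memory is treated as a flat byte array):

#include <assert.h>
#include <stdint.h>

static uint8_t mem[0x10000];

/* 3-byte PC push, post-decrement SP: PC[23:16] lands at the lowest address */
static uint32_t push_ret3(uint32_t sp, uint32_t ret)
{
    mem[sp]     = ret & 0xff;               /* PC[7:0]   */
    mem[sp - 1] = (ret >> 8) & 0xff;        /* PC[15:8]  */
    mem[sp - 2] = (ret >> 16) & 0xff;       /* PC[23:16] */
    return sp - 3;
}

/* matching pop, pre-increment SP */
static uint32_t pop_ret3(uint32_t *sp)
{
    uint32_t ret = ((uint32_t)mem[*sp + 1] << 16)
                 | ((uint32_t)mem[*sp + 2] << 8)
                 |  (uint32_t)mem[*sp + 3];
    *sp += 3;
    return ret;
}

int main(void)
{
    uint32_t sp = 0x21ff;
    sp = push_ret3(sp, 0x012345);
    assert(sp == 0x21fc);
    assert(mem[0x21fd] == 0x01 && mem[0x21fe] == 0x23 && mem[0x21ff] == 0x45);
    assert(pop_ret3(&sp) == 0x012345 && sp == 0x21ff);
    return 0;
}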
+ */ +static bool trans_CALL(DisasContext *ctx, arg_CALL *a) +{ + if (!avr_have_feature(ctx, AVR_FEATURE_JMP_CALL)) { + return true; + } + + int Imm = a->imm; + int ret = ctx->npc; + + gen_push_ret(ctx, ret); + gen_goto_tb(ctx, 0, Imm); + + return true; +} + +/* + * Returns from subroutine. The return address is loaded from the STACK. + * The Stack Pointer uses a preincrement scheme during RET. + */ +static bool trans_RET(DisasContext *ctx, arg_RET *a) +{ + INIT_TCG_CONTEXT_FROM_DISAS(ctx); + gen_pop_ret(ctx, cpu_pc); + + ctx->bstate = DISAS_LOOKUP; + return true; +} + +/* + * Returns from interrupt. The return address is loaded from the STACK and + * the Global Interrupt Flag is set. Note that the Status Register is not + * automatically stored when entering an interrupt routine, and it is not + * restored when returning from an interrupt routine. This must be handled by + * the application program. The Stack Pointer uses a pre-increment scheme + * during RETI. + */ +static bool trans_RETI(DisasContext *ctx, arg_RETI *a) +{ + INIT_TCG_CONTEXT_FROM_DISAS(ctx); + gen_pop_ret(ctx, cpu_pc); + tcg_gen_movi_tl(cpu_If, 1); + + /* Need to return to main loop to re-evaluate interrupts. */ + ctx->bstate = DISAS_EXIT; + return true; +} + +/* + * This instruction performs a compare between two registers Rd and Rr, and + * skips the next instruction if Rd = Rr. + */ +static bool trans_CPSE(DisasContext *ctx, arg_CPSE *a) +{ + INIT_TCG_CONTEXT_FROM_DISAS(ctx); + ctx->skip_cond = TCG_COND_EQ; + ctx->skip_var0 = cpu_r[a->rd]; + ctx->skip_var1 = cpu_r[a->rr]; + return true; +} + +/* + * This instruction performs a compare between two registers Rd and Rr. + * None of the registers are changed. All conditional branches can be used + * after this instruction. + */ +static bool trans_CP(DisasContext *ctx, arg_CP *a) +{ + INIT_TCG_CONTEXT_FROM_DISAS(ctx); + TCGv Rd = cpu_r[a->rd]; + TCGv Rr = cpu_r[a->rr]; + TCGv R = tcg_temp_new_i32(); + + tcg_gen_sub_tl(R, Rd, Rr); /* R = Rd - Rr */ + tcg_gen_andi_tl(R, R, 0xff); /* make it 8 bits */ + + /* update status register */ + gen_sub_CHf(R, Rd, Rr); + gen_sub_Vf(R, Rd, Rr); + gen_ZNSf(R); + + tcg_temp_free_i32(R); + + return true; +} + +/* + * This instruction performs a compare between two registers Rd and Rr and + * also takes into account the previous carry. None of the registers are + * changed. All conditional branches can be used after this instruction. + */ +static bool trans_CPC(DisasContext *ctx, arg_CPC *a) +{ + INIT_TCG_CONTEXT_FROM_DISAS(ctx); + TCGv Rd = cpu_r[a->rd]; + TCGv Rr = cpu_r[a->rr]; + TCGv R = tcg_temp_new_i32(); + TCGv zero = tcg_const_i32(0); + + tcg_gen_sub_tl(R, Rd, Rr); /* R = Rd - Rr - Cf */ + tcg_gen_sub_tl(R, R, cpu_Cf); + tcg_gen_andi_tl(R, R, 0xff); /* make it 8 bits */ + /* update status register */ + gen_sub_CHf(R, Rd, Rr); + gen_sub_Vf(R, Rd, Rr); + gen_NSf(R); + + /* + * Previous value remains unchanged when the result is zero; + * cleared otherwise. + */ + tcg_gen_movcond_tl(TCG_COND_EQ, cpu_Zf, R, zero, cpu_Zf, zero); + + tcg_temp_free_i32(zero); + tcg_temp_free_i32(R); + + return true; +} + +/* + * This instruction performs a compare between register Rd and a constant. + * The register is not changed. All conditional branches can be used after this + * instruction. 
+ */ +static bool trans_CPI(DisasContext *ctx, arg_CPI *a) +{ + INIT_TCG_CONTEXT_FROM_DISAS(ctx); + TCGv Rd = cpu_r[a->rd]; + int Imm = a->imm; + TCGv Rr = tcg_const_i32(Imm); + TCGv R = tcg_temp_new_i32(); + + tcg_gen_sub_tl(R, Rd, Rr); /* R = Rd - Rr */ + tcg_gen_andi_tl(R, R, 0xff); /* make it 8 bits */ + + /* update status register */ + gen_sub_CHf(R, Rd, Rr); + gen_sub_Vf(R, Rd, Rr); + gen_ZNSf(R); + + tcg_temp_free_i32(R); + tcg_temp_free_i32(Rr); + + return true; +} + +/* + * This instruction tests a single bit in a register and skips the next + * instruction if the bit is cleared. + */ +static bool trans_SBRC(DisasContext *ctx, arg_SBRC *a) +{ + INIT_TCG_CONTEXT_FROM_DISAS(ctx); + TCGv Rr = cpu_r[a->rr]; + + ctx->skip_cond = TCG_COND_EQ; + ctx->skip_var0 = tcg_temp_new(); + ctx->free_skip_var0 = true; + + tcg_gen_andi_tl(ctx->skip_var0, Rr, 1 << a->bit); + return true; +} + +/* + * This instruction tests a single bit in a register and skips the next + * instruction if the bit is set. + */ +static bool trans_SBRS(DisasContext *ctx, arg_SBRS *a) +{ + INIT_TCG_CONTEXT_FROM_DISAS(ctx); + TCGv Rr = cpu_r[a->rr]; + + ctx->skip_cond = TCG_COND_NE; + ctx->skip_var0 = tcg_temp_new(); + ctx->free_skip_var0 = true; + + tcg_gen_andi_tl(ctx->skip_var0, Rr, 1 << a->bit); + return true; +} + +/* + * This instruction tests a single bit in an I/O Register and skips the + * next instruction if the bit is cleared. This instruction operates on the + * lower 32 I/O Registers -- addresses 0-31. + */ +static bool trans_SBIC(DisasContext *ctx, arg_SBIC *a) +{ + INIT_TCG_CONTEXT_AND_CPU_ENV_FROM_DISAS(ctx); + TCGv temp = tcg_const_i32(a->reg); + + gen_helper_inb(temp, cpu_env, temp); + tcg_gen_andi_tl(temp, temp, 1 << a->bit); + ctx->skip_cond = TCG_COND_EQ; + ctx->skip_var0 = temp; + ctx->free_skip_var0 = true; + + return true; +} + +/* + * This instruction tests a single bit in an I/O Register and skips the + * next instruction if the bit is set. This instruction operates on the lower + * 32 I/O Registers -- addresses 0-31. + */ +static bool trans_SBIS(DisasContext *ctx, arg_SBIS *a) +{ + INIT_TCG_CONTEXT_AND_CPU_ENV_FROM_DISAS(ctx); + TCGv temp = tcg_const_i32(a->reg); + + gen_helper_inb(temp, cpu_env, temp); + tcg_gen_andi_tl(temp, temp, 1 << a->bit); + ctx->skip_cond = TCG_COND_NE; + ctx->skip_var0 = temp; + ctx->free_skip_var0 = true; + + return true; +} + +/* + * Conditional relative branch. Tests a single bit in SREG and branches + * relatively to PC if the bit is cleared. This instruction branches relatively + * to PC in either direction (PC - 63 < = destination <= PC + 64). The + * parameter k is the offset from PC and is represented in two's complement + * form. + */ +static bool trans_BRBC(DisasContext *ctx, arg_BRBC *a) +{ + INIT_TCG_CONTEXT_FROM_DISAS(ctx); + TCGLabel *not_taken = gen_new_label(); + + TCGv var; + + switch (a->bit) { + case 0x00: + var = cpu_Cf; + break; + case 0x01: + var = cpu_Zf; + break; + case 0x02: + var = cpu_Nf; + break; + case 0x03: + var = cpu_Vf; + break; + case 0x04: + var = cpu_Sf; + break; + case 0x05: + var = cpu_Hf; + break; + case 0x06: + var = cpu_Tf; + break; + case 0x07: + var = cpu_If; + break; + default: + g_assert_not_reached(); + } + + tcg_gen_brcondi_i32(TCG_COND_NE, var, 0, not_taken); + gen_goto_tb(ctx, 0, ctx->npc + a->imm); + gen_set_label(not_taken); + + ctx->bstate = DISAS_CHAIN; + return true; +} + +/* + * Conditional relative branch. Tests a single bit in SREG and branches + * relatively to PC if the bit is set. 
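The switch statements in trans_BRBC() above and trans_BRBS() below map the 3-bit opcode field onto the individual SREG flags in the architectural order C, Z, N, V, S, H, T, I; combined with the signed 7-bit word offset this gives the documented PC-63..PC+64 range. A compact plain-C sketch of the same selection (enum and function names invented for the example):

#include <assert.h>
#include <stdint.h>

enum { FLAG_C, FLAG_Z, FLAG_N, FLAG_V, FLAG_S, FLAG_H, FLAG_T, FLAG_I };

/* BRBS: branch if the SREG bit is set; BRBC: branch if it is clear */
static uint32_t brb_target(uint32_t npc, uint8_t sreg, int bit, int offset, int want_set)
{
    int taken = ((sreg >> bit) & 1) == (want_set ? 1 : 0);
    return taken ? npc + offset : npc;    /* npc is already the word after the branch */
}

int main(void)
{
    uint8_t sreg = 1u << FLAG_Z;                           /* only Z is set */
    assert(brb_target(100, sreg, FLAG_Z, -3, 1) ==  97);   /* BREQ, taken, backwards */
    assert(brb_target(100, sreg, FLAG_C, 10, 1) == 100);   /* BRCS, not taken */
    assert(brb_target(100, sreg, FLAG_C, 10, 0) == 110);   /* BRCC, taken */
    return 0;
}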
This instruction branches relatively to + * PC in either direction (PC - 63 < = destination <= PC + 64). The parameter k + * is the offset from PC and is represented in two's complement form. + */ +static bool trans_BRBS(DisasContext *ctx, arg_BRBS *a) +{ + INIT_TCG_CONTEXT_FROM_DISAS(ctx); + TCGLabel *not_taken = gen_new_label(); + + TCGv var; + + switch (a->bit) { + case 0x00: + var = cpu_Cf; + break; + case 0x01: + var = cpu_Zf; + break; + case 0x02: + var = cpu_Nf; + break; + case 0x03: + var = cpu_Vf; + break; + case 0x04: + var = cpu_Sf; + break; + case 0x05: + var = cpu_Hf; + break; + case 0x06: + var = cpu_Tf; + break; + case 0x07: + var = cpu_If; + break; + default: + g_assert_not_reached(); + } + + tcg_gen_brcondi_i32(TCG_COND_EQ, var, 0, not_taken); + gen_goto_tb(ctx, 0, ctx->npc + a->imm); + gen_set_label(not_taken); + + ctx->bstate = DISAS_CHAIN; + return true; +} + +/* + * Data Transfer Instructions + */ + +/* + * in the gen_set_addr & gen_get_addr functions + * H assumed to be in 0x00ff0000 format + * M assumed to be in 0x000000ff format + * L assumed to be in 0x000000ff format + */ +static void gen_decl(set_addr, TCGv addr, TCGv H, TCGv M, TCGv L) +{ + tcg_gen_andi_tl(L, addr, 0x000000ff); + + tcg_gen_andi_tl(M, addr, 0x0000ff00); + tcg_gen_shri_tl(M, M, 8); + + tcg_gen_andi_tl(H, addr, 0x00ff0000); +} + +static void gen_set_xaddr(TCGContext *tcg_ctx, TCGv addr) +{ + gen_set_addr(tcg_ctx, addr, cpu_rampX, cpu_r[27], cpu_r[26]); +} + +static void gen_set_yaddr(TCGContext *tcg_ctx, TCGv addr) +{ + gen_set_addr(tcg_ctx, addr, cpu_rampY, cpu_r[29], cpu_r[28]); +} + +static void gen_set_zaddr(TCGContext *tcg_ctx, TCGv addr) +{ + gen_set_addr(tcg_ctx, addr, cpu_rampZ, cpu_r[31], cpu_r[30]); +} + +static TCGv gen_decl(get_addr, TCGv H, TCGv M, TCGv L) +{ + TCGv addr = tcg_temp_new_i32(); + + tcg_gen_deposit_tl(addr, M, H, 8, 8); + tcg_gen_deposit_tl(addr, L, addr, 8, 16); + + return addr; +} + +static TCGv gen_get_xaddr(TCGContext *tcg_ctx) +{ + return gen_get_addr(tcg_ctx, cpu_rampX, cpu_r[27], cpu_r[26]); +} + +static TCGv gen_get_yaddr(TCGContext *tcg_ctx) +{ + return gen_get_addr(tcg_ctx, cpu_rampY, cpu_r[29], cpu_r[28]); +} + +static TCGv gen_get_zaddr(TCGContext *tcg_ctx) +{ + return gen_get_addr(tcg_ctx, cpu_rampZ, cpu_r[31], cpu_r[30]); +} + +#define gen_set_xaddr(...) gen_call(set_xaddr, __VA_ARGS__) +#define gen_set_yaddr(...) gen_call(set_yaddr, __VA_ARGS__) +#define gen_set_zaddr(...) gen_call(set_zaddr, __VA_ARGS__) +#define gen_get_xaddr() gen_call(get_xaddr) +#define gen_get_yaddr() gen_call(get_yaddr) +#define gen_get_zaddr() gen_call(get_zaddr) + +/* + * Load one byte indirect from data space to register and stores an clear + * the bits in data space specified by the register. The instruction can only + * be used towards internal SRAM. The data location is pointed to by the Z (16 + * bits) Pointer Register in the Register File. Memory access is limited to the + * current data segment of 64KB. To access another data segment in devices with + * more than 64KB data space, the RAMPZ in register in the I/O area has to be + * changed. The Z-pointer Register is left unchanged by the operation. This + * instruction is especially suited for clearing status bits stored in SRAM. 
+ */ +static void gen_data_store(DisasContext *ctx, TCGv data, TCGv addr) +{ + INIT_TCG_CONTEXT_AND_CPU_ENV_FROM_DISAS(ctx); + if (ctx->tb->flags & TB_FLAGS_FULL_ACCESS) { + gen_helper_fullwr(cpu_env, data, addr); + } else { + tcg_gen_qemu_st8(data, addr, MMU_DATA_IDX); /* mem[addr] = data */ + } +} + +static void gen_data_load(DisasContext *ctx, TCGv data, TCGv addr) +{ + INIT_TCG_CONTEXT_AND_CPU_ENV_FROM_DISAS(ctx); + if (ctx->tb->flags & TB_FLAGS_FULL_ACCESS) { + gen_helper_fullrd(data, cpu_env, addr); + } else { + tcg_gen_qemu_ld8u(data, addr, MMU_DATA_IDX); /* data = mem[addr] */ + } +} + +static void gen_code_load(DisasContext *ctx, TCGv Rd, TCGv addr) +{ + INIT_TCG_CONTEXT_FROM_DISAS(ctx); + // Unicorn: + const uint32_t code_base = avr_code_base(ctx->env); + if (code_base) { + TCGv Rc = tcg_const_i32(code_base); + tcg_gen_or_tl(addr, addr, Rc); + tcg_temp_free_i32(Rc); + } + tcg_gen_qemu_ld8u(Rd, addr, MMU_CODE_IDX); /* Rd = mem[addr] */ +} + +/* + * This instruction makes a copy of one register into another. The source + * register Rr is left unchanged, while the destination register Rd is loaded + * with a copy of Rr. + */ +static bool trans_MOV(DisasContext *ctx, arg_MOV *a) +{ + INIT_TCG_CONTEXT_FROM_DISAS(ctx); + TCGv Rd = cpu_r[a->rd]; + TCGv Rr = cpu_r[a->rr]; + + tcg_gen_mov_tl(Rd, Rr); + + return true; +} + +/* + * This instruction makes a copy of one register pair into another register + * pair. The source register pair Rr+1:Rr is left unchanged, while the + * destination register pair Rd+1:Rd is loaded with a copy of Rr + 1:Rr. This + * instruction is not available in all devices. Refer to the device specific + * instruction set summary. + */ +static bool trans_MOVW(DisasContext *ctx, arg_MOVW *a) +{ + if (!avr_have_feature(ctx, AVR_FEATURE_MOVW)) { + return true; + } + + INIT_TCG_CONTEXT_FROM_DISAS(ctx); + TCGv RdL = cpu_r[a->rd]; + TCGv RdH = cpu_r[a->rd + 1]; + TCGv RrL = cpu_r[a->rr]; + TCGv RrH = cpu_r[a->rr + 1]; + + tcg_gen_mov_tl(RdH, RrH); + tcg_gen_mov_tl(RdL, RrL); + + return true; +} + +/* + * Loads an 8 bit constant directly to register 16 to 31. + */ +static bool trans_LDI(DisasContext *ctx, arg_LDI *a) +{ + INIT_TCG_CONTEXT_FROM_DISAS(ctx); + TCGv Rd = cpu_r[a->rd]; + int imm = a->imm; + + tcg_gen_movi_tl(Rd, imm); + + return true; +} + +/* + * Loads one byte from the data space to a register. For parts with SRAM, + * the data space consists of the Register File, I/O memory and internal SRAM + * (and external SRAM if applicable). For parts without SRAM, the data space + * consists of the register file only. The EEPROM has a separate address space. + * A 16-bit address must be supplied. Memory access is limited to the current + * data segment of 64KB. The LDS instruction uses the RAMPD Register to access + * memory above 64KB. To access another data segment in devices with more than + * 64KB data space, the RAMPD in register in the I/O area has to be changed. + * This instruction is not available in all devices. Refer to the device + * specific instruction set summary. + */ +static bool trans_LDS(DisasContext *ctx, arg_LDS *a) +{ + INIT_TCG_CONTEXT_FROM_DISAS(ctx); + TCGv Rd = cpu_r[a->rd]; + TCGv addr = tcg_temp_new_i32(); + TCGv H = cpu_rampD; + a->imm = next_word(ctx); + + tcg_gen_mov_tl(addr, H); /* addr = H:M:L */ + tcg_gen_shli_tl(addr, addr, 16); + tcg_gen_ori_tl(addr, addr, a->imm); + + gen_data_load(ctx, Rd, addr); + + tcg_temp_free_i32(addr); + + return true; +} + +/* + * Loads one byte indirect from the data space to a register. 
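The data space that LD/ST address is layered: register file first, then the I/O registers, then SRAM. That split is what helper_fullwr()/helper_fullrd() implement and what gen_data_store()/gen_data_load() above fall back to when the TB runs in full-access mode. A plain-C sketch of the address routing, assuming the usual 32 CPU registers and 64 I/O registers (those constants, the array names and the SPL example address are illustrative, not taken from the patch):

#include <assert.h>
#include <stdint.h>

enum { NUM_CPU_REGS = 32, NUM_IO_REGS = 64 };

static uint8_t regs[NUM_CPU_REGS];
static uint8_t io[NUM_IO_REGS];
static uint8_t sram[0x10000];

/* data-space write: register file first, then I/O registers, then SRAM */
static void data_write(uint32_t addr, uint8_t data)
{
    if (addr < NUM_CPU_REGS) {
        regs[addr] = data;
    } else if (addr < NUM_CPU_REGS + NUM_IO_REGS) {
        io[addr - NUM_CPU_REGS] = data;
    } else {
        sram[addr & 0xffff] = data;
    }
}

int main(void)
{
    data_write(0x001f, 0xaa);     /* r31 */
    data_write(0x005d, 0x55);     /* I/O 0x3d (SPL on typical parts) sits at data address 0x5d */
    data_write(0x0100, 0x42);     /* internal SRAM */
    assert(regs[31] == 0xaa);
    assert(io[0x3d] == 0x55);
    assert(sram[0x0100] == 0x42);
    return 0;
}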
For parts + * with SRAM, the data space consists of the Register File, I/O memory and + * internal SRAM (and external SRAM if applicable). For parts without SRAM, the + * data space consists of the Register File only. In some parts the Flash + * Memory has been mapped to the data space and can be read using this command. + * The EEPROM has a separate address space. The data location is pointed to by + * the X (16 bits) Pointer Register in the Register File. Memory access is + * limited to the current data segment of 64KB. To access another data segment + * in devices with more than 64KB data space, the RAMPX in register in the I/O + * area has to be changed. The X-pointer Register can either be left unchanged + * by the operation, or it can be post-incremented or predecremented. These + * features are especially suited for accessing arrays, tables, and Stack + * Pointer usage of the X-pointer Register. Note that only the low byte of the + * X-pointer is updated in devices with no more than 256 bytes data space. For + * such devices, the high byte of the pointer is not used by this instruction + * and can be used for other purposes. The RAMPX Register in the I/O area is + * updated in parts with more than 64KB data space or more than 64KB Program + * memory, and the increment/decrement is added to the entire 24-bit address on + * such devices. Not all variants of this instruction is available in all + * devices. Refer to the device specific instruction set summary. In the + * Reduced Core tinyAVR the LD instruction can be used to achieve the same + * operation as LPM since the program memory is mapped to the data memory + * space. + */ +static bool trans_LDX1(DisasContext *ctx, arg_LDX1 *a) +{ + INIT_TCG_CONTEXT_FROM_DISAS(ctx); + TCGv Rd = cpu_r[a->rd]; + TCGv addr = gen_get_xaddr(); + + gen_data_load(ctx, Rd, addr); + + tcg_temp_free_i32(addr); + + return true; +} + +static bool trans_LDX2(DisasContext *ctx, arg_LDX2 *a) +{ + INIT_TCG_CONTEXT_FROM_DISAS(ctx); + TCGv Rd = cpu_r[a->rd]; + TCGv addr = gen_get_xaddr(); + + gen_data_load(ctx, Rd, addr); + tcg_gen_addi_tl(addr, addr, 1); /* addr = addr + 1 */ + + gen_set_xaddr(addr); + + tcg_temp_free_i32(addr); + + return true; +} + +static bool trans_LDX3(DisasContext *ctx, arg_LDX3 *a) +{ + INIT_TCG_CONTEXT_FROM_DISAS(ctx); + TCGv Rd = cpu_r[a->rd]; + TCGv addr = gen_get_xaddr(); + + tcg_gen_subi_tl(addr, addr, 1); /* addr = addr - 1 */ + gen_data_load(ctx, Rd, addr); + gen_set_xaddr(addr); + + tcg_temp_free_i32(addr); + + return true; +} + +/* + * Loads one byte indirect with or without displacement from the data space + * to a register. For parts with SRAM, the data space consists of the Register + * File, I/O memory and internal SRAM (and external SRAM if applicable). For + * parts without SRAM, the data space consists of the Register File only. In + * some parts the Flash Memory has been mapped to the data space and can be + * read using this command. The EEPROM has a separate address space. The data + * location is pointed to by the Y (16 bits) Pointer Register in the Register + * File. Memory access is limited to the current data segment of 64KB. To + * access another data segment in devices with more than 64KB data space, the + * RAMPY in register in the I/O area has to be changed. The Y-pointer Register + * can either be left unchanged by the operation, or it can be post-incremented + * or predecremented. These features are especially suited for accessing + * arrays, tables, and Stack Pointer usage of the Y-pointer Register. 
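For the post-increment and pre-decrement forms just shown (trans_LDX2()/trans_LDX3()), the 16-bit X pointer lives in r27:r26 and the updated value is written back to the pair. A simplified plain-C sketch of the two access orders, ignoring RAMPX and the full-access path (array and function names invented):

#include <assert.h>
#include <stdint.h>

static uint8_t sram[0x10000];

/* LD Rd, X+ : load from X, then increment the r27:r26 pair */
static uint8_t ld_x_postinc(uint8_t r[32])
{
    uint16_t x = (uint16_t)((r[27] << 8) | r[26]);
    uint8_t v = sram[x];
    x++;
    r[26] = x & 0xff;
    r[27] = x >> 8;
    return v;
}

/* LD Rd, -X : decrement the pair first, then load */
static uint8_t ld_x_predec(uint8_t r[32])
{
    uint16_t x = (uint16_t)(((r[27] << 8) | r[26]) - 1);
    r[26] = x & 0xff;
    r[27] = x >> 8;
    return sram[x];
}

int main(void)
{
    uint8_t r[32] = {0};
    sram[0x01ff] = 0x11;

    r[26] = 0xff; r[27] = 0x01;              /* X = 0x01ff */
    assert(ld_x_postinc(r) == 0x11);         /* X becomes 0x0200 */
    assert(r[26] == 0x00 && r[27] == 0x02);
    assert(ld_x_predec(r) == 0x11);          /* X back to 0x01ff */
    assert(r[26] == 0xff && r[27] == 0x01);
    return 0;
}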
Note that + * only the low byte of the Y-pointer is updated in devices with no more than + * 256 bytes data space. For such devices, the high byte of the pointer is not + * used by this instruction and can be used for other purposes. The RAMPY + * Register in the I/O area is updated in parts with more than 64KB data space + * or more than 64KB Program memory, and the increment/decrement/displacement + * is added to the entire 24-bit address on such devices. Not all variants of + * this instruction is available in all devices. Refer to the device specific + * instruction set summary. In the Reduced Core tinyAVR the LD instruction can + * be used to achieve the same operation as LPM since the program memory is + * mapped to the data memory space. + */ +static bool trans_LDY2(DisasContext *ctx, arg_LDY2 *a) +{ + INIT_TCG_CONTEXT_FROM_DISAS(ctx); + TCGv Rd = cpu_r[a->rd]; + TCGv addr = gen_get_yaddr(); + + gen_data_load(ctx, Rd, addr); + tcg_gen_addi_tl(addr, addr, 1); /* addr = addr + 1 */ + + gen_set_yaddr(addr); + + tcg_temp_free_i32(addr); + + return true; +} + +static bool trans_LDY3(DisasContext *ctx, arg_LDY3 *a) +{ + INIT_TCG_CONTEXT_FROM_DISAS(ctx); + TCGv Rd = cpu_r[a->rd]; + TCGv addr = gen_get_yaddr(); + + tcg_gen_subi_tl(addr, addr, 1); /* addr = addr - 1 */ + gen_data_load(ctx, Rd, addr); + gen_set_yaddr(addr); + + tcg_temp_free_i32(addr); + + return true; +} + +static bool trans_LDDY(DisasContext *ctx, arg_LDDY *a) +{ + INIT_TCG_CONTEXT_FROM_DISAS(ctx); + TCGv Rd = cpu_r[a->rd]; + TCGv addr = gen_get_yaddr(); + + tcg_gen_addi_tl(addr, addr, a->imm); /* addr = addr + q */ + gen_data_load(ctx, Rd, addr); + + tcg_temp_free_i32(addr); + + return true; +} + +/* + * Loads one byte indirect with or without displacement from the data space + * to a register. For parts with SRAM, the data space consists of the Register + * File, I/O memory and internal SRAM (and external SRAM if applicable). For + * parts without SRAM, the data space consists of the Register File only. In + * some parts the Flash Memory has been mapped to the data space and can be + * read using this command. The EEPROM has a separate address space. The data + * location is pointed to by the Z (16 bits) Pointer Register in the Register + * File. Memory access is limited to the current data segment of 64KB. To + * access another data segment in devices with more than 64KB data space, the + * RAMPZ in register in the I/O area has to be changed. The Z-pointer Register + * can either be left unchanged by the operation, or it can be post-incremented + * or predecremented. These features are especially suited for Stack Pointer + * usage of the Z-pointer Register, however because the Z-pointer Register can + * be used for indirect subroutine calls, indirect jumps and table lookup, it + * is often more convenient to use the X or Y-pointer as a dedicated Stack + * Pointer. Note that only the low byte of the Z-pointer is updated in devices + * with no more than 256 bytes data space. For such devices, the high byte of + * the pointer is not used by this instruction and can be used for other + * purposes. The RAMPZ Register in the I/O area is updated in parts with more + * than 64KB data space or more than 64KB Program memory, and the + * increment/decrement/displacement is added to the entire 24-bit address on + * such devices. Not all variants of this instruction is available in all + * devices. Refer to the device specific instruction set summary. 
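For parts with more than 64KB of data space the comment above describes the effective address as the 24-bit concatenation RAMPY:Y, with the increment/decrement carried through RAMPY as well. A sketch of that device-level behaviour (illustrative names; this is the architectural description from the comment, not necessarily how gen_get_yaddr/gen_set_yaddr, defined earlier in this file, model every configuration):

#include <stdint.h>

/* 24-bit post-increment through RAMPY:Y */
static uint32_t y_addr_post_inc(uint8_t *rampy, uint16_t *y)
{
    uint32_t addr = ((uint32_t)*rampy << 16) | *y;  /* RAMPY:Y           */
    uint32_t next = (addr + 1) & 0xffffffu;         /* wraps at 24 bits  */
    *rampy = (uint8_t)(next >> 16);
    *y     = (uint16_t)next;
    return addr;                                    /* address of the load */
}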
In the + * Reduced Core tinyAVR the LD instruction can be used to achieve the same + * operation as LPM since the program memory is mapped to the data memory + * space. For using the Z-pointer for table lookup in Program memory see the + * LPM and ELPM instructions. + */ +static bool trans_LDZ2(DisasContext *ctx, arg_LDZ2 *a) +{ + INIT_TCG_CONTEXT_FROM_DISAS(ctx); + TCGv Rd = cpu_r[a->rd]; + TCGv addr = gen_get_zaddr(); + + gen_data_load(ctx, Rd, addr); + tcg_gen_addi_tl(addr, addr, 1); /* addr = addr + 1 */ + + gen_set_zaddr(addr); + + tcg_temp_free_i32(addr); + + return true; +} + +static bool trans_LDZ3(DisasContext *ctx, arg_LDZ3 *a) +{ + INIT_TCG_CONTEXT_FROM_DISAS(ctx); + TCGv Rd = cpu_r[a->rd]; + TCGv addr = gen_get_zaddr(); + + tcg_gen_subi_tl(addr, addr, 1); /* addr = addr - 1 */ + gen_data_load(ctx, Rd, addr); + + gen_set_zaddr(addr); + + tcg_temp_free_i32(addr); + + return true; +} + +static bool trans_LDDZ(DisasContext *ctx, arg_LDDZ *a) +{ + INIT_TCG_CONTEXT_FROM_DISAS(ctx); + TCGv Rd = cpu_r[a->rd]; + TCGv addr = gen_get_zaddr(); + + tcg_gen_addi_tl(addr, addr, a->imm); /* addr = addr + q */ + gen_data_load(ctx, Rd, addr); + + tcg_temp_free_i32(addr); + + return true; +} + +/* + * Stores one byte from a Register to the data space. For parts with SRAM, + * the data space consists of the Register File, I/O memory and internal SRAM + * (and external SRAM if applicable). For parts without SRAM, the data space + * consists of the Register File only. The EEPROM has a separate address space. + * A 16-bit address must be supplied. Memory access is limited to the current + * data segment of 64KB. The STS instruction uses the RAMPD Register to access + * memory above 64KB. To access another data segment in devices with more than + * 64KB data space, the RAMPD in register in the I/O area has to be changed. + * This instruction is not available in all devices. Refer to the device + * specific instruction set summary. + */ +static bool trans_STS(DisasContext *ctx, arg_STS *a) +{ + INIT_TCG_CONTEXT_FROM_DISAS(ctx); + TCGv Rd = cpu_r[a->rd]; + TCGv addr = tcg_temp_new_i32(); + TCGv H = cpu_rampD; + a->imm = next_word(ctx); + + tcg_gen_mov_tl(addr, H); /* addr = H:M:L */ + tcg_gen_shli_tl(addr, addr, 16); + tcg_gen_ori_tl(addr, addr, a->imm); + gen_data_store(ctx, Rd, addr); + + tcg_temp_free_i32(addr); + + return true; +} + +/* + * Stores one byte indirect from a register to data space. For parts with SRAM, + * the data space consists of the Register File, I/O memory, and internal SRAM + * (and external SRAM if applicable). For parts without SRAM, the data space + * consists of the Register File only. The EEPROM has a separate address space. + * + * The data location is pointed to by the X (16 bits) Pointer Register in the + * Register File. Memory access is limited to the current data segment of 64KB. + * To access another data segment in devices with more than 64KB data space, the + * RAMPX in register in the I/O area has to be changed. + * + * The X-pointer Register can either be left unchanged by the operation, or it + * can be post-incremented or pre-decremented. These features are especially + * suited for accessing arrays, tables, and Stack Pointer usage of the + * X-pointer Register. Note that only the low byte of the X-pointer is updated + * in devices with no more than 256 bytes data space. For such devices, the high + * byte of the pointer is not used by this instruction and can be used for other + * purposes. 
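The displacement forms translated above and below (trans_LDDY/trans_LDDZ, trans_STDY/trans_STDZ) access the byte at pointer+q and leave the pointer itself unchanged. A short standalone sketch, kept within the 64KB segment for simplicity (illustrative names, not code from this patch):

#include <stdint.h>

static uint8_t sram[0x10000];                        /* 64KB data segment (illustrative) */

uint8_t ldd_zq(uint16_t z, uint8_t q)             { return sram[(uint16_t)(z + q)]; }  /* LDD Rd, Z+q */
void    std_zq(uint16_t z, uint8_t q, uint8_t rr) { sram[(uint16_t)(z + q)] = rr;   }  /* STD Z+q, Rr */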
The RAMPX Register in the I/O area is updated in parts with more + * than 64KB data space or more than 64KB Program memory, and the increment / + * decrement is added to the entire 24-bit address on such devices. + */ +static bool trans_STX1(DisasContext *ctx, arg_STX1 *a) +{ + INIT_TCG_CONTEXT_FROM_DISAS(ctx); + TCGv Rd = cpu_r[a->rr]; + TCGv addr = gen_get_xaddr(); + + gen_data_store(ctx, Rd, addr); + + tcg_temp_free_i32(addr); + + return true; +} + +static bool trans_STX2(DisasContext *ctx, arg_STX2 *a) +{ + INIT_TCG_CONTEXT_FROM_DISAS(ctx); + TCGv Rd = cpu_r[a->rr]; + TCGv addr = gen_get_xaddr(); + + gen_data_store(ctx, Rd, addr); + tcg_gen_addi_tl(addr, addr, 1); /* addr = addr + 1 */ + gen_set_xaddr(addr); + + tcg_temp_free_i32(addr); + + return true; +} + +static bool trans_STX3(DisasContext *ctx, arg_STX3 *a) +{ + INIT_TCG_CONTEXT_FROM_DISAS(ctx); + TCGv Rd = cpu_r[a->rr]; + TCGv addr = gen_get_xaddr(); + + tcg_gen_subi_tl(addr, addr, 1); /* addr = addr - 1 */ + gen_data_store(ctx, Rd, addr); + gen_set_xaddr(addr); + + tcg_temp_free_i32(addr); + + return true; +} + +/* + * Stores one byte indirect with or without displacement from a register to data + * space. For parts with SRAM, the data space consists of the Register File, I/O + * memory, and internal SRAM (and external SRAM if applicable). For parts + * without SRAM, the data space consists of the Register File only. The EEPROM + * has a separate address space. + * + * The data location is pointed to by the Y (16 bits) Pointer Register in the + * Register File. Memory access is limited to the current data segment of 64KB. + * To access another data segment in devices with more than 64KB data space, the + * RAMPY in register in the I/O area has to be changed. + * + * The Y-pointer Register can either be left unchanged by the operation, or it + * can be post-incremented or pre-decremented. These features are especially + * suited for accessing arrays, tables, and Stack Pointer usage of the Y-pointer + * Register. Note that only the low byte of the Y-pointer is updated in devices + * with no more than 256 bytes data space. For such devices, the high byte of + * the pointer is not used by this instruction and can be used for other + * purposes. The RAMPY Register in the I/O area is updated in parts with more + * than 64KB data space or more than 64KB Program memory, and the increment / + * decrement / displacement is added to the entire 24-bit address on such + * devices. + */ +static bool trans_STY2(DisasContext *ctx, arg_STY2 *a) +{ + INIT_TCG_CONTEXT_FROM_DISAS(ctx); + TCGv Rd = cpu_r[a->rd]; + TCGv addr = gen_get_yaddr(); + + gen_data_store(ctx, Rd, addr); + tcg_gen_addi_tl(addr, addr, 1); /* addr = addr + 1 */ + gen_set_yaddr(addr); + + tcg_temp_free_i32(addr); + + return true; +} + +static bool trans_STY3(DisasContext *ctx, arg_STY3 *a) +{ + INIT_TCG_CONTEXT_FROM_DISAS(ctx); + TCGv Rd = cpu_r[a->rd]; + TCGv addr = gen_get_yaddr(); + + tcg_gen_subi_tl(addr, addr, 1); /* addr = addr - 1 */ + gen_data_store(ctx, Rd, addr); + gen_set_yaddr(addr); + + tcg_temp_free_i32(addr); + + return true; +} + +static bool trans_STDY(DisasContext *ctx, arg_STDY *a) +{ + INIT_TCG_CONTEXT_FROM_DISAS(ctx); + TCGv Rd = cpu_r[a->rd]; + TCGv addr = gen_get_yaddr(); + + tcg_gen_addi_tl(addr, addr, a->imm); /* addr = addr + q */ + gen_data_store(ctx, Rd, addr); + + tcg_temp_free_i32(addr); + + return true; +} + +/* + * Stores one byte indirect with or without displacement from a register to data + * space. 
For parts with SRAM, the data space consists of the Register File, I/O + * memory, and internal SRAM (and external SRAM if applicable). For parts + * without SRAM, the data space consists of the Register File only. The EEPROM + * has a separate address space. + * + * The data location is pointed to by the Y (16 bits) Pointer Register in the + * Register File. Memory access is limited to the current data segment of 64KB. + * To access another data segment in devices with more than 64KB data space, the + * RAMPY in register in the I/O area has to be changed. + * + * The Y-pointer Register can either be left unchanged by the operation, or it + * can be post-incremented or pre-decremented. These features are especially + * suited for accessing arrays, tables, and Stack Pointer usage of the Y-pointer + * Register. Note that only the low byte of the Y-pointer is updated in devices + * with no more than 256 bytes data space. For such devices, the high byte of + * the pointer is not used by this instruction and can be used for other + * purposes. The RAMPY Register in the I/O area is updated in parts with more + * than 64KB data space or more than 64KB Program memory, and the increment / + * decrement / displacement is added to the entire 24-bit address on such + * devices. + */ +static bool trans_STZ2(DisasContext *ctx, arg_STZ2 *a) +{ + INIT_TCG_CONTEXT_FROM_DISAS(ctx); + TCGv Rd = cpu_r[a->rd]; + TCGv addr = gen_get_zaddr(); + + gen_data_store(ctx, Rd, addr); + tcg_gen_addi_tl(addr, addr, 1); /* addr = addr + 1 */ + + gen_set_zaddr(addr); + + tcg_temp_free_i32(addr); + + return true; +} + +static bool trans_STZ3(DisasContext *ctx, arg_STZ3 *a) +{ + INIT_TCG_CONTEXT_FROM_DISAS(ctx); + TCGv Rd = cpu_r[a->rd]; + TCGv addr = gen_get_zaddr(); + + tcg_gen_subi_tl(addr, addr, 1); /* addr = addr - 1 */ + gen_data_store(ctx, Rd, addr); + + gen_set_zaddr(addr); + + tcg_temp_free_i32(addr); + + return true; +} + +static bool trans_STDZ(DisasContext *ctx, arg_STDZ *a) +{ + INIT_TCG_CONTEXT_FROM_DISAS(ctx); + TCGv Rd = cpu_r[a->rd]; + TCGv addr = gen_get_zaddr(); + + tcg_gen_addi_tl(addr, addr, a->imm); /* addr = addr + q */ + gen_data_store(ctx, Rd, addr); + + tcg_temp_free_i32(addr); + + return true; +} + +/* + * Loads one byte pointed to by the Z-register into the destination + * register Rd. This instruction features a 100% space effective constant + * initialization or constant data fetch. The Program memory is organized in + * 16-bit words while the Z-pointer is a byte address. Thus, the least + * significant bit of the Z-pointer selects either low byte (ZLSB = 0) or high + * byte (ZLSB = 1). This instruction can address the first 64KB (32K words) of + * Program memory. The Zpointer Register can either be left unchanged by the + * operation, or it can be incremented. The incrementation does not apply to + * the RAMPZ Register. + * + * Devices with Self-Programming capability can use the LPM instruction to read + * the Fuse and Lock bit values. 
+ */ +static bool trans_LPM1(DisasContext *ctx, arg_LPM1 *a) +{ + if (!avr_have_feature(ctx, AVR_FEATURE_LPM)) { + return true; + } + + INIT_TCG_CONTEXT_FROM_DISAS(ctx); + TCGv Rd = cpu_r[0]; + TCGv addr = tcg_temp_new_i32(); + TCGv H = cpu_r[31]; + TCGv L = cpu_r[30]; + + tcg_gen_shli_tl(addr, H, 8); /* addr = H:L */ + tcg_gen_or_tl(addr, addr, L); + gen_code_load(ctx, Rd, addr); + + tcg_temp_free_i32(addr); + + return true; +} + +static bool trans_LPM2(DisasContext *ctx, arg_LPM2 *a) +{ + if (!avr_have_feature(ctx, AVR_FEATURE_LPM)) { + return true; + } + + INIT_TCG_CONTEXT_FROM_DISAS(ctx); + TCGv Rd = cpu_r[a->rd]; + TCGv addr = tcg_temp_new_i32(); + TCGv H = cpu_r[31]; + TCGv L = cpu_r[30]; + + tcg_gen_shli_tl(addr, H, 8); /* addr = H:L */ + tcg_gen_or_tl(addr, addr, L); + gen_code_load(ctx, Rd, addr); + + tcg_temp_free_i32(addr); + + return true; +} + +static bool trans_LPMX(DisasContext *ctx, arg_LPMX *a) +{ + if (!avr_have_feature(ctx, AVR_FEATURE_LPMX)) { + return true; + } + + INIT_TCG_CONTEXT_FROM_DISAS(ctx); + TCGv Rd = cpu_r[a->rd]; + TCGv addr = tcg_temp_new_i32(); + TCGv H = cpu_r[31]; + TCGv L = cpu_r[30]; + + tcg_gen_shli_tl(addr, H, 8); /* addr = H:L */ + tcg_gen_or_tl(addr, addr, L); + gen_code_load(ctx, Rd, addr); + tcg_gen_addi_tl(addr, addr, 1); /* addr = addr + 1 */ + tcg_gen_andi_tl(L, addr, 0xff); + tcg_gen_shri_tl(addr, addr, 8); + tcg_gen_andi_tl(H, addr, 0xff); + + tcg_temp_free_i32(addr); + + return true; +} + +/* + * Loads one byte pointed to by the Z-register and the RAMPZ Register in + * the I/O space, and places this byte in the destination register Rd. This + * instruction features a 100% space effective constant initialization or + * constant data fetch. The Program memory is organized in 16-bit words while + * the Z-pointer is a byte address. Thus, the least significant bit of the + * Z-pointer selects either low byte (ZLSB = 0) or high byte (ZLSB = 1). This + * instruction can address the entire Program memory space. The Z-pointer + * Register can either be left unchanged by the operation, or it can be + * incremented. The incrementation applies to the entire 24-bit concatenation + * of the RAMPZ and Z-pointer Registers. + * + * Devices with Self-Programming capability can use the ELPM instruction to + * read the Fuse and Lock bit value. + */ +static bool trans_ELPM1(DisasContext *ctx, arg_ELPM1 *a) +{ + if (!avr_have_feature(ctx, AVR_FEATURE_ELPM)) { + return true; + } + + INIT_TCG_CONTEXT_FROM_DISAS(ctx); + TCGv Rd = cpu_r[0]; + TCGv addr = gen_get_zaddr(); + + gen_code_load(ctx, Rd, addr); + + tcg_temp_free_i32(addr); + + return true; +} + +static bool trans_ELPM2(DisasContext *ctx, arg_ELPM2 *a) +{ + if (!avr_have_feature(ctx, AVR_FEATURE_ELPM)) { + return true; + } + + INIT_TCG_CONTEXT_FROM_DISAS(ctx); + TCGv Rd = cpu_r[a->rd]; + TCGv addr = gen_get_zaddr(); + + gen_code_load(ctx, Rd, addr); + + tcg_temp_free_i32(addr); + + return true; +} + +static bool trans_ELPMX(DisasContext *ctx, arg_ELPMX *a) +{ + if (!avr_have_feature(ctx, AVR_FEATURE_ELPMX)) { + return true; + } + + INIT_TCG_CONTEXT_FROM_DISAS(ctx); + TCGv Rd = cpu_r[a->rd]; + TCGv addr = gen_get_zaddr(); + + gen_code_load(ctx, Rd, addr); + tcg_gen_addi_tl(addr, addr, 1); /* addr = addr + 1 */ + gen_set_zaddr(addr); + + tcg_temp_free_i32(addr); + + return true; +} + +/* + * SPM can be used to erase a page in the Program memory, to write a page + * in the Program memory (that is already erased), and to set Boot Loader Lock + * bits. 
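As the LPM description above states, program memory is organized in 16-bit words while Z is a byte address, so bit 0 of Z selects the low or high byte of the flash word; the LPM Z+ form (trans_LPMX) then writes the incremented address back to R31:R30 without touching RAMPZ. A behavioural sketch of those two details (illustrative names; the translation itself goes through gen_code_load and byte-addressed code accesses):

#include <stdint.h>

static const uint16_t flash_word[0x8000];            /* 64KB program memory (illustrative) */

static uint8_t lpm_z_post_inc(uint8_t *r30, uint8_t *r31)
{
    uint16_t z = (uint16_t)((*r31 << 8) | *r30);      /* Z = R31:R30                   */
    uint16_t w = flash_word[z >> 1];
    uint8_t  b = (z & 1) ? (uint8_t)(w >> 8)          /* ZLSB=1 -> high byte           */
                         : (uint8_t)(w & 0xff);       /* ZLSB=0 -> low byte            */
    z++;                                              /* post-increment, RAMPZ untouched */
    *r30 = (uint8_t)z;
    *r31 = (uint8_t)(z >> 8);
    return b;
}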
In some devices, the Program memory can be written one word at a time, + * in other devices an entire page can be programmed simultaneously after first + * filling a temporary page buffer. In all cases, the Program memory must be + * erased one page at a time. When erasing the Program memory, the RAMPZ and + * Z-register are used as page address. When writing the Program memory, the + * RAMPZ and Z-register are used as page or word address, and the R1:R0 + * register pair is used as data(1). When setting the Boot Loader Lock bits, + * the R1:R0 register pair is used as data. Refer to the device documentation + * for detailed description of SPM usage. This instruction can address the + * entire Program memory. + * + * The SPM instruction is not available in all devices. Refer to the device + * specific instruction set summary. + * + * Note: 1. R1 determines the instruction high byte, and R0 determines the + * instruction low byte. + */ +static bool trans_SPM(DisasContext *ctx, arg_SPM *a) +{ + /* TODO */ + if (!avr_have_feature(ctx, AVR_FEATURE_SPM)) { + return true; + } + + return true; +} + +static bool trans_SPMX(DisasContext *ctx, arg_SPMX *a) +{ + /* TODO */ + if (!avr_have_feature(ctx, AVR_FEATURE_SPMX)) { + return true; + } + + return true; +} + +/* + * Loads data from the I/O Space (Ports, Timers, Configuration Registers, + * etc.) into register Rd in the Register File. + */ +static bool trans_IN(DisasContext *ctx, arg_IN *a) +{ + INIT_TCG_CONTEXT_AND_CPU_ENV_FROM_DISAS(ctx); + TCGv Rd = cpu_r[a->rd]; + TCGv port = tcg_const_i32(a->imm); + + gen_helper_inb(Rd, cpu_env, port); + + tcg_temp_free_i32(port); + + return true; +} + +/* + * Stores data from register Rr in the Register File to I/O Space (Ports, + * Timers, Configuration Registers, etc.). + */ +static bool trans_OUT(DisasContext *ctx, arg_OUT *a) +{ + INIT_TCG_CONTEXT_AND_CPU_ENV_FROM_DISAS(ctx); + TCGv Rd = cpu_r[a->rd]; + TCGv port = tcg_const_i32(a->imm); + + gen_helper_outb(cpu_env, port, Rd); + + tcg_temp_free_i32(port); + + return true; +} + +/* + * This instruction stores the contents of register Rr on the STACK. The + * Stack Pointer is post-decremented by 1 after the PUSH. This instruction is + * not available in all devices. Refer to the device specific instruction set + * summary. + */ +static bool trans_PUSH(DisasContext *ctx, arg_PUSH *a) +{ + INIT_TCG_CONTEXT_FROM_DISAS(ctx); + TCGv Rd = cpu_r[a->rd]; + + gen_data_store(ctx, Rd, cpu_sp); + tcg_gen_subi_tl(cpu_sp, cpu_sp, 1); + + return true; +} + +/* + * This instruction loads register Rd with a byte from the STACK. The Stack + * Pointer is pre-incremented by 1 before the POP. This instruction is not + * available in all devices. Refer to the device specific instruction set + * summary. + */ +static bool trans_POP(DisasContext *ctx, arg_POP *a) +{ + /* + * Using a temp to work around some strange behaviour: + * tcg_gen_addi_tl(cpu_sp, cpu_sp, 1); + * gen_data_load(ctx, Rd, cpu_sp); + * seems to cause the add to happen twice. + * This doesn't happen if either the add or the load is removed. + */ + INIT_TCG_CONTEXT_FROM_DISAS(ctx); + TCGv t1 = tcg_temp_new_i32(); + TCGv Rd = cpu_r[a->rd]; + + tcg_gen_addi_tl(t1, cpu_sp, 1); + gen_data_load(ctx, Rd, t1); + tcg_gen_mov_tl(cpu_sp, t1); + + return true; +} + +/* + * Exchanges one byte indirect between register and data space. The data + * location is pointed to by the Z (16 bits) Pointer Register in the Register + * File. Memory access is limited to the current data segment of 64KB. 
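trans_PUSH and trans_POP above encode AVR's stack discipline: PUSH stores the register to mem[SP] and then decrements SP, POP increments SP first and then loads. A compact sketch (illustrative names, not code from this patch):

#include <stdint.h>

static uint8_t sram[0x10000];                       /* 64KB data segment (illustrative) */

void    push(uint16_t *sp, uint8_t rr) { sram[*sp] = rr; (*sp)--;   }  /* store, then SP-- */
uint8_t pop (uint16_t *sp)             { (*sp)++; return sram[*sp]; }  /* ++SP, then load  */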
To + * access another data segment in devices with more than 64KB data space, the + * RAMPZ in register in the I/O area has to be changed. + * + * The Z-pointer Register is left unchanged by the operation. This instruction + * is especially suited for writing/reading status bits stored in SRAM. + */ +static bool trans_XCH(DisasContext *ctx, arg_XCH *a) +{ + if (!avr_have_feature(ctx, AVR_FEATURE_RMW)) { + return true; + } + + INIT_TCG_CONTEXT_FROM_DISAS(ctx); + TCGv Rd = cpu_r[a->rd]; + TCGv t0 = tcg_temp_new_i32(); + TCGv addr = gen_get_zaddr(); + + gen_data_load(ctx, t0, addr); + gen_data_store(ctx, Rd, addr); + tcg_gen_mov_tl(Rd, t0); + + tcg_temp_free_i32(t0); + tcg_temp_free_i32(addr); + + return true; +} + +/* + * Load one byte indirect from data space to register and set bits in data + * space specified by the register. The instruction can only be used towards + * internal SRAM. The data location is pointed to by the Z (16 bits) Pointer + * Register in the Register File. Memory access is limited to the current data + * segment of 64KB. To access another data segment in devices with more than + * 64KB data space, the RAMPZ in register in the I/O area has to be changed. + * + * The Z-pointer Register is left unchanged by the operation. This instruction + * is especially suited for setting status bits stored in SRAM. + */ +static bool trans_LAS(DisasContext *ctx, arg_LAS *a) +{ + if (!avr_have_feature(ctx, AVR_FEATURE_RMW)) { + return true; + } + + INIT_TCG_CONTEXT_FROM_DISAS(ctx); + TCGv Rr = cpu_r[a->rd]; + TCGv addr = gen_get_zaddr(); + TCGv t0 = tcg_temp_new_i32(); + TCGv t1 = tcg_temp_new_i32(); + + gen_data_load(ctx, t0, addr); /* t0 = mem[addr] */ + tcg_gen_or_tl(t1, t0, Rr); + tcg_gen_mov_tl(Rr, t0); /* Rr = t0 */ + gen_data_store(ctx, t1, addr); /* mem[addr] = t1 */ + + tcg_temp_free_i32(t1); + tcg_temp_free_i32(t0); + tcg_temp_free_i32(addr); + + return true; +} + +/* + * Load one byte indirect from data space to register and stores and clear + * the bits in data space specified by the register. The instruction can + * only be used towards internal SRAM. The data location is pointed to by + * the Z (16 bits) Pointer Register in the Register File. Memory access is + * limited to the current data segment of 64KB. To access another data + * segment in devices with more than 64KB data space, the RAMPZ in register + * in the I/O area has to be changed. + * + * The Z-pointer Register is left unchanged by the operation. This instruction + * is especially suited for clearing status bits stored in SRAM. + */ +static bool trans_LAC(DisasContext *ctx, arg_LAC *a) +{ + if (!avr_have_feature(ctx, AVR_FEATURE_RMW)) { + return true; + } + + INIT_TCG_CONTEXT_FROM_DISAS(ctx); + TCGv Rr = cpu_r[a->rd]; + TCGv addr = gen_get_zaddr(); + TCGv t0 = tcg_temp_new_i32(); + TCGv t1 = tcg_temp_new_i32(); + + gen_data_load(ctx, t0, addr); /* t0 = mem[addr] */ + tcg_gen_andc_tl(t1, t0, Rr); /* t1 = t0 & (0xff - Rr) = t0 & ~Rr */ + tcg_gen_mov_tl(Rr, t0); /* Rr = t0 */ + gen_data_store(ctx, t1, addr); /* mem[addr] = t1 */ + + tcg_temp_free_i32(t1); + tcg_temp_free_i32(t0); + tcg_temp_free_i32(addr); + + return true; +} + + +/* + * Load one byte indirect from data space to register and toggles bits in + * the data space specified by the register. The instruction can only be used + * towards SRAM. The data location is pointed to by the Z (16 bits) Pointer + * Register in the Register File. Memory access is limited to the current data + * segment of 64KB. 
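XCH, LAS and LAC above (and LAT just below) share one read-modify-write shape: the old byte at mem[Z] always ends up in the register, while memory receives the swapped, OR-ed, cleared or toggled value. A side-by-side sketch (illustrative names, not code from this patch):

#include <stdint.h>

static uint8_t sram[0x10000];                      /* 64KB data segment (illustrative) */

void xch(uint16_t z, uint8_t *rd) { uint8_t m = sram[z]; sram[z] = *rd;               *rd = m; }
void las(uint16_t z, uint8_t *rr) { uint8_t m = sram[z]; sram[z] = (uint8_t)(m | *rr);  *rr = m; }
void lac(uint16_t z, uint8_t *rr) { uint8_t m = sram[z]; sram[z] = (uint8_t)(m & ~*rr); *rr = m; }
void lat(uint16_t z, uint8_t *rd) { uint8_t m = sram[z]; sram[z] = (uint8_t)(m ^ *rd);  *rd = m; }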
To access another data segment in devices with more than + * 64KB data space, the RAMPZ in register in the I/O area has to be changed. + * + * The Z-pointer Register is left unchanged by the operation. This instruction + * is especially suited for changing status bits stored in SRAM. + */ +static bool trans_LAT(DisasContext *ctx, arg_LAT *a) +{ + if (!avr_have_feature(ctx, AVR_FEATURE_RMW)) { + return true; + } + + INIT_TCG_CONTEXT_FROM_DISAS(ctx); + TCGv Rd = cpu_r[a->rd]; + TCGv addr = gen_get_zaddr(); + TCGv t0 = tcg_temp_new_i32(); + TCGv t1 = tcg_temp_new_i32(); + + gen_data_load(ctx, t0, addr); /* t0 = mem[addr] */ + tcg_gen_xor_tl(t1, t0, Rd); + tcg_gen_mov_tl(Rd, t0); /* Rd = t0 */ + gen_data_store(ctx, t1, addr); /* mem[addr] = t1 */ + + tcg_temp_free_i32(t1); + tcg_temp_free_i32(t0); + tcg_temp_free_i32(addr); + + return true; +} + +/* + * Bit and Bit-test Instructions + */ +static void gen_decl(rshift_ZNVSf, TCGv R) +{ + tcg_gen_setcondi_tl(TCG_COND_EQ, cpu_Zf, R, 0); /* Zf = R == 0 */ + tcg_gen_shri_tl(cpu_Nf, R, 7); /* Nf = R(7) */ + tcg_gen_xor_tl(cpu_Vf, cpu_Nf, cpu_Cf); + tcg_gen_xor_tl(cpu_Sf, cpu_Nf, cpu_Vf); /* Sf = Nf ^ Vf */ +} + +#define gen_rshift_ZNVSf(...) gen_call(rshift_ZNVSf, __VA_ARGS__) + +/* + * Shifts all bits in Rd one place to the right. Bit 7 is cleared. Bit 0 is + * loaded into the C Flag of the SREG. This operation effectively divides an + * unsigned value by two. The C Flag can be used to round the result. + */ +static bool trans_LSR(DisasContext *ctx, arg_LSR *a) +{ + INIT_TCG_CONTEXT_FROM_DISAS(ctx); + TCGv Rd = cpu_r[a->rd]; + + tcg_gen_andi_tl(cpu_Cf, Rd, 1); + tcg_gen_shri_tl(Rd, Rd, 1); + + /* update status register */ + tcg_gen_setcondi_tl(TCG_COND_EQ, cpu_Zf, Rd, 0); /* Zf = Rd == 0 */ + tcg_gen_movi_tl(cpu_Nf, 0); + tcg_gen_mov_tl(cpu_Vf, cpu_Cf); + tcg_gen_mov_tl(cpu_Sf, cpu_Vf); + + return true; +} + +/* + * Shifts all bits in Rd one place to the right. The C Flag is shifted into + * bit 7 of Rd. Bit 0 is shifted into the C Flag. This operation, combined + * with ASR, effectively divides multi-byte signed values by two. Combined with + * LSR it effectively divides multi-byte unsigned values by two. The Carry Flag + * can be used to round the result. + */ +static bool trans_ROR(DisasContext *ctx, arg_ROR *a) +{ + INIT_TCG_CONTEXT_FROM_DISAS(ctx); + TCGv Rd = cpu_r[a->rd]; + TCGv t0 = tcg_temp_new_i32(); + + tcg_gen_shli_tl(t0, cpu_Cf, 7); + + /* update status register */ + tcg_gen_andi_tl(cpu_Cf, Rd, 1); + + /* update output register */ + tcg_gen_shri_tl(Rd, Rd, 1); + tcg_gen_or_tl(Rd, Rd, t0); + + /* update status register */ + gen_rshift_ZNVSf(Rd); + + tcg_temp_free_i32(t0); + + return true; +} + +/* + * Shifts all bits in Rd one place to the right. Bit 7 is held constant. Bit 0 + * is loaded into the C Flag of the SREG. This operation effectively divides a + * signed value by two without changing its sign. The Carry Flag can be used to + * round the result. + */ +static bool trans_ASR(DisasContext *ctx, arg_ASR *a) +{ + INIT_TCG_CONTEXT_FROM_DISAS(ctx); + TCGv Rd = cpu_r[a->rd]; + TCGv t0 = tcg_temp_new_i32(); + + /* update status register */ + tcg_gen_andi_tl(cpu_Cf, Rd, 1); /* Cf = Rd(0) */ + + /* update output register */ + tcg_gen_andi_tl(t0, Rd, 0x80); /* Rd = (Rd & 0x80) | (Rd >> 1) */ + tcg_gen_shri_tl(Rd, Rd, 1); + tcg_gen_or_tl(Rd, Rd, t0); + + /* update status register */ + gen_rshift_ZNVSf(Rd); + + tcg_temp_free_i32(t0); + + return true; +} + +/* + * Swaps high and low nibbles in a register. 
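The right-shift instructions above (LSR inline, ROR/ASR via rshift_ZNVSf) update SREG as C = old bit 0, Z = (result == 0), N = result bit 7 (always 0 for LSR), V = N ^ C and S = N ^ V; for example, LSR of 0x01 yields 0x00 with C=1, Z=1, N=0, V=1, S=1. A standalone sketch of the same update (illustrative names, not code from this patch):

#include <stdint.h>

typedef struct { uint8_t C, Z, N, V, S; } sreg_t;

static uint8_t lsr(uint8_t rd, sreg_t *f)
{
    f->C = rd & 1;              /* Cf = Rd(0)            */
    rd >>= 1;                   /* bit 7 is cleared      */
    f->Z = (rd == 0);
    f->N = 0;                   /* LSR always clears N   */
    f->V = f->N ^ f->C;
    f->S = f->N ^ f->V;
    return rd;
}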
+ */ +static bool trans_SWAP(DisasContext *ctx, arg_SWAP *a) +{ + INIT_TCG_CONTEXT_FROM_DISAS(ctx); + TCGv Rd = cpu_r[a->rd]; + TCGv t0 = tcg_temp_new_i32(); + TCGv t1 = tcg_temp_new_i32(); + + tcg_gen_andi_tl(t0, Rd, 0x0f); + tcg_gen_shli_tl(t0, t0, 4); + tcg_gen_andi_tl(t1, Rd, 0xf0); + tcg_gen_shri_tl(t1, t1, 4); + tcg_gen_or_tl(Rd, t0, t1); + + tcg_temp_free_i32(t1); + tcg_temp_free_i32(t0); + + return true; +} + +/* + * Sets a specified bit in an I/O Register. This instruction operates on + * the lower 32 I/O Registers -- addresses 0-31. + */ +static bool trans_SBI(DisasContext *ctx, arg_SBI *a) +{ + INIT_TCG_CONTEXT_AND_CPU_ENV_FROM_DISAS(ctx); + TCGv data = tcg_temp_new_i32(); + TCGv port = tcg_const_i32(a->reg); + + gen_helper_inb(data, cpu_env, port); + tcg_gen_ori_tl(data, data, 1 << a->bit); + gen_helper_outb(cpu_env, port, data); + + tcg_temp_free_i32(port); + tcg_temp_free_i32(data); + + return true; +} + +/* + * Clears a specified bit in an I/O Register. This instruction operates on + * the lower 32 I/O Registers -- addresses 0-31. + */ +static bool trans_CBI(DisasContext *ctx, arg_CBI *a) +{ + INIT_TCG_CONTEXT_AND_CPU_ENV_FROM_DISAS(ctx); + TCGv data = tcg_temp_new_i32(); + TCGv port = tcg_const_i32(a->reg); + + gen_helper_inb(data, cpu_env, port); + tcg_gen_andi_tl(data, data, ~(1 << a->bit)); + gen_helper_outb(cpu_env, port, data); + + tcg_temp_free_i32(data); + tcg_temp_free_i32(port); + + return true; +} + +/* + * Stores bit b from Rd to the T Flag in SREG (Status Register). + */ +static bool trans_BST(DisasContext *ctx, arg_BST *a) +{ + INIT_TCG_CONTEXT_FROM_DISAS(ctx); + TCGv Rd = cpu_r[a->rd]; + + tcg_gen_andi_tl(cpu_Tf, Rd, 1 << a->bit); + tcg_gen_shri_tl(cpu_Tf, cpu_Tf, a->bit); + + return true; +} + +/* + * Copies the T Flag in the SREG (Status Register) to bit b in register Rd. + */ +static bool trans_BLD(DisasContext *ctx, arg_BLD *a) +{ + INIT_TCG_CONTEXT_FROM_DISAS(ctx); + TCGv Rd = cpu_r[a->rd]; + TCGv t1 = tcg_temp_new_i32(); + + tcg_gen_andi_tl(Rd, Rd, ~(1u << a->bit)); /* clear bit */ + tcg_gen_shli_tl(t1, cpu_Tf, a->bit); /* create mask */ + tcg_gen_or_tl(Rd, Rd, t1); + + tcg_temp_free_i32(t1); + + return true; +} + +/* + * Sets a single Flag or bit in SREG. + */ +static bool trans_BSET(DisasContext *ctx, arg_BSET *a) +{ + INIT_TCG_CONTEXT_FROM_DISAS(ctx); + switch (a->bit) { + case 0x00: + tcg_gen_movi_tl(cpu_Cf, 0x01); + break; + case 0x01: + tcg_gen_movi_tl(cpu_Zf, 0x01); + break; + case 0x02: + tcg_gen_movi_tl(cpu_Nf, 0x01); + break; + case 0x03: + tcg_gen_movi_tl(cpu_Vf, 0x01); + break; + case 0x04: + tcg_gen_movi_tl(cpu_Sf, 0x01); + break; + case 0x05: + tcg_gen_movi_tl(cpu_Hf, 0x01); + break; + case 0x06: + tcg_gen_movi_tl(cpu_Tf, 0x01); + break; + case 0x07: + tcg_gen_movi_tl(cpu_If, 0x01); + break; + } + + return true; +} + +/* + * Clears a single Flag in SREG. 
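trans_SBI/trans_CBI above perform a read-modify-write of a single bit through the inb/outb helpers rather than through plain memory ops. A sketch of the same operation with illustrative io_read/io_write stand-ins for those helpers (not code from this patch):

#include <stdint.h>

static uint8_t io[32];                                        /* lower I/O registers (illustrative) */
static uint8_t io_read (uint8_t port)            { return io[port]; }
static void    io_write(uint8_t port, uint8_t v) { io[port] = v;    }

void sbi(uint8_t port, unsigned bit) { io_write(port, io_read(port) |  (uint8_t)(1u << bit)); }
void cbi(uint8_t port, unsigned bit) { io_write(port, io_read(port) & (uint8_t)~(1u << bit)); }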
+ */ +static bool trans_BCLR(DisasContext *ctx, arg_BCLR *a) +{ + INIT_TCG_CONTEXT_FROM_DISAS(ctx); + switch (a->bit) { + case 0x00: + tcg_gen_movi_tl(cpu_Cf, 0x00); + break; + case 0x01: + tcg_gen_movi_tl(cpu_Zf, 0x00); + break; + case 0x02: + tcg_gen_movi_tl(cpu_Nf, 0x00); + break; + case 0x03: + tcg_gen_movi_tl(cpu_Vf, 0x00); + break; + case 0x04: + tcg_gen_movi_tl(cpu_Sf, 0x00); + break; + case 0x05: + tcg_gen_movi_tl(cpu_Hf, 0x00); + break; + case 0x06: + tcg_gen_movi_tl(cpu_Tf, 0x00); + break; + case 0x07: + tcg_gen_movi_tl(cpu_If, 0x00); + break; + } + + return true; +} + +/* + * MCU Control Instructions + */ + +/* + * The BREAK instruction is used by the On-chip Debug system, and is + * normally not used in the application software. When the BREAK instruction is + * executed, the AVR CPU is set in the Stopped Mode. This gives the On-chip + * Debugger access to internal resources. If any Lock bits are set, or either + * the JTAGEN or OCDEN Fuses are unprogrammed, the CPU will treat the BREAK + * instruction as a NOP and will not enter the Stopped mode. This instruction + * is not available in all devices. Refer to the device specific instruction + * set summary. + */ +static bool trans_BREAK(DisasContext *ctx, arg_BREAK *a) +{ + if (!avr_have_feature(ctx, AVR_FEATURE_BREAK)) { + return true; + } + +#ifdef BREAKPOINT_ON_BREAK + INIT_TCG_CONTEXT_AND_CPU_ENV_FROM_DISAS(ctx); + tcg_gen_movi_tl(cpu_pc, ctx->npc - 1); + gen_helper_debug(cpu_env); + ctx->bstate = DISAS_EXIT; +#else + /* NOP */ +#endif + + return true; +} + +/* + * This instruction performs a single cycle No Operation. + */ +static bool trans_NOP(DisasContext *ctx, arg_NOP *a) +{ + + /* NOP */ + + return true; +} + +/* + * This instruction sets the circuit in sleep mode defined by the MCU + * Control Register. + */ +static bool trans_SLEEP(DisasContext *ctx, arg_SLEEP *a) +{ + INIT_TCG_CONTEXT_AND_CPU_ENV_FROM_DISAS(ctx); + gen_helper_sleep(cpu_env); + ctx->bstate = DISAS_NORETURN; + return true; +} + +/* + * This instruction resets the Watchdog Timer. This instruction must be + * executed within a limited time given by the WD prescaler. See the Watchdog + * Timer hardware specification. + */ +static bool trans_WDR(DisasContext *ctx, arg_WDR *a) +{ + INIT_TCG_CONTEXT_AND_CPU_ENV_FROM_DISAS(ctx); + gen_helper_wdr(cpu_env); + + return true; +} + +/* + * Core translation mechanism functions: + * + * - translate() + * - canonicalize_skip() + * - gen_intermediate_code() + * - restore_state_to_opc() + * + */ +static void translate(DisasContext *ctx) +{ + INIT_UC_CONTEXT_FROM_DISAS(ctx); + INIT_TCG_CONTEXT_AND_CPU_ENV_FROM_DISAS(ctx); + + // Unicorn: end address tells us to stop emulation + const target_ulong insn_pc = ctx->npc; + if (uc_addr_is_exit(uc, insn_pc*2)) { + ctx->bstate = DISAS_UC_EXIT; + return; + } + + // Unicorn: trace this instruction on request + bool insn_hook = false; + TCGOp *insn_prev_op = NULL; + if (HOOK_EXISTS_BOUNDED(uc, UC_HOOK_CODE, insn_pc*2)) { + + // sync PC in advance + tcg_gen_movi_tl(cpu_pc, insn_pc); + + // save the last operand + insn_prev_op = tcg_last_op(tcg_ctx); + insn_hook = true; + gen_uc_tracecode(tcg_ctx, 0xf1, UC_HOOK_CODE_IDX, uc, insn_pc*2); + + // the callback might want to stop emulation immediately + check_exit_request(tcg_ctx); + } + + uint32_t opcode = next_word(ctx); + if (!decode_insn(ctx, opcode)) { + gen_helper_unsupported(cpu_env); + ctx->bstate = DISAS_NORETURN; + } + + if (insn_hook) { + // Unicorn: patch the callback to have the proper instruction size. 
+ TCGOp *const tcg_op = insn_prev_op ? + QTAILQ_NEXT(insn_prev_op, link) : QTAILQ_FIRST(&tcg_ctx->ops); + tcg_op->args[1] = (ctx->npc - insn_pc)*2; + } +} + +/* Standardize the cpu_skip condition to NE. */ +static bool canonicalize_skip(DisasContext *ctx) +{ + INIT_TCG_CONTEXT_FROM_DISAS(ctx); + switch (ctx->skip_cond) { + case TCG_COND_NEVER: + /* Normal case: cpu_skip is known to be false. */ + return false; + + case TCG_COND_ALWAYS: + /* + * Breakpoint case: cpu_skip is known to be true, via TB_FLAGS_SKIP. + * The breakpoint is on the instruction being skipped, at the start + * of the TranslationBlock. No need to update. + */ + return false; + + case TCG_COND_NE: + if (ctx->skip_var1 == NULL) { + tcg_gen_mov_tl(cpu_skip, ctx->skip_var0); + } else { + tcg_gen_xor_tl(cpu_skip, ctx->skip_var0, ctx->skip_var1); + ctx->skip_var1 = NULL; + } + break; + + default: + /* Convert to a NE condition vs 0. */ + if (ctx->skip_var1 == NULL) { + tcg_gen_setcondi_tl(ctx->skip_cond, cpu_skip, ctx->skip_var0, 0); + } else { + tcg_gen_setcond_tl(ctx->skip_cond, cpu_skip, + ctx->skip_var0, ctx->skip_var1); + ctx->skip_var1 = NULL; + } + ctx->skip_cond = TCG_COND_NE; + break; + } + if (ctx->free_skip_var0) { + tcg_temp_free(ctx->skip_var0); + ctx->free_skip_var0 = false; + } + ctx->skip_var0 = cpu_skip; + return true; +} + +void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns) +{ + CPUAVRState *env = cs->env_ptr; + DisasContext ctx = { + .tb = tb, + .cs = cs, + .env = env, + .memidx = 0, + .bstate = DISAS_NEXT, + .skip_cond = TCG_COND_NEVER, + .singlestep = cs->singlestep_enabled, + }; + target_ulong pc_start = tb->pc / 2; + int num_insns = 0; + + INIT_UC_CONTEXT_FROM_DISAS(&ctx); + INIT_TCG_CONTEXT_AND_CPU_ENV_FROM_DISAS(&ctx); + + if (tb->flags & TB_FLAGS_FULL_ACCESS) { + /* + * This flag is set by ST/LD instruction we will regenerate it ONLY + * with mem/cpu memory access instead of mem access + */ + max_insns = 1; + } + if (ctx.singlestep) { + max_insns = 1; + } + + // Unicorn: trace this block on request + bool block_hook = false; + TCGOp *block_prev_op = NULL; + if (HOOK_EXISTS_BOUNDED(uc, UC_HOOK_BLOCK, tb->pc)) { + + // save the last operand + block_prev_op = tcg_last_op(tcg_ctx); + block_hook = true; + gen_uc_tracecode(tcg_ctx, 0xf8, UC_HOOK_BLOCK_IDX, uc, tb->pc); + } + + gen_tb_start(tb); + + ctx.npc = pc_start; + if (tb->flags & TB_FLAGS_SKIP) { + ctx.skip_cond = TCG_COND_ALWAYS; + ctx.skip_var0 = cpu_skip; + } + + do { + TCGLabel *skip_label = NULL; + + /* translate current instruction */ + tcg_gen_insn_start(ctx.npc); + num_insns++; + + /* + * this is due to some strange GDB behavior + * let's assume main has address 0x100 + * b main - sets breakpoint at address 0x00000100 (code) + * b *0x100 - sets breakpoint at address 0x00800100 (data) + */ + if (unlikely(!ctx.singlestep && + (cpu_breakpoint_test(cs, avr_code_base(env) | ctx.npc * 2, BP_ANY) || + cpu_breakpoint_test(cs, OFFSET_DATA | ctx.npc * 2, BP_ANY)))) { + canonicalize_skip(&ctx); + tcg_gen_movi_tl(cpu_pc, ctx.npc); + gen_helper_debug(cpu_env); + goto done_generating; + } + + /* Conditionally skip the next instruction, if indicated. */ + if (ctx.skip_cond != TCG_COND_NEVER) { + skip_label = gen_new_label(); + if (ctx.skip_var0 == cpu_skip) { + /* + * Copy cpu_skip so that we may zero it before the branch. + * This ensures that cpu_skip is non-zero after the label + * if and only if the skipped insn itself sets a skip. 
+ */ + ctx.free_skip_var0 = true; + ctx.skip_var0 = tcg_temp_new(); + tcg_gen_mov_tl(ctx.skip_var0, cpu_skip); + tcg_gen_movi_tl(cpu_skip, 0); + } + if (ctx.skip_var1 == NULL) { + tcg_gen_brcondi_tl(ctx.skip_cond, ctx.skip_var0, 0, skip_label); + } else { + tcg_gen_brcond_tl(ctx.skip_cond, ctx.skip_var0, + ctx.skip_var1, skip_label); + ctx.skip_var1 = NULL; + } + if (ctx.free_skip_var0) { + tcg_temp_free(ctx.skip_var0); + ctx.free_skip_var0 = false; + } + ctx.skip_cond = TCG_COND_NEVER; + ctx.skip_var0 = NULL; + } + + translate(&ctx); + + if (skip_label) { + canonicalize_skip(&ctx); + gen_set_label(skip_label); + if (ctx.bstate == DISAS_NORETURN) { + ctx.bstate = DISAS_CHAIN; + } + } + } while (ctx.bstate == DISAS_NEXT + && num_insns < max_insns + && (ctx.npc - pc_start) * 2 < TARGET_PAGE_SIZE - 4 + && !tcg_op_buf_full()); + + if (tb->cflags & CF_LAST_IO) { + gen_io_end(); + } + + bool nonconst_skip = canonicalize_skip(&ctx); + + switch (ctx.bstate) { + case DISAS_NORETURN: + assert(!nonconst_skip); + break; + case DISAS_NEXT: + case DISAS_TOO_MANY: + case DISAS_CHAIN: + if (!nonconst_skip) { + /* Note gen_goto_tb checks singlestep. */ + gen_goto_tb(&ctx, 1, ctx.npc); + break; + } + tcg_gen_movi_tl(cpu_pc, ctx.npc); + /* fall through */ + case DISAS_LOOKUP: + if (!ctx.singlestep) { + tcg_gen_lookup_and_goto_ptr(); + break; + } + /* fall through */ + case DISAS_EXIT: + if (ctx.singlestep) { + gen_helper_debug(cpu_env); + } else { + tcg_gen_exit_tb(NULL, 0); + } + break; + case DISAS_UC_EXIT: + tcg_gen_movi_tl(cpu_pc, ctx.npc); + gen_helper_uc_avr_exit(tcg_ctx, cpu_env); + break; + default: + g_assert_not_reached(); + } + +done_generating: + gen_tb_end(tb, num_insns); + + tb->size = (ctx.npc - pc_start) * 2; + tb->icount = num_insns; + + hooked_regions_check(uc, tb->pc, tb->size); + + if (block_hook) { + // Unicorn: patch the callback to have the proper block size. + TCGOp *const tcg_op = block_prev_op ? 
+ QTAILQ_NEXT(block_prev_op, link) : QTAILQ_FIRST(&tcg_ctx->ops); + tcg_op->args[1] = (ctx.npc - pc_start)*2; + } + +#ifdef DEBUG_DISAS +#if 0 + if (qemu_loglevel_mask(CPU_LOG_TB_IN_ASM) + && qemu_log_in_addr_range(tb->pc)) { + FILE *fd; + fd = qemu_log_lock(); + qemu_log("IN: %s\n", lookup_symbol(tb->pc)); + log_target_disas(cs, tb->pc, tb->size); + qemu_log("\n"); + qemu_log_unlock(fd); + } +#endif +#endif +} + +void restore_state_to_opc(CPUAVRState *env, TranslationBlock *tb, + target_ulong *data) +{ + env->pc_w = data[0]; +} diff --git a/qemu/target/avr/unicorn.c b/qemu/target/avr/unicorn.c new file mode 100644 index 0000000000..09b5f628dd --- /dev/null +++ b/qemu/target/avr/unicorn.c @@ -0,0 +1,280 @@ +/* Unicorn Emulator Engine */ +/* By Nguyen Anh Quynh , 2015 */ + +/* + Created for Unicorn Engine by Glenn Baker , 2024 +*/ + +#include "qemu/typedefs.h" +#include "unicorn/unicorn.h" +#include "sysemu/cpus.h" +#include "sysemu/tcg.h" +#include "cpu.h" +#include "uc_priv.h" +#include "unicorn_common.h" +#include "unicorn.h" + +AVRCPU *cpu_avr_init(struct uc_struct *uc); + +static inline uint32_t get_pc(CPUAVRState *env) +{ + return env->pc_w*2; +} + +static uint64_t avr_get_pc(struct uc_struct *uc) +{ + return get_pc((CPUAVRState *)uc->cpu->env_ptr); +} + +static inline void set_pc(CPUAVRState *env, uint32_t value) +{ + env->pc_w = value/2; +} + +static void avr_set_pc(struct uc_struct *uc, uint64_t address) +{ + set_pc((CPUAVRState *)uc->cpu->env_ptr, address); +} + +static void reg_reset(struct uc_struct *uc) +{ +} + +#define GET_BYTE(x, n) (((x) >> (n)*8) & 0xff) +#define SET_BYTE(x, n, b) (x = ((x) & ~(0xff << ((n)*8))) | ((b) << ((n)*8))) +#define GET_RAMP(reg) GET_BYTE(env->glue(ramp,reg), 2) +#define SET_RAMP(reg, val) SET_BYTE(env->glue(ramp,reg), 2, val) + +DEFAULT_VISIBILITY +uc_err reg_read(void *_env, int mode, unsigned int regid, void *value, + size_t *size) +{ + CPUAVRState *const env = _env; + uc_err ret = UC_ERR_ARG; + + switch (regid) { + case UC_AVR_REG_PC: + CHECK_REG_TYPE(uint32_t); + *(uint32_t *)value = get_pc(env); + break; + case UC_AVR_REG_SP: + CHECK_REG_TYPE(uint32_t); + *(uint32_t *)value = env->sp; + break; + + case UC_AVR_REG_RAMPD: + CHECK_REG_TYPE(uint8_t); + *(uint8_t *)value = GET_RAMP(D); + break; + case UC_AVR_REG_RAMPX: + CHECK_REG_TYPE(uint8_t); + *(uint8_t *)value = GET_RAMP(X); + break; + case UC_AVR_REG_RAMPY: + CHECK_REG_TYPE(uint8_t); + *(uint8_t *)value = GET_RAMP(Y); + break; + case UC_AVR_REG_RAMPZ: + CHECK_REG_TYPE(uint8_t); + *(uint8_t *)value = GET_RAMP(Z); + break; + case UC_AVR_REG_EIND: + CHECK_REG_TYPE(uint8_t); + *(uint8_t *)value = GET_BYTE(env->eind, 2); + break; + case UC_AVR_REG_SPL: + CHECK_REG_TYPE(uint8_t); + *(uint8_t *)value = GET_BYTE(env->sp, 0); + break; + case UC_AVR_REG_SPH: + CHECK_REG_TYPE(uint8_t); + *(uint8_t *)value = GET_BYTE(env->sp, 1); + break; + case UC_AVR_REG_SREG: + CHECK_REG_TYPE(uint8_t); + *(uint8_t *)value = cpu_get_sreg(env); + break; + + default: { + uint64_t v = 0; + if (regid >= UC_AVR_REG_R0 && regid <= UC_AVR_REG_R31) { + CHECK_REG_TYPE(uint8_t); + *(int8_t *)value = (int8_t)env->r[regid - UC_AVR_REG_R0]; + } + else if (regid >= UC_AVR_REG_R0W && regid <= UC_AVR_REG_R30W) { + const uint32_t *const r = &env->r[regid - UC_AVR_REG_R0W]; + for (int k = 0; k < 2; k++) + SET_BYTE(v, k, (r[k] & 0xff)); + CHECK_REG_TYPE(uint16_t); + *(int16_t *)value = (int16_t)v; + } + else if (regid >= UC_AVR_REG_R0D && regid <= UC_AVR_REG_R28D) { + const uint32_t *const r = &env->r[regid - UC_AVR_REG_R0D]; + for (int 
k = 0; k < 4; k++) + SET_BYTE(v, k, (r[k] & 0xff)); + CHECK_REG_TYPE(uint32_t); + *(int32_t *)value = (int32_t)v; + } + break; + } + } + + CHECK_RET_DEPRECATE(ret, regid); + return ret; +} + +DEFAULT_VISIBILITY +uc_err reg_write(void *_env, int mode, unsigned int regid, const void *value, + size_t *size, int *setpc) +{ + CPUAVRState *const env = _env; + uc_err ret = UC_ERR_ARG; + + switch (regid) { + case UC_AVR_REG_PC: + CHECK_REG_TYPE(uint32_t); + set_pc(env, *(uint32_t *)value); + *setpc = 1; + break; + case UC_AVR_REG_SP: + CHECK_REG_TYPE(uint32_t); + env->sp = *(uint32_t *)value; + break; + + case UC_AVR_REG_RAMPD: + CHECK_REG_TYPE(uint8_t); + SET_RAMP(D, *(uint8_t *)value); + break; + case UC_AVR_REG_RAMPX: + CHECK_REG_TYPE(uint8_t); + SET_RAMP(X, *(uint8_t *)value); + break; + case UC_AVR_REG_RAMPY: + CHECK_REG_TYPE(uint8_t); + SET_RAMP(Y, *(uint8_t *)value); + break; + case UC_AVR_REG_RAMPZ: + CHECK_REG_TYPE(uint8_t); + SET_RAMP(Z, *(uint8_t *)value); + break; + case UC_AVR_REG_EIND: + CHECK_REG_TYPE(uint8_t); + SET_BYTE(env->eind, 2, *(uint8_t *)value); + break; + case UC_AVR_REG_SPL: + CHECK_REG_TYPE(uint8_t); + SET_BYTE(env->sp, 0, *(uint8_t *)value); + break; + case UC_AVR_REG_SPH: + CHECK_REG_TYPE(uint8_t); + SET_BYTE(env->sp, 1, *(uint8_t *)value); + break; + case UC_AVR_REG_SREG: + CHECK_REG_TYPE(uint8_t); + cpu_set_sreg(env, *(uint8_t *)value); + break; + + default: { + uint64_t v; + uint32_t *r = NULL; + int rlen = 0; + if (regid >= UC_AVR_REG_R0 && regid <= UC_AVR_REG_R31) { + v = *(uint8_t *)value; + r = &env->r[regid - UC_AVR_REG_R0]; + rlen = 1; + CHECK_REG_TYPE(uint8_t); + } + else if (regid >= UC_AVR_REG_R0W && regid <= UC_AVR_REG_R30W) { + v = *(uint16_t *)value; + r = &env->r[regid - UC_AVR_REG_R0W]; + rlen = 2; + CHECK_REG_TYPE(uint16_t); + } + else if (regid >= UC_AVR_REG_R0D && regid <= UC_AVR_REG_R28D) { + v = *(uint32_t *)value; + r = &env->r[regid - UC_AVR_REG_R0D]; + rlen = 4; + CHECK_REG_TYPE(uint32_t); + } + if (r && rlen > 0) { + for (int k = 0; k < rlen; k++) + r[k] = GET_BYTE(v, k); + } + } + } + + CHECK_RET_DEPRECATE(ret, regid); + return ret; +} + +static int avr_cpus_init(struct uc_struct *uc, const char *cpu_model) +{ + AVRCPU *cpu; + + cpu = cpu_avr_init(uc); + if (cpu == NULL) { + return -1; + } + + return 0; +} + +static void avr_release(void *ctx) +{ + int i; + TCGContext *tcg_ctx = (TCGContext *)ctx; + AVRCPU *cpu = (AVRCPU *)tcg_ctx->uc->cpu; + CPUTLBDesc *d = cpu->neg.tlb.d; + CPUTLBDescFast *f = cpu->neg.tlb.f; + CPUTLBDesc *desc; + CPUTLBDescFast *fast; + + release_common(ctx); + for (i = 0; i < NB_MMU_MODES; i++) { + desc = &(d[i]); + fast = &(f[i]); + g_free(desc->iotlb); + g_free(fast->table); + } +} + +static inline bool is_flash_memory(hwaddr addr, size_t size, uint32_t perms) +{ + if ((addr ^ UC_AVR_MEM_FLASH) >> 24) + return false; + if ((perms & UC_PROT_ALL) != (UC_PROT_READ|UC_PROT_EXEC)) + return false; + return true; +} + +static MemoryRegion *avr_memory_map(struct uc_struct *uc, hwaddr begin, size_t size, uint32_t perms) +{ + MemoryRegion *const mr = memory_map(uc, begin, size, perms); + if (mr && is_flash_memory(begin, size, perms)) + set_avr_feature(&AVR_CPU(uc->cpu)->env, AVR_FEATURE_FLASH); + return mr; +} + +static MemoryRegion *avr_memory_map_ptr(struct uc_struct *uc, hwaddr begin, size_t size, uint32_t perms, void *ptr) +{ + MemoryRegion *const mr = memory_map_ptr(uc, begin, size, perms, ptr); + if (mr && is_flash_memory(begin, size, perms)) + set_avr_feature(&AVR_CPU(uc->cpu)->env, AVR_FEATURE_FLASH); + return mr; +} + 
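avr_memory_map/avr_memory_map_ptr above wrap the generic memory_map and turn on AVR_FEATURE_FLASH whenever the mapped region sits in the UC_AVR_MEM_FLASH range with exactly READ|EXEC permissions. A caller-side sketch of triggering that through the public API; it assumes the engine was opened for the AVR architecture and that UC_AVR_MEM_FLASH is exposed by the AVR header added in this series, and the 16 KiB size is only an example:

#include <unicorn/unicorn.h>

/* Mapping READ|EXEC memory at the flash base routes through
 * avr_memory_map() and enables AVR_FEATURE_FLASH as a side effect. */
static uc_err map_flash(uc_engine *uc)
{
    return uc_mem_map(uc, UC_AVR_MEM_FLASH, 16 * 1024,
                      UC_PROT_READ | UC_PROT_EXEC);
}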
+DEFAULT_VISIBILITY +void uc_init(struct uc_struct *uc) +{ + uc->reg_read = reg_read; + uc->reg_write = reg_write; + uc->reg_reset = reg_reset; + uc->set_pc = avr_set_pc; + uc->get_pc = avr_get_pc; + uc->cpus_init = avr_cpus_init; + uc->release = avr_release; + uc->cpu_context_size = offsetof(CPUAVRState, features); + uc_common_init(uc); + uc->memory_map = avr_memory_map; + uc->memory_map_ptr = avr_memory_map_ptr; +} diff --git a/qemu/target/avr/unicorn.h b/qemu/target/avr/unicorn.h new file mode 100644 index 0000000000..a90b109016 --- /dev/null +++ b/qemu/target/avr/unicorn.h @@ -0,0 +1,21 @@ +/* Unicorn Emulator Engine */ +/* By Nguyen Anh Quynh , 2015 */ + +/* + Modified for Unicorn Engine by Glenn Baker , 2024 +*/ + +#ifndef UC_QEMU_TARGET_AVR_H +#define UC_QEMU_TARGET_AVR_H + +// functions to read & write registers +uc_err reg_read_avr(void *env, int mode, unsigned int regid, void *value, + size_t *size); +uc_err reg_write_avr(void *env, int mode, unsigned int regid, + const void *value, size_t *size, int *setpc); + +void uc_init_avr(struct uc_struct *uc); + +int avr_cpu_model_valid(int cpu_model); + +#endif /* UC_QEMU_TARGET_AVR_H */ diff --git a/qemu/target/avr/unicorn_helper.h b/qemu/target/avr/unicorn_helper.h new file mode 100644 index 0000000000..117b375f19 --- /dev/null +++ b/qemu/target/avr/unicorn_helper.h @@ -0,0 +1,165 @@ +#ifndef QEMU_UNICORN_HELPER_H +#define QEMU_UNICORN_HELPER_H + +#include + +#define UC_GET_TCG_CONTEXT(uc) ((uc)->tcg_ctx) +#define DISAS_GET_UC_CONTEXT(ctx) ((ctx)->env->uc) +#define DISAS_GET_TCG_CONTEXT(ctx) UC_GET_TCG_CONTEXT(DISAS_GET_UC_CONTEXT(ctx)) + +#define INIT_UC_CONTEXT_FROM_DISAS(ctx) \ + struct uc_struct *const uc = DISAS_GET_UC_CONTEXT(ctx) +#define INIT_TCG_CONTEXT_FROM_UC(uc) \ + TCGContext *const tcg_ctx = UC_GET_TCG_CONTEXT(uc) +#define INIT_CPU_ENV_FROM_TCG_CONTEXT(ctx) \ + TCGv_ptr const cpu_env = (ctx)->cpu_env +#define INIT_TCG_CONTEXT_FROM_DISAS(ctx) \ + INIT_TCG_CONTEXT_FROM_UC((ctx)->env->uc) +#define INIT_TCG_CONTEXT_AND_CPU_ENV_FROM_DISAS(ctx) \ + INIT_TCG_CONTEXT_FROM_DISAS(ctx); \ + INIT_CPU_ENV_FROM_TCG_CONTEXT(tcg_ctx) + +/* "qapi/error.h */ +#if 0 +#include +#define error_report(...) \ + (error)(EXIT_FAILURE, 0, __VA_ARGS__) +#endif + +/* "exec/address-spaces.h" */ +#define address_space_memory \ + (cpu->uc->address_space_memory) +#define address_space_ldub(...) \ + glue(address_space_ldub, UNICORN_ARCH_POSTFIX)(uc, __VA_ARGS__) +#define address_space_stb(...) \ + glue(address_space_stb, UNICORN_ARCH_POSTFIX)(uc, __VA_ARGS__) + +/* "tcg/tch.h" */ +#define tcg_wrapper_I(func, ...) \ + (glue(tcg_,func))(tcg_ctx, ## __VA_ARGS__) +#define tcg_wrapper_X(func, ...) \ + tcg_wrapper_I(glue(func,_avr), ## __VA_ARGS__) +#define tcg_wrapper_tl(func, ...) \ + tcg_wrapper_I(glue(func,_i32), ## __VA_ARGS__) + +#undef tcg_const_i32 +#define tcg_const_i32(...) tcg_wrapper_X(const_i32, __VA_ARGS__) +#undef tcg_gen_addi_i32 +#define tcg_gen_addi_i32(...) tcg_wrapper_X(gen_addi_i32, __VA_ARGS__) +//#undef tcg_gen_addi_tl +//#define tcg_gen_addi_tl(...) tcg_wrapper_tl(gen_addi, __VA_ARGS__) +#undef tcg_gen_add_i32 +#define tcg_gen_add_i32(...) tcg_wrapper_I(gen_add_i32, __VA_ARGS__) +#undef tcg_gen_add_tl +#define tcg_gen_add_tl(...) tcg_wrapper_tl(gen_add, __VA_ARGS__) +#undef tcg_gen_andc_i32 +#define tcg_gen_andc_i32(...) tcg_wrapper_X(gen_andc_i32, __VA_ARGS__) +//#undef tcg_gen_andc_tl +//#define tcg_gen_andc_tl(...) tcg_wrapper_tl(gen_andc, __VA_ARGS__) +#undef tcg_gen_andi_i32 +#define tcg_gen_andi_i32(...) 
tcg_wrapper_X(gen_andi_i32, __VA_ARGS__) +//#undef tcg_gen_andi_tl +//#define tcg_gen_andi_tl(...) tcg_wrapper_tl(gen_andi, __VA_ARGS__) +#undef tcg_gen_and_i32 +#define tcg_gen_and_i32(...) tcg_wrapper_I(gen_and_i32, __VA_ARGS__) +#undef tcg_gen_and_tl +#define tcg_gen_and_tl(...) tcg_wrapper_tl(gen_and, __VA_ARGS__) +#undef tcg_gen_brcondi_i32 +#define tcg_gen_brcondi_i32(...) tcg_wrapper_X(gen_brcondi_i32, __VA_ARGS__) +//#undef tcg_gen_brcondi_tl +//#define tcg_gen_brcondi_tl(...) tcg_wrapper_tl(gen_brcondi, __VA_ARGS__) +#undef tcg_gen_brcond_i32 +#define tcg_gen_brcond_i32(...) tcg_wrapper_X(gen_brcond_i32, __VA_ARGS__) +//#undef tcg_gen_brcond_tl +//#define tcg_gen_brcond_tl(...) tcg_wrapper_tl(gen_brcond, __VA_ARGS__) +#undef tcg_gen_deposit_i32 +#define tcg_gen_deposit_i32(...) tcg_wrapper_X(gen_deposit_i32, __VA_ARGS__) +//#undef tcg_gen_deposit_tl +//#define tcg_gen_deposit_tl(...) tcg_wrapper_tl(gen_deposit, __VA_ARGS__) +#undef tcg_gen_exit_tb +#define tcg_gen_exit_tb(...) tcg_wrapper_X(gen_exit_tb, __VA_ARGS__) +#undef tcg_gen_ext8s_tl +#define tcg_gen_ext8s_tl(...) tcg_wrapper_tl(gen_ext8s, __VA_ARGS__) +#undef tcg_gen_goto_tb +#define tcg_gen_goto_tb(...) tcg_wrapper_X(gen_goto_tb, __VA_ARGS__) +#undef tcg_gen_insn_start +#define tcg_gen_insn_start(...) tcg_wrapper_I(gen_insn_start, __VA_ARGS__) +#undef tcg_gen_movcond_tl +#define tcg_gen_movcond_tl(...) tcg_wrapper_tl(gen_movcond, __VA_ARGS__) +#undef tcg_gen_movi_i32 +#define tcg_gen_movi_i32(...) tcg_wrapper_I(gen_movi_i32, __VA_ARGS__) +//#undef tcg_gen_movi_i32 +//#define tcg_gen_movi_i32(...) tcg_wrapper(gen_movi_i32, __VA_ARGS__) +#undef tcg_gen_movi_tl +#define tcg_gen_movi_tl(...) tcg_wrapper_tl(gen_movi, __VA_ARGS__) +#undef tcg_gen_mov_i32 +#define tcg_gen_mov_i32(...) tcg_wrapper(gen_mov_i32, __VA_ARGS__) +#undef tcg_gen_mov_tl +#define tcg_gen_mov_tl(...) tcg_wrapper_tl(gen_mov, __VA_ARGS__) +#undef tcg_gen_mul_i32 +#define tcg_gen_mul_i32(...) tcg_wrapper(gen_mul_i32, __VA_ARGS__) +#undef tcg_gen_mul_tl +#define tcg_gen_mul_tl(...) tcg_wrapper_tl(gen_mul, __VA_ARGS__) +#undef tcg_gen_not_i32 +#define tcg_gen_not_i32(...) tcg_wrapper(gen_not_i32, __VA_ARGS__) +#undef tcg_gen_not_tl +#define tcg_gen_not_tl(...) tcg_wrapper_tl(gen_not, __VA_ARGS__) +#undef tcg_gen_ori_i32 +#define tcg_gen_ori_i32(...) tcg_wrapper_X(gen_ori_i32, __VA_ARGS__) +//#undef tcg_gen_ori_tl +//#define tcg_gen_ori_tl(...) tcg_wrapper_tl(gen_ori, __VA_ARGS__) +#undef tcg_gen_or_i32 +#define tcg_gen_or_i32(...) tcg_wrapper_I(gen_or_i32, __VA_ARGS__) +#undef tcg_gen_or_tl +#define tcg_gen_or_tl(...) tcg_wrapper_tl(gen_or, __VA_ARGS__) +#undef tcg_gen_qemu_ld8u +#define tcg_gen_qemu_ld8u(...) tcg_wrapper_I(gen_qemu_ld8u, __VA_ARGS__) +#undef tcg_gen_qemu_ld_tl +#define tcg_gen_qemu_ld_tl(...) tcg_wrapper_tl(gen_qemu_ld, __VA_ARGS__) +#undef tcg_gen_qemu_st8 +#define tcg_gen_qemu_st8(...) tcg_wrapper_I(gen_qemu_st8, __VA_ARGS__) +#undef tcg_gen_qemu_st_tl +#define tcg_gen_qemu_st_tl(...) tcg_wrapper_tl(gen_qemu_st, __VA_ARGS__) +#undef tcg_gen_setcondi_tl +#define tcg_gen_setcondi_tl(...) tcg_wrapper_tl(gen_setcondi, __VA_ARGS__) +#undef tcg_gen_setcond_tl +#define tcg_gen_setcond_tl(...) tcg_wrapper_tl(gen_setcond, __VA_ARGS__) +#undef tcg_gen_shli_i32 +#define tcg_gen_shli_i32(...) tcg_wrapper_X(gen_shli_i32, __VA_ARGS__) +//#undef tcg_gen_shli_tl +//#define tcg_gen_shli_tl(...) tcg_wrapper_tl(gen_shli, __VA_ARGS__) +#undef tcg_gen_shri_i32 +#define tcg_gen_shri_i32(...) 
tcg_wrapper_X(gen_shri_i32, __VA_ARGS__) +//#undef tcg_gen_shri_tl +//#define tcg_gen_shri_tl(...) tcg_wrapper_tl(gen_shri, __VA_ARGS__) +#undef tcg_gen_subi_i32 +#define tcg_gen_subi_i32(...) tcg_wrapper_X(gen_subi_i32, __VA_ARGS__) +//#undef tcg_gen_subi_tl +//#define tcg_gen_subi_tl(...) tcg_wrapper_tl(gen_subi, __VA_ARGS__) +#undef tcg_gen_sub_i32 +#define tcg_gen_sub_i32(...) tcg_wrapper(gen_sub_i32, __VA_ARGS__) +#undef tcg_gen_sub_tl +#define tcg_gen_sub_tl(...) tcg_wrapper_tl(gen_sub, __VA_ARGS__) +#undef tcg_gen_xori_i32 +#define tcg_gen_xori_i32(...) tcg_wrapper_X(gen_xori_i32, __VA_ARGS__) +//#undef tcg_gen_xori_tl +//#define tcg_gen_xori_tl(...) tcg_wrapper_tl(gen_xori, __VA_ARGS__) +#undef tcg_gen_xor_i32 +#define tcg_gen_xor_i32(...) tcg_wrapper(gen_xor_i32, __VA_ARGS__) +#undef tcg_gen_xor_tl +#define tcg_gen_xor_tl(...) tcg_wrapper_tl(gen_xor, __VA_ARGS__) +#undef tcg_global_mem_new_i32 +#define tcg_global_mem_new_i32(...) tcg_wrapper_I(global_mem_new_i32, __VA_ARGS__) +#undef tcg_temp_new_i32 +#define tcg_temp_new_i32() tcg_wrapper_I(temp_new_i32) +#undef tcg_temp_free +#define tcg_temp_free(...) tcg_wrapper_tl(temp_free, __VA_ARGS__) +#undef tcg_temp_free_i32 +#define tcg_temp_free_i32(...) tcg_wrapper_I(temp_free_i32, __VA_ARGS__) +#undef tcg_op_buf_full +#define tcg_op_buf_full() tcg_wrapper_I(op_buf_full) +#undef tcg_gen_lookup_and_goto_ptr +#define tcg_gen_lookup_and_goto_ptr() \ + tcg_wrapper_X(gen_lookup_and_goto_ptr) + +#endif /* QEMU_UNICORN_HELPER_H */ diff --git a/qemu/target/rh850/Makefile.objs b/qemu/target/rh850/Makefile.objs new file mode 100644 index 0000000000..aaa7c0cc64 --- /dev/null +++ b/qemu/target/rh850/Makefile.objs @@ -0,0 +1 @@ +obj-y += translate.o op_helper.o helper.o cpu.o fpu_helper.o gdbstub.o fpu_translate.o diff --git a/qemu/target/rh850/cpu-param.h b/qemu/target/rh850/cpu-param.h new file mode 100644 index 0000000000..24231873c3 --- /dev/null +++ b/qemu/target/rh850/cpu-param.h @@ -0,0 +1,11 @@ +#pragma once + +/* QEMU addressing/paging config */ +#define TARGET_PAGE_BITS 12 /* 4 KiB Pages */ + +#define TARGET_LONG_BITS 32 +#define TARGET_PHYS_ADDR_SPACE_BITS 32 +#define TARGET_VIRT_ADDR_SPACE_BITS 32 + +#define NB_MMU_MODES 4 + diff --git a/qemu/target/rh850/cpu.c b/qemu/target/rh850/cpu.c new file mode 100644 index 0000000000..b6b44b28d2 --- /dev/null +++ b/qemu/target/rh850/cpu.c @@ -0,0 +1,473 @@ +/* + * QEMU RH850 CPU + * + * Copyright (c) 2018-2019 iSYSTEM Labs d.o.o. + * Copyright (c) 2023 Quarkslab + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2 or later, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see . 
+ */ + +#include "qemu/osdep.h" +#include "qemu/log.h" +#include "qemu/ctype.h" +#include "cpu.h" +#include "exec/exec-all.h" + +/* RH850 CPU definitions */ + +/* Program registers (rh850_prog_regnames): + * r0 - zero + * r1 - assembler reserved register + * r2 - real-time OS register / address and data variable register + * r3 - stack pointer + * r4 - global pointer + * r5 - text pointer + * r6-r29 - address and data variable registers + * r30 - element pointer + * r31 - link pointer + */ + +const char * const rh850_gp_regnames[] = { + "r0-zero", "r1", "r2", "r3-sp", "r4", "r5", "r6", "r7", + "r8", "r9", "r10 ", "r11", "r12", "r13", "r14", "r15", + "r16", "r17", "r18", "r19", "r20", "r21", "r22", "r2 ", + "r24", "r25", "r26", "r27", "r28", "r29", "r30-ep", "r31-lp" +}; + +// Basic system registers +const char * const rh850_sys_regnames[][MAX_SYS_REGS_IN_BANK] = { + +{ // SELECTION ID 0 [5] used to be psw, but now it is stored in flags only + "eipc", "eipsw", "fepc", "fepsw", NULL, NULL, "fpsr", "fpepc", "fpst", "fpcc", + "fpcfg", "fpec", NULL, "eiic", "feic", NULL, "ctpc", "ctpsw", NULL, NULL, + "ctbp", NULL, NULL, NULL, NULL, NULL, NULL, NULL, "eiwr", "fewr", + NULL, "bsel"}, +{ // SELECTION ID 1 + "mcfg0", NULL, "rbase", "ebase", "intbp", "mctl", "pid", "fpipr", NULL, NULL, + NULL, "sccfg", "scbp", +}, +{ // SELECTION ID 2 + "htcfg0",NULL, NULL, NULL, NULL, NULL, "mea", "asid", "mei", NULL, + "ispr", "pmr", "icsr", "intcfg" +}, +{ // SELECTION ID 3 + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL +}, +{ // SELECTION ID 4 + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, "ictagl", "ictagh","icdatl","icdath", + NULL, NULL, NULL, NULL, "icctrl",NULL, "iccfg", NULL, "icerr", NULL +}, +{ // SELECTION ID 5 + "mpm", "mprc", NULL, NULL, "mpbrgn","mptrgn",NULL, NULL, "mca", "mcs" + "mcc", "mcr" +}, +{ // SELECTION ID 6 + "mpla0", "mpua0", "mpat0", NULL, "mpla1", "mpua1", "mpat1", NULL, "mpla2", "mpua2", + "mpat2", NULL, "mpla3", "mpua3", "mpat3", NULL, "mpla4", "mpua4", "mpat4", NULL, + "mpla5", "mpua5", "mpat5", NULL, "mpla6", "mpua6", "mpat6", NULL, "mpla7", "mpua7", + "mpat7", NULL +}, +{ // SELECTION ID 7 + /* MPU function system registers */ + "mpla8", "mpua8", "mpat8", NULL, "mpla9", "mpua9", "mpat9", NULL, "mpla10","mpua10", + "mpat10",NULL, "mpla11", "mpua11", "mpat11",NULL, "mpla12","mpua12","mpat12",NULL, + "mpla13","mpua13","mpat13", NULL, "mpla14","mpua14","mpat14",NULL, "mpla15","mpua15", + "mpat15",NULL +} +}; + +// Where bits are read only, mask is set to 0 +const uint32_t rh850_sys_reg_read_only_masks[][MAX_SYS_REGS_IN_BANK] = { + +{ //SELECTION ID 0 PSW - implemented as registers for each used bit, see cpu_ZF, ... 
+ 0xFFFFFFFF, 0x40078EFF, 0xFFFFFFFF, 0x40078EFF, 0x0, /*0x40018EFF*/ 0, 0xFFEEFFFF, 0xFFFFFFFE, 0x00003F3F, 0x000000FF, + 0x0000031F, 0x00000001, 0x0, 0xFFFFFFFF, 0xFFFFFFFF, 0x0, 0xFFFFFFFF, 0x0000001F, 0x0, 0x0, + 0xFFFFFFFE, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0xFFFFFFFF, 0xFFFFFFFF, + 0x0, 0x0 +}, +{ //SELECTION ID 1 + // for MCFG (idx = 0), byte 3 seems to not be writable, at least on devicee used for testing + 0x00000000, 0x0, 0x00000000, 0xFFFFFE01, 0xFFFFFE00, 0x00000003, 0x00000000, 0x0000001F, 0x0, 0x0, + 0x0, 0x000000FF, 0xFFFFFFFC +}, +{ //SELECTION ID 2 + 0x00000000, 0x0, 0x0, 0x0, 0x0, 0x0, 0xFFFFFFFF, 0x000003FF, 0x001F073F, 0x0, + 0x00000000, 0x0000FFFF, 0x00000000, 0x00000001 +}, +{ //SELECTION ID 3 + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 +}, +{ //SELECTION ID 4 + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0xFFFFFA35, 0xF0FFFF00, 0xFFFFFFFF, 0xFFFFFFFF, + 0x0, 0x0, 0x0, 0x0, 0x00020107, 0x0, 0x00000000, 0x0, 0xBF3F7FFD, 0x0 +}, +{ //SELECTION ID 5 + 0x00000003, 0x0000FFFF, 0x0, 0x0, 0x00000000, 0x00000000, 0x0, 0x0, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0x0000013F +}, +{ //SELECTION ID 6 + 0xFFFFFFFC, 0xFFFFFFFC, 0x03FF00FF, 0x0, 0xFFFFFFFC, 0xFFFFFFFC, 0x03FF00FF, 0x0, 0xFFFFFFFC, 0xFFFFFFFF, + 0x03FF00FF, 0x0, 0xFFFFFFFC, 0xFFFFFFFC, 0x03FF00FF, 0x0, 0xFFFFFFFC, 0xFFFFFFFC, 0x03FF00FF, 0x0, + 0xFFFFFFFC, 0xFFFFFFFC, 0x03FF00FF, 0x0, 0xFFFFFFFC, 0xFFFFFFFC, 0x03FF00FF, 0x0, 0xFFFFFFFC, 0xFFFFFFFC, + 0x03FF00FF, 0x0 +}, +{ //SELECTION ID 7 + 0xFFFFFFFC, 0xFFFFFFFC, 0x03FF00FF, 0x0, 0xFFFFFFFC, 0xFFFFFFFC, 0x03FF00FF, 0x0, 0xFFFFFFFC, 0xFFFFFFFF, + 0x03FF00FF, 0x0, 0xFFFFFFFC, 0xFFFFFFFC, 0x03FF00FF, 0x0, 0xFFFFFFFC, 0xFFFFFFFC, 0x03FF00FF, 0x0, + 0xFFFFFFFC, 0xFFFFFFFC, 0x03FF00FF, 0x0, 0xFFFFFFFC, 0xFFFFFFFC, 0x03FF00FF, 0x0, 0xFFFFFFFC, 0xFFFFFFFC, + 0x03FF00FF, 0x0 +} +}; + + +const uint32_t rh850_sys_reg_read_only_values[][MAX_SYS_REGS_IN_BANK] = { +{ //SELECTION ID 0 + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0 +}, +{ //SELECTION ID 1 + 0x4, 0x0, 0x0, 0x0, 0x0, 0x80000000, 0x12345678, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0 +}, +{ //SELECTION ID 2 + 0x00008000, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0 +}, +{ //SELECTION ID 3 + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 +}, +{ //SELECTION ID 4 + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, 0x00010000, 0x0, 0x00010000, 0x0, 0x0, 0x0 +}, +{ //SELECTION ID 5 + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0 +}, +{ //SELECTION ID 6 + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0 +}, +{ //SELECTION ID 7 + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0 +} +}; + + + +/*Data Buffer Operation Registers (rh850_sys_databuff_regnames): + * sr24, 13 - cbdcr */ +const char * const rh850_sys_databuff_regnames[] = { /* Data buffer operation registers */ + "cbdcr" +}; + +const char * const rh850_excp_names[] = { + "misaligned_fetch", + "fault_fetch", + "illegal_instruction", + "breakpoint", + "misaligned_load", + "fault_load", + "misaligned_store", + "fault_store", + "user_ecall", + "supervisor_ecall", + 
"hypervisor_ecall", + "machine_ecall", + "exec_page_fault", + "load_page_fault", + "reserved", + "store_page_fault" +}; + +const char * const rh850_intr_names[] = { + "u_software", + "s_software", + "h_software", + "m_software", + "u_timer", + "s_timer", + "h_timer", + "m_timer", + "u_external", + "s_external", + "h_external", + "m_external", + "coprocessor", + "host" +}; + + +void rh850_cpu_set_pc(CPUState *cs, vaddr value) +{ + RH850CPU *cpu = RH850_CPU(cs); + CPURH850State *env = &cpu->env; + env->pc = value; +} + +vaddr rh850_cpu_get_pc(CPUState *cs) +{ + RH850CPU *cpu = RH850_CPU(cs); + CPURH850State *env = &cpu->env; + return env->pc; +} + +AddressSpace *cpu_addressspace(CPUState *cs, MemTxAttrs attrs) +{ + return cpu_get_address_space(cs, cpu_asidx_from_attrs(cs, attrs)); +} + + +/* called by qemu's softmmu to fill the qemu tlb */ +static bool rh850_tlb_fill(CPUState *cs, vaddr addr, int size, + MMUAccessType access_type, int mmu_idx, + bool probe, uintptr_t retaddr) +{ + int ret; + ret = rh850_cpu_handle_mmu_fault(cs, addr, size, access_type, mmu_idx); + if (ret == TRANSLATE_FAIL) { + RH850CPU *cpu = RH850_CPU(cs); + CPURH850State *env = &cpu->env; + do_raise_exception_err(env, cs->exception_index, retaddr); + } + return true; +} + + +static void rh850_cpu_synchronize_from_tb(CPUState *cs, TranslationBlock *tb) +{ + RH850CPU *cpu = RH850_CPU(cs); + CPURH850State *env = &cpu->env; + env->pc = tb->pc; +} + +static bool rh850_cpu_has_work(CPUState *cs) +{ +#ifndef CONFIG_USER_ONLY + return true; +#else + return true; +#endif +} + +void restore_state_to_opc(CPURH850State *env, TranslationBlock *tb, + target_ulong *data) +{ + env->pc = data[0]; +} + + +static void rh850_raise_exception(CPURH850State *env, uint32_t excp, + uint32_t syndrome, uint32_t target_el) +{ + CPUState *cs = CPU(rh850_env_get_cpu(env)); + + cs->exception_index = excp; + cpu_loop_exit(cs); +} + + +static void rh850_debug_excp_handler(CPUState *cs) +{ + /* Called by core code when a watchpoint or breakpoint fires; + * need to check which one and raise the appropriate exception. + */ + RH850CPU *cpu = RH850_CPU(cs); + CPURH850State *env = &cpu->env; + CPUWatchpoint *wp_hit = cs->watchpoint_hit; + + if (wp_hit) { + if (wp_hit->flags & BP_CPU) { + // bool wnr = (wp_hit->flags & BP_WATCHPOINT_HIT_WRITE) != 0; + // bool same_el = true; + + cs->watchpoint_hit = NULL; + + // env->exception.fsr = arm_debug_exception_fsr(env); + // env->exception.vaddress = wp_hit->hitaddr; + rh850_raise_exception(env, 0, 0, 0); + } + } else { + uint64_t pc = env->pc; + // bool same_el = true; + + /* (1) GDB breakpoints should be handled first. + * (2) Do not raise a CPU exception if no CPU breakpoint has fired, + * since singlestep is also done by generating a debug internal + * exception. + */ + if (!cpu_breakpoint_test(cs, pc, BP_GDB) && + cpu_breakpoint_test(cs, pc, BP_CPU)) { + + rh850_raise_exception(env, 0, 0, 0); + } + } +} + +static bool check_watchpoints(RH850CPU *cpu) +{ + return true; +} + + +static bool rh850_debug_check_watchpoint(CPUState *cs, CPUWatchpoint *wp) +{ + /* Called by core code when a CPU watchpoint fires; need to check if this + * is also an architectural watchpoint match. 
+ */ + RH850CPU *cpu = RH850_CPU(cs); + + return check_watchpoints(cpu); +} + + +static void rh850_cpu_reset(CPUState *cs) +{ + + RH850CPU *cpu = RH850_CPU(cs); + RH850CPUClass *mcc = RH850_CPU_GET_CLASS(cpu); + CPURH850State *env = &cpu->env; + + mcc->parent_reset(cs); + cs->exception_index = EXCP_NONE; + set_default_nan_mode(1, &env->fp_status); + env->pc = 0; // move to direct vector ? (always 0?) + env->ID_flag = 1; // interrupts are disabled on reset + env->systemRegs[BANK_ID_BASIC_0][EIPSW_IDX] = 0x20; + env->systemRegs[BANK_ID_BASIC_0][FEPSW_IDX] = 0x20; + env->systemRegs[BANK_ID_BASIC_0][EIIC_IDX] = 0x0; + env->systemRegs[BANK_ID_BASIC_0][FEIC_IDX] = 0x0; + env->systemRegs[BANK_ID_BASIC_0][PSW_IDX] = 0x20; // reset value of PSW + env->systemRegs[BANK_ID_BASIC_0][CTPSW_IDX] = 0; + env->systemRegs[BANK_ID_BASIC_0][CTBP_IDX] = 0; // only bit 0 must be set to 0 + env->systemRegs[BANK_ID_BASIC_2][ASID_IDX2] = 0; // only bits 31-10 must be set to 0 + env->systemRegs[BANK_ID_BASIC_2][HTCFG0_IDX2] = 0x00018000; // const value + env->systemRegs[BANK_ID_BASIC_2][MEI_IDX2] = 0; // only some bits must be 0 + env->systemRegs[BANK_ID_BASIC_1][RBASE_IDX1] = 0; + env->systemRegs[BANK_ID_BASIC_1][EBASE_IDX1] = 0; // only bits 8-1 must be 0 + env->systemRegs[BANK_ID_BASIC_1][INTBP_IDX1] = 0; // only bits 8-0 must be 0 + env->systemRegs[BANK_ID_BASIC_1][PID_IDX1] = 0x05000120; // const + env->systemRegs[BANK_ID_BASIC_1][SCCFG_IDX1] = 0; // bits 31-8 must be 0 + env->systemRegs[BANK_ID_BASIC_1][SCBP_IDX1] = 0; // bits 1-0 must be 0 + env->systemRegs[BANK_ID_BASIC_1][MCFG0_IDX1] = 0x4; // bits 31-8 must be 0 + env->systemRegs[BANK_ID_BASIC_1][MCTL_IDX1] = 0x80000000; // bits 31-8 must be 0 + + env->systemRegs[BANK_ID_BASIC_1][FPIPR_IDX1] = 0; + env->systemRegs[BANK_ID_BASIC_2][ISPR_IDX2] = 0; + env->systemRegs[BANK_ID_BASIC_2][PMR_IDX2] = 0; + env->systemRegs[BANK_ID_BASIC_2][ICSR_IDX2] = 0; + env->systemRegs[BANK_ID_BASIC_2][INTCFG_IDX2] = 0; +} + +static void rh850_cpu_realize(struct uc_struct *uc, CPUState *dev) +{ + CPUState *cs = CPU(dev); + + cpu_exec_realizefn(cs); + + qemu_init_vcpu(cs); + + cpu_reset(cs); +} + +static void rh850_cpu_init(struct uc_struct *uc, CPUState *obj) +{ + CPUState *cs = CPU(obj); + RH850CPU *cpu = RH850_CPU(obj); + + /* Set CPU pointers.
*/ + cpu_set_cpustate_pointers(cpu); + + cs->env_ptr = &cpu->env; + cpu->env.uc = uc; +} + +static void rh850_cpu_class_init(struct uc_struct *uc, CPUClass *c) +{ + RH850CPUClass *mcc = RH850_CPU_CLASS(c); + CPUClass *cc = CPU_CLASS(c); + + mcc->parent_reset = cc->reset; + cc->reset = rh850_cpu_reset; + + cc->has_work = rh850_cpu_has_work; + cc->do_interrupt = rh850_cpu_do_interrupt; + cc->cpu_exec_interrupt = rh850_cpu_exec_interrupt; + cc->set_pc = rh850_cpu_set_pc; + cc->tlb_fill = rh850_tlb_fill; + cc->synchronize_from_tb = rh850_cpu_synchronize_from_tb; + cc->debug_excp_handler = rh850_debug_excp_handler; + cc->debug_check_watchpoint = rh850_debug_check_watchpoint; + +#ifdef CONFIG_USER_ONLY + cc->handle_mmu_fault = rh850_cpu_handle_mmu_fault; +#else + cc->do_unaligned_access = rh850_cpu_do_unaligned_access; + cc->get_phys_page_debug = rh850_cpu_get_phys_page_debug; +#endif +#ifdef CONFIG_TCG + cc->tcg_initialize = rh850_translate_init; +#endif +} + +RH850CPU *cpu_rh850_init(struct uc_struct *uc, const char *cpu_model) +{ + RH850CPU *cpu; + CPUState *cs; + CPUClass *cc; + + cpu = calloc(1, sizeof(*cpu)); + if (cpu == NULL) { + return NULL; + } + + cs = (CPUState *)cpu; + cc = (CPUClass *)&cpu->cc; + cs->cc = cc; + cs->uc = uc; + uc->cpu = (CPUState *)cpu; + + /* init CPUClass */ + cpu_class_init(uc, cc); + + /* init RH850CPUClass */ + rh850_cpu_class_init(uc, cc); + + /* init CPUState */ + cpu_common_initfn(uc, cs); + + /* init CPU */ + rh850_cpu_init(uc, cs); + + /* realize CPU */ + rh850_cpu_realize(uc, cs); + + // init address space + cpu_address_space_init(cs, 0, cs->memory); + + return cpu; +} + + + + diff --git a/qemu/target/rh850/cpu.h b/qemu/target/rh850/cpu.h new file mode 100644 index 0000000000..c54ad11599 --- /dev/null +++ b/qemu/target/rh850/cpu.h @@ -0,0 +1,276 @@ +/* + * QEMU RH850 CPU + * + * Copyright (c) 2016-2017 Sagar Karandikar, sagark@eecs.berkeley.edu + * Copyright (c) 2017-2018 SiFive, Inc. + * Copyright (c) 2023 Quarkslab + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2 or later, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see .
+ */ + +#ifndef RH850_CPU_H +#define RH850_CPU_H + +#define TCG_GUEST_DEFAULT_MO 0 + +//#define TARGET_INSN_START_EXTRA_WORDS 2 + +#define ELF_MACHINE EM_RH850 +#define CPUArchState struct CPURH850State + +#include "qemu-common.h" +#include "hw/core/cpu.h" +#include "exec/cpu-defs.h" +#include "fpu/softfloat.h" + +#define TYPE_RH850_CPU "rh850-cpu" + +#define RH850_CPU_TYPE_SUFFIX "-" TYPE_RH850_CPU +#define RH850_CPU_TYPE_NAME(name) (name RH850_CPU_TYPE_SUFFIX) +#define CPU_RESOLVING_TYPE TYPE_RH850_CPU +#define TYPE_RH850_CPU_ANY RH850_CPU_TYPE_NAME("any") + +#define RV32 ((target_ulong)1 << (TARGET_LONG_BITS - 2)) +#define RV64 ((target_ulong)2 << (TARGET_LONG_BITS - 2)) + +#if defined(TARGET_RH850) +#define RVXLEN RV32 +#elif defined(TARGET_RH85064) +#define RVXLEN RV64 +#endif + +#define RV(x) ((target_ulong)1 << (x - 'A')) + +#define RVI RV('I') +#define RVM RV('M') +#define RVA RV('A') +#define RVF RV('F') +#define RVD RV('D') +#define RVC RV('C') +#define RVS RV('S') +#define RVU RV('U') + +/* S extension denotes that Supervisor mode exists, however it is possible + to have a core that support S mode but does not have an MMU and there + is currently no bit in misa to indicate whether an MMU exists or not + so a cpu features bitfield is required */ +enum { + RH850_FEATURE_MMU +}; + +#define USER_VERSION_2_02_0 0x00020200 +#define PRIV_VERSION_1_09_1 0x00010901 +#define PRIV_VERSION_1_10_0 0x00011000 + +#define TRANSLATE_FAIL 1 +#define TRANSLATE_SUCCESS 0 +#define MMU_USER_IDX 3 + +#define MAX_RH850_PMPS (16) + +typedef struct CPURH850State CPURH850State; + +#include "pmp.h" + +#include "register_indices.h" + +#define NUM_GP_REGS 32 +#define NUM_SYS_REG_BANKS 7 +#define MAX_SYS_REGS_IN_BANK 32 +#define BANK_ID_BASIC_0 0 +#define BANK_ID_BASIC_1 1 +#define BANK_ID_BASIC_2 2 + +struct CPURH850State { + + + target_ulong gpRegs[NUM_GP_REGS]; + target_ulong pc; + target_ulong sysDatabuffRegs[1]; + target_ulong systemRegs[NUM_SYS_REG_BANKS][MAX_SYS_REGS_IN_BANK]; + //target_ulong sysBasicRegs[31]; + //target_ulong sysInterruptRegs[5]; + //uint64_t sysFpuRegs[6]; //using rh850 basic system registers(sr6-sr11), 32-bit or 64-bit precision + //target_ulong sysMpuRegs[56]; + //target_ulong sysCacheRegs[7]; + + // flags contained in PSW register + uint32_t Z_flag; + uint32_t S_flag; + uint32_t OV_flag; + uint32_t CY_flag; + uint32_t SAT_flag; + uint32_t ID_flag; + uint32_t EP_flag; + uint32_t NP_flag; + uint32_t EBV_flag; + uint32_t CU0_flag; + uint32_t CU1_flag; + uint32_t CU2_flag; + uint32_t UM_flag; + + uint32_t features; + uint32_t badaddr; + + target_ulong cpu_LLbit; // register for mutual exclusion (LDL.W, STC.W) + target_ulong cpu_LLAddress; // register for mutual exclusion (LDL.W, STC.W) + + target_ulong load_res; // inst addr for TCG + target_ulong load_val; // inst val for TCG + + float_status fp_status; // not used yet in rh850, left for floating-point support. + + target_ulong fpsr; /* floating-point configuration/status register. */ + + uint32_t exception_cause; + int exception_priority; + bool exception_dv; + + // Unicorn engine + struct uc_struct *uc; +}; + +#define RH850_CPU(obj) ((RH850CPU *)obj) +#define RH850_CPU_CLASS(klass) ((RH850CPUClass *)klass) +#define RH850_CPU_GET_CLASS(obj) (&((RH850CPU *)obj)->cc) + + +/** + * RH850CPUClass: + * @parent_realize: The parent class' realize handler. + * @parent_reset: The parent class' reset handler. + * + * A RH850 CPU model. 
+ */ +typedef struct RH850CPUClass { + /*< private >*/ + CPUClass parent_class; + /*< public >*/ + void (*parent_reset)(CPUState *cpu); +} RH850CPUClass; + +/** + * RH850CPU: + * @env: #CPURH850State + * + * A RH850 CPU. + */ +typedef struct RH850CPU { + /*< private >*/ + CPUState parent_obj; + /*< public >*/ + CPUNegativeOffsetState neg; + CPURH850State env; + + RH850CPUClass cc; +} RH850CPU; + +typedef RH850CPU ArchCPU; + +static inline RH850CPU *rh850_env_get_cpu(CPURH850State *env) +{ + return container_of(env, RH850CPU, env); +} + +static inline int rh850_has_ext(CPURH850State *env, target_ulong ext) +{ // TODO: what does value 'ext' represent?? + //return (env->misa & ext) != 0; + return true; +} + +static inline bool rh850_feature(CPURH850State *env, int feature) +{ + return env->features & (1ULL << feature); +} + +#include "cpu_user.h" +#include "cpu_bits.h" + +extern const char * const rh850_gp_regnames[]; +extern const char * const rh850_sys_regnames[][MAX_SYS_REGS_IN_BANK]; +extern const char * const rh850_sys_databuff_regnames[]; + +extern const char * const rh850_excp_names[]; +extern const char * const rh850_intr_names[]; +extern const uint32_t rh850_sys_reg_read_only_values[][MAX_SYS_REGS_IN_BANK]; +extern const uint32_t rh850_sys_reg_read_only_masks[][MAX_SYS_REGS_IN_BANK]; + +#define ENV_GET_CPU(e) CPU(rh850_env_get_cpu(e)) +#define ENV_OFFSET offsetof(RH850CPU, env) + +void rh850_cpu_do_interrupt(CPUState *cpu); +int rh850_cpu_gdb_read_register(CPUState *cpu, uint8_t *buf, int reg); +int rh850_cpu_gdb_write_register(CPUState *cpu, uint8_t *buf, int reg); +bool rh850_cpu_exec_interrupt(CPUState *cs, int interrupt_request); +int rh850_cpu_mmu_index(CPURH850State *env, bool ifetch); +hwaddr rh850_cpu_get_phys_page_debug(CPUState *cpu, vaddr addr); +void rh850_cpu_do_unaligned_access(CPUState *cs, vaddr addr, + MMUAccessType access_type, int mmu_idx, + uintptr_t retaddr); +int rh850_cpu_handle_mmu_fault(CPUState *cpu, vaddr address, int size, + int rw, int mmu_idx); + +char *rh850_isa_string(RH850CPU *cpu); +void rh850_cpu_list(void); + +#define cpu_init(cpu_model) cpu_generic_init(TYPE_RH850_CPU, cpu_model) +#define cpu_signal_handler cpu_rh850_signal_handler +#define cpu_list rh850_cpu_list +#define cpu_mmu_index rh850_cpu_mmu_index + +void rh850_set_mode(CPURH850State *env, target_ulong newpriv); + +void rh850_translate_init(struct uc_struct *uc); +RH850CPU *cpu_rh850_init(struct uc_struct *uc, const char *cpu_model); +int cpu_rh850_signal_handler(int host_signum, void *pinfo, void *puc); +void QEMU_NORETURN do_raise_exception_err(CPURH850State *env, + uint32_t exception, uintptr_t pc); + +target_ulong cpu_rh850_get_fflags(CPURH850State *env); +void cpu_rh850_set_fflags(CPURH850State *env, target_ulong); +void rh850_cpu_set_pc(CPUState *cs, vaddr value); +vaddr rh850_cpu_get_pc(CPUState *cs); +AddressSpace *cpu_addressspace(CPUState *cs, MemTxAttrs attrs); + +#define TB_FLAGS_MMU_MASK 3 +#define TB_FLAGS_FP_ENABLE MSTATUS_FS + +/* + * This f. is called from tcg_gen_lookup_and_goto_ptr() to obtain PC + * which is then used for TB lookup. 
+ */ +static inline void cpu_get_tb_cpu_state(CPURH850State *env, target_ulong *pc, + target_ulong *cs_base, uint32_t *flags) +{ + *pc = env->pc; + *cs_base = 0; +#ifdef CONFIG_USER_ONLY + *flags = TB_FLAGS_FP_ENABLE; +#else + *flags = cpu_mmu_index(env, 0); +#endif +} + +void csr_write_helper(CPURH850State *env, target_ulong val_to_write, + target_ulong csrno); +target_ulong csr_read_helper(CPURH850State *env, target_ulong csrno); + +#ifndef CONFIG_USER_ONLY +void rh850_set_local_interrupt(RH850CPU *cpu, target_ulong mask, int value); +#endif + +extern const int NUM_GDB_REGS; + +#include "exec/cpu-all.h" + +#endif /* RH850_CPU_H */ diff --git a/qemu/target/rh850/cpu_bits.h b/qemu/target/rh850/cpu_bits.h new file mode 100644 index 0000000000..a3b90298a6 --- /dev/null +++ b/qemu/target/rh850/cpu_bits.h @@ -0,0 +1,431 @@ +/* RH850 PSW constants */ + +#define PSW_Z 0x00000001 +#define PSW_S 0x00000002 +#define PSW_OV 0x00000004 +#define PSW_CY 0x00000008 +#define PSW_SAT 0x00000010 +#define PSW_ID 0x00000020 +#define PSW_EP 0x00000040 +#define PSW_NP 0x00000080 +#define PSW_EBV 0x00008000 +#define PSW_CU0 0x00010000 +#define PSW_UM 0x40000000 + +/* */ + +/* RH850 ISA constants */ + +#define get_field(reg, mask) (((reg) & \ + (target_ulong)(mask)) / ((mask) & ~((mask) << 1))) +#define set_field(reg, mask, val) (((reg) & ~(target_ulong)(mask)) | \ + (((target_ulong)(val) * ((mask) & ~((mask) << 1))) & \ + (target_ulong)(mask))) + +#define PGSHIFT 12 + +#define FSR_RD_SHIFT 5 +#define FSR_RD (0x7 << FSR_RD_SHIFT) + +#define FPEXC_NX 0x01 +#define FPEXC_UF 0x02 +#define FPEXC_OF 0x04 +#define FPEXC_DZ 0x08 +#define FPEXC_NV 0x10 + +#define FSR_AEXC_SHIFT 0 +#define FSR_NVA (FPEXC_NV << FSR_AEXC_SHIFT) +#define FSR_OFA (FPEXC_OF << FSR_AEXC_SHIFT) +#define FSR_UFA (FPEXC_UF << FSR_AEXC_SHIFT) +#define FSR_DZA (FPEXC_DZ << FSR_AEXC_SHIFT) +#define FSR_NXA (FPEXC_NX << FSR_AEXC_SHIFT) +#define FSR_AEXC (FSR_NVA | FSR_OFA | FSR_UFA | FSR_DZA | FSR_NXA) + +/* CSR numbers */ +#define CSR_FFLAGS 0x1 +#define CSR_FRM 0x2 +#define CSR_FCSR 0x3 +#define CSR_CYCLE 0xc00 +#define CSR_TIME 0xc01 +#define CSR_INSTRET 0xc02 +#define CSR_HPMCOUNTER3 0xc03 +#define CSR_HPMCOUNTER4 0xc04 +#define CSR_HPMCOUNTER5 0xc05 +#define CSR_HPMCOUNTER6 0xc06 +#define CSR_HPMCOUNTER7 0xc07 +#define CSR_HPMCOUNTER8 0xc08 +#define CSR_HPMCOUNTER9 0xc09 +#define CSR_HPMCOUNTER10 0xc0a +#define CSR_HPMCOUNTER11 0xc0b +#define CSR_HPMCOUNTER12 0xc0c +#define CSR_HPMCOUNTER13 0xc0d +#define CSR_HPMCOUNTER14 0xc0e +#define CSR_HPMCOUNTER15 0xc0f +#define CSR_HPMCOUNTER16 0xc10 +#define CSR_HPMCOUNTER17 0xc11 +#define CSR_HPMCOUNTER18 0xc12 +#define CSR_HPMCOUNTER19 0xc13 +#define CSR_HPMCOUNTER20 0xc14 +#define CSR_HPMCOUNTER21 0xc15 +#define CSR_HPMCOUNTER22 0xc16 +#define CSR_HPMCOUNTER23 0xc17 +#define CSR_HPMCOUNTER24 0xc18 +#define CSR_HPMCOUNTER25 0xc19 +#define CSR_HPMCOUNTER26 0xc1a +#define CSR_HPMCOUNTER27 0xc1b +#define CSR_HPMCOUNTER28 0xc1c +#define CSR_HPMCOUNTER29 0xc1d +#define CSR_HPMCOUNTER30 0xc1e +#define CSR_HPMCOUNTER31 0xc1f +#define CSR_SSTATUS 0x100 +#define CSR_SIE 0x104 +#define CSR_STVEC 0x105 +#define CSR_SCOUNTEREN 0x106 +#define CSR_SSCRATCH 0x140 +#define CSR_SEPC 0x141 +#define CSR_SCAUSE 0x142 +#define CSR_SBADADDR 0x143 +#define CSR_SIP 0x144 +#define CSR_SPTBR 0x180 +#define CSR_SATP 0x180 +#define CSR_MSTATUS 0x300 +#define CSR_MISA 0x301 +#define CSR_MEDELEG 0x302 +#define CSR_MIDELEG 0x303 +#define CSR_MIE 0x304 +#define CSR_MTVEC 0x305 +#define CSR_MCOUNTEREN 0x306 +#define CSR_MSCRATCH 0x340 
+#define CSR_MEPC 0x341 +#define CSR_MCAUSE 0x342 +#define CSR_MBADADDR 0x343 +#define CSR_MIP 0x344 +#define CSR_PMPCFG0 0x3a0 +#define CSR_PMPCFG1 0x3a1 +#define CSR_PMPCFG2 0x3a2 +#define CSR_PMPCFG3 0x3a3 +#define CSR_PMPADDR0 0x3b0 +#define CSR_PMPADDR1 0x3b1 +#define CSR_PMPADDR2 0x3b2 +#define CSR_PMPADDR3 0x3b3 +#define CSR_PMPADDR4 0x3b4 +#define CSR_PMPADDR5 0x3b5 +#define CSR_PMPADDR6 0x3b6 +#define CSR_PMPADDR7 0x3b7 +#define CSR_PMPADDR8 0x3b8 +#define CSR_PMPADDR9 0x3b9 +#define CSR_PMPADDR10 0x3ba +#define CSR_PMPADDR11 0x3bb +#define CSR_PMPADDR12 0x3bc +#define CSR_PMPADDR13 0x3bd +#define CSR_PMPADDR14 0x3be +#define CSR_PMPADDR15 0x3bf +#define CSR_TSELECT 0x7a0 +#define CSR_TDATA1 0x7a1 +#define CSR_TDATA2 0x7a2 +#define CSR_TDATA3 0x7a3 +#define CSR_DCSR 0x7b0 +#define CSR_DPC 0x7b1 +#define CSR_DSCRATCH 0x7b2 +#define CSR_MCYCLE 0xb00 +#define CSR_MINSTRET 0xb02 +#define CSR_MHPMCOUNTER3 0xb03 +#define CSR_MHPMCOUNTER4 0xb04 +#define CSR_MHPMCOUNTER5 0xb05 +#define CSR_MHPMCOUNTER6 0xb06 +#define CSR_MHPMCOUNTER7 0xb07 +#define CSR_MHPMCOUNTER8 0xb08 +#define CSR_MHPMCOUNTER9 0xb09 +#define CSR_MHPMCOUNTER10 0xb0a +#define CSR_MHPMCOUNTER11 0xb0b +#define CSR_MHPMCOUNTER12 0xb0c +#define CSR_MHPMCOUNTER13 0xb0d +#define CSR_MHPMCOUNTER14 0xb0e +#define CSR_MHPMCOUNTER15 0xb0f +#define CSR_MHPMCOUNTER16 0xb10 +#define CSR_MHPMCOUNTER17 0xb11 +#define CSR_MHPMCOUNTER18 0xb12 +#define CSR_MHPMCOUNTER19 0xb13 +#define CSR_MHPMCOUNTER20 0xb14 +#define CSR_MHPMCOUNTER21 0xb15 +#define CSR_MHPMCOUNTER22 0xb16 +#define CSR_MHPMCOUNTER23 0xb17 +#define CSR_MHPMCOUNTER24 0xb18 +#define CSR_MHPMCOUNTER25 0xb19 +#define CSR_MHPMCOUNTER26 0xb1a +#define CSR_MHPMCOUNTER27 0xb1b +#define CSR_MHPMCOUNTER28 0xb1c +#define CSR_MHPMCOUNTER29 0xb1d +#define CSR_MHPMCOUNTER30 0xb1e +#define CSR_MHPMCOUNTER31 0xb1f +#define CSR_MUCOUNTEREN 0x320 +#define CSR_MSCOUNTEREN 0x321 +#define CSR_MHPMEVENT3 0x323 +#define CSR_MHPMEVENT4 0x324 +#define CSR_MHPMEVENT5 0x325 +#define CSR_MHPMEVENT6 0x326 +#define CSR_MHPMEVENT7 0x327 +#define CSR_MHPMEVENT8 0x328 +#define CSR_MHPMEVENT9 0x329 +#define CSR_MHPMEVENT10 0x32a +#define CSR_MHPMEVENT11 0x32b +#define CSR_MHPMEVENT12 0x32c +#define CSR_MHPMEVENT13 0x32d +#define CSR_MHPMEVENT14 0x32e +#define CSR_MHPMEVENT15 0x32f +#define CSR_MHPMEVENT16 0x330 +#define CSR_MHPMEVENT17 0x331 +#define CSR_MHPMEVENT18 0x332 +#define CSR_MHPMEVENT19 0x333 +#define CSR_MHPMEVENT20 0x334 +#define CSR_MHPMEVENT21 0x335 +#define CSR_MHPMEVENT22 0x336 +#define CSR_MHPMEVENT23 0x337 +#define CSR_MHPMEVENT24 0x338 +#define CSR_MHPMEVENT25 0x339 +#define CSR_MHPMEVENT26 0x33a +#define CSR_MHPMEVENT27 0x33b +#define CSR_MHPMEVENT28 0x33c +#define CSR_MHPMEVENT29 0x33d +#define CSR_MHPMEVENT30 0x33e +#define CSR_MHPMEVENT31 0x33f +#define CSR_MVENDORID 0xf11 +#define CSR_MARCHID 0xf12 +#define CSR_MIMPID 0xf13 +#define CSR_MHARTID 0xf14 +#define CSR_CYCLEH 0xc80 +#define CSR_TIMEH 0xc81 +#define CSR_INSTRETH 0xc82 +#define CSR_HPMCOUNTER3H 0xc83 +#define CSR_HPMCOUNTER4H 0xc84 +#define CSR_HPMCOUNTER5H 0xc85 +#define CSR_HPMCOUNTER6H 0xc86 +#define CSR_HPMCOUNTER7H 0xc87 +#define CSR_HPMCOUNTER8H 0xc88 +#define CSR_HPMCOUNTER9H 0xc89 +#define CSR_HPMCOUNTER10H 0xc8a +#define CSR_HPMCOUNTER11H 0xc8b +#define CSR_HPMCOUNTER12H 0xc8c +#define CSR_HPMCOUNTER13H 0xc8d +#define CSR_HPMCOUNTER14H 0xc8e +#define CSR_HPMCOUNTER15H 0xc8f +#define CSR_HPMCOUNTER16H 0xc90 +#define CSR_HPMCOUNTER17H 0xc91 +#define CSR_HPMCOUNTER18H 0xc92 +#define CSR_HPMCOUNTER19H 0xc93 +#define 
CSR_HPMCOUNTER20H 0xc94 +#define CSR_HPMCOUNTER21H 0xc95 +#define CSR_HPMCOUNTER22H 0xc96 +#define CSR_HPMCOUNTER23H 0xc97 +#define CSR_HPMCOUNTER24H 0xc98 +#define CSR_HPMCOUNTER25H 0xc99 +#define CSR_HPMCOUNTER26H 0xc9a +#define CSR_HPMCOUNTER27H 0xc9b +#define CSR_HPMCOUNTER28H 0xc9c +#define CSR_HPMCOUNTER29H 0xc9d +#define CSR_HPMCOUNTER30H 0xc9e +#define CSR_HPMCOUNTER31H 0xc9f +#define CSR_MCYCLEH 0xb80 +#define CSR_MINSTRETH 0xb82 +#define CSR_MHPMCOUNTER3H 0xb83 +#define CSR_MHPMCOUNTER4H 0xb84 +#define CSR_MHPMCOUNTER5H 0xb85 +#define CSR_MHPMCOUNTER6H 0xb86 +#define CSR_MHPMCOUNTER7H 0xb87 +#define CSR_MHPMCOUNTER8H 0xb88 +#define CSR_MHPMCOUNTER9H 0xb89 +#define CSR_MHPMCOUNTER10H 0xb8a +#define CSR_MHPMCOUNTER11H 0xb8b +#define CSR_MHPMCOUNTER12H 0xb8c +#define CSR_MHPMCOUNTER13H 0xb8d +#define CSR_MHPMCOUNTER14H 0xb8e +#define CSR_MHPMCOUNTER15H 0xb8f +#define CSR_MHPMCOUNTER16H 0xb90 +#define CSR_MHPMCOUNTER17H 0xb91 +#define CSR_MHPMCOUNTER18H 0xb92 +#define CSR_MHPMCOUNTER19H 0xb93 +#define CSR_MHPMCOUNTER20H 0xb94 +#define CSR_MHPMCOUNTER21H 0xb95 +#define CSR_MHPMCOUNTER22H 0xb96 +#define CSR_MHPMCOUNTER23H 0xb97 +#define CSR_MHPMCOUNTER24H 0xb98 +#define CSR_MHPMCOUNTER25H 0xb99 +#define CSR_MHPMCOUNTER26H 0xb9a +#define CSR_MHPMCOUNTER27H 0xb9b +#define CSR_MHPMCOUNTER28H 0xb9c +#define CSR_MHPMCOUNTER29H 0xb9d +#define CSR_MHPMCOUNTER30H 0xb9e +#define CSR_MHPMCOUNTER31H 0xb9f + +/* mstatus bits */ +#define MSTATUS_UIE 0x00000001 +#define MSTATUS_SIE 0x00000002 +#define MSTATUS_HIE 0x00000004 +#define MSTATUS_MIE 0x00000008 +#define MSTATUS_UPIE 0x00000010 +#define MSTATUS_SPIE 0x00000020 +#define MSTATUS_HPIE 0x00000040 +#define MSTATUS_MPIE 0x00000080 +#define MSTATUS_SPP 0x00000100 +#define MSTATUS_HPP 0x00000600 +#define MSTATUS_MPP 0x00001800 +#define MSTATUS_FS 0x00006000 +#define MSTATUS_XS 0x00018000 +#define MSTATUS_MPRV 0x00020000 +#define MSTATUS_PUM 0x00040000 /* until: priv-1.9.1 */ +#define MSTATUS_SUM 0x00040000 /* since: priv-1.10 */ +#define MSTATUS_MXR 0x00080000 +#define MSTATUS_VM 0x1F000000 /* until: priv-1.9.1 */ +#define MSTATUS_TVM 0x00100000 /* since: priv-1.10 */ +#define MSTATUS_TW 0x20000000 /* since: priv-1.10 */ +#define MSTATUS_TSR 0x40000000 /* since: priv-1.10 */ + +#define MSTATUS64_UXL 0x0000000300000000ULL +#define MSTATUS64_SXL 0x0000000C00000000ULL + +#define MSTATUS32_SD 0x80000000 +#define MSTATUS64_SD 0x8000000000000000ULL + +#if defined(TARGET_RH850) +#define MSTATUS_SD MSTATUS32_SD +#endif + +/* sstatus bits */ +#define SSTATUS_UIE 0x00000001 +#define SSTATUS_SIE 0x00000002 +#define SSTATUS_UPIE 0x00000010 +#define SSTATUS_SPIE 0x00000020 +#define SSTATUS_SPP 0x00000100 +#define SSTATUS_FS 0x00006000 +#define SSTATUS_XS 0x00018000 +#define SSTATUS_PUM 0x00040000 /* until: priv-1.9.1 */ +#define SSTATUS_SUM 0x00040000 /* since: priv-1.10 */ +#define SSTATUS_MXR 0x00080000 + +#define SSTATUS32_SD 0x80000000 +#define SSTATUS64_SD 0x8000000000000000ULL + +#if defined(TARGET_RH850) +#define SSTATUS_SD SSTATUS32_SD +#endif + +/* irqs */ +#define MIP_SSIP (1 << IRQ_S_SOFT) +#define MIP_HSIP (1 << IRQ_H_SOFT) +#define MIP_MSIP (1 << IRQ_M_SOFT) +#define MIP_STIP (1 << IRQ_S_TIMER) +#define MIP_HTIP (1 << IRQ_H_TIMER) +#define MIP_MTIP (1 << IRQ_M_TIMER) +#define MIP_SEIP (1 << IRQ_S_EXT) +#define MIP_HEIP (1 << IRQ_H_EXT) +#define MIP_MEIP (1 << IRQ_M_EXT) + +#define SIP_SSIP MIP_SSIP +#define SIP_STIP MIP_STIP +#define SIP_SEIP MIP_SEIP + +#define PRV_U 0 +#define PRV_S 1 +#define PRV_H 2 +#define PRV_M 3 + +/* privileged ISA 1.9.1 
VM modes (mstatus.vm) */ +#define VM_1_09_MBARE 0 +#define VM_1_09_MBB 1 +#define VM_1_09_MBBID 2 +#define VM_1_09_SV32 8 +#define VM_1_09_SV39 9 +#define VM_1_09_SV48 10 + +/* privileged ISA 1.10.0 VM modes (satp.mode) */ +#define VM_1_10_MBARE 0 +#define VM_1_10_SV32 1 +#define VM_1_10_SV39 8 +#define VM_1_10_SV48 9 +#define VM_1_10_SV57 10 +#define VM_1_10_SV64 11 + +/* privileged ISA interrupt causes */ +#define IRQ_U_SOFT 0 /* since: priv-1.10 */ +#define IRQ_S_SOFT 1 +#define IRQ_H_SOFT 2 /* until: priv-1.9.1 */ +#define IRQ_M_SOFT 3 /* until: priv-1.9.1 */ +#define IRQ_U_TIMER 4 /* since: priv-1.10 */ +#define IRQ_S_TIMER 5 +#define IRQ_H_TIMER 6 /* until: priv-1.9.1 */ +#define IRQ_M_TIMER 7 /* until: priv-1.9.1 */ +#define IRQ_U_EXT 8 /* since: priv-1.10 */ +#define IRQ_S_EXT 9 +#define IRQ_H_EXT 10 /* until: priv-1.9.1 */ +#define IRQ_M_EXT 11 /* until: priv-1.9.1 */ +#define IRQ_X_COP 12 /* non-standard */ + +/* Default addresses */ +#define DEFAULT_RSTVEC 0x00000000 + +/* RV32 satp field masks */ +#define SATP32_MODE 0x80000000 +#define SATP32_ASID 0x7fc00000 +#define SATP32_PPN 0x003fffff + +/* RV64 satp field masks */ +#define SATP64_MODE 0xF000000000000000ULL +#define SATP64_ASID 0x0FFFF00000000000ULL +#define SATP64_PPN 0x00000FFFFFFFFFFFULL + +#if defined(TARGET_RH850) +#define SATP_MODE SATP32_MODE +#define SATP_ASID SATP32_ASID +#define SATP_PPN SATP32_PPN +#endif + +/* RH850 Exception Codes */ +#define EXCP_NONE -1 /* not a real RH850 exception code */ +#define RH850_EXCP_INST_ADDR_MIS 0x0 +#define RH850_EXCP_INST_ACCESS_FAULT 0x1 +#define RH850_EXCP_ILLEGAL_INST 0x2 +#define RH850_EXCP_BREAKPOINT 0x3 +#define RH850_EXCP_LOAD_ADDR_MIS 0x4 +#define RH850_EXCP_LOAD_ACCESS_FAULT 0x5 +#define RH850_EXCP_STORE_AMO_ADDR_MIS 0x6 +#define RH850_EXCP_STORE_AMO_ACCESS_FAULT 0x7 +#define RH850_EXCP_U_ECALL 0x8 /* for convenience, report all + ECALLs as this, handler + fixes */ +#define RH850_EXCP_S_ECALL 0x9 +#define RH850_EXCP_H_ECALL 0xa +#define RH850_EXCP_M_ECALL 0xb +#define RH850_EXCP_INST_PAGE_FAULT 0xc /* since: priv-1.10.0 */ +#define RH850_EXCP_LOAD_PAGE_FAULT 0xd /* since: priv-1.10.0 */ +#define RH850_EXCP_STORE_PAGE_FAULT 0xf /* since: priv-1.10.0 */ +#define RH850_EXCP_FETRAP 0x10 +#define RH850_EXCP_TRAP 0x11 +#define RH850_EXCP_RIE 0x12 +#define RH850_EXCP_SYSCALL 0x13 +#define RH850_EXCP_EIINT 0x14 +#define RH850_EXCP_FEINT 0x15 +#define RH850_EXCP_FENMI 0x16 + +/* Specific interrupts (FENMI, FEINT, EIINT). 
*/ +#define RH850_INT_FENMI CPU_INTERRUPT_TGT_EXT_0 /* Exception handler address is table-based */ +#define RH850_INT_FEINT CPU_INTERRUPT_TGT_EXT_1 /* Defines a non-maskable FE interrupt */ +#define RH850_INT_EIINT CPU_INTERRUPT_TGT_EXT_2 /* Defines a maskable FE interrupt */ + +#define RH850_EXCP_INT_FLAG 0x80000000 +#define RH850_EXCP_INT_MASK 0x7fffffff + + +/* page table entry (PTE) fields */ +#define PTE_V 0x001 /* Valid */ +#define PTE_R 0x002 /* Read */ +#define PTE_W 0x004 /* Write */ +#define PTE_X 0x008 /* Execute */ +#define PTE_U 0x010 /* User */ +#define PTE_G 0x020 /* Global */ +#define PTE_A 0x040 /* Accessed */ +#define PTE_D 0x080 /* Dirty */ +#define PTE_SOFT 0x300 /* Reserved for Software */ + +#define PTE_PPN_SHIFT 10 + +#define PTE_TABLE(PTE) (((PTE) & (PTE_V | PTE_R | PTE_W | PTE_X)) == PTE_V) diff --git a/qemu/target/rh850/cpu_user.h b/qemu/target/rh850/cpu_user.h new file mode 100644 index 0000000000..c2199610ab --- /dev/null +++ b/qemu/target/rh850/cpu_user.h @@ -0,0 +1,13 @@ +#define xRA 1 /* return address (aka link register) */ +#define xSP 2 /* stack pointer */ +#define xGP 3 /* global pointer */ +#define xTP 4 /* thread pointer */ + +#define xA0 10 /* gpr[10-17] are syscall arguments */ +#define xA1 11 +#define xA2 12 +#define xA3 13 +#define xA4 14 +#define xA5 15 +#define xA6 16 +#define xA7 17 /* syscall number goes here */ diff --git a/qemu/target/rh850/fpu_helper.c b/qemu/target/rh850/fpu_helper.c new file mode 100644 index 0000000000..d99c8613dd --- /dev/null +++ b/qemu/target/rh850/fpu_helper.c @@ -0,0 +1,823 @@ +/* + * RH850 FPU Emulation Helpers for QEMU. + * + * Copyright (c) 2016-2017 Sagar Karandikar, sagark@eecs.berkeley.edu + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2 or later, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see . + */ + +#include "qemu/osdep.h" +#include +#include "cpu.h" +#include "qemu/host-utils.h" +#include "exec/exec-all.h" +#include "exec/helper-proto.h" + +target_ulong cpu_rh850_get_fflags(CPURH850State *env) +{ + int soft = get_float_exception_flags(&env->fp_status); + target_ulong hard = 0; + + hard |= (soft & float_flag_inexact) ? FPEXC_NX : 0; + hard |= (soft & float_flag_underflow) ? FPEXC_UF : 0; + hard |= (soft & float_flag_overflow) ? FPEXC_OF : 0; + hard |= (soft & float_flag_divbyzero) ? FPEXC_DZ : 0; + hard |= (soft & float_flag_invalid) ? FPEXC_NV : 0; + + return hard; +} + +void cpu_rh850_set_fflags(CPURH850State *env, target_ulong hard) +{ + int soft = 0; + + soft |= (hard & FPEXC_NX) ? float_flag_inexact : 0; + soft |= (hard & FPEXC_UF) ? float_flag_underflow : 0; + soft |= (hard & FPEXC_OF) ? float_flag_overflow : 0; + soft |= (hard & FPEXC_DZ) ? float_flag_divbyzero : 0; + soft |= (hard & FPEXC_NV) ? 
float_flag_invalid : 0; + + set_float_exception_flags(soft, &env->fp_status); +} + +void helper_set_rounding_mode(CPURH850State *env, uint32_t rm) +{ + int softrm; + + if (rm == 7) { + rm = 0; //env->frm; + } + switch (rm) { + case 0: + softrm = float_round_nearest_even; + break; + case 1: + softrm = float_round_to_zero; + break; + case 2: + softrm = float_round_down; + break; + case 3: + softrm = float_round_up; + break; + case 4: + softrm = float_round_ties_away; + break; + default: + qemu_log_mask(CPU_LOG_INT, "%s\n", __func__); + do_raise_exception_err(env, RH850_EXCP_ILLEGAL_INST, GETPC()); + } + + set_float_rounding_mode(softrm, &env->fp_status); +} + +/* Propagate softfloat flags into FPSR. */ +void helper_f_sync_fflags(CPURH850State *env) +{ + target_ulong flags; + + /* Retrieve softfloat flags. */ + flags = cpu_rh850_get_fflags(env); + + /* Handle inexact flag. */ + if (flags & FPEXC_NX) + { + if (env->fpsr & (1 << 5)) + { + /* Inexact exception allowed, set cause bit. */ + env->fpsr |= (1 << 10); + } + else + { + /* Set preservation bit. */ + env->fpsr |= 1 << 0; + } + } + + /* Handle underflow flag. */ + if (flags & FPEXC_UF) + { + if (env->fpsr & (1 << 6)) + { + /* Underflow exception allowed, set cause bit. */ + env->fpsr |= (1 << 11); + } + else + { + /* Set preservation bit. */ + env->fpsr |= 1 << 1; + } + } + + /* Handle overflow flag. */ + if (flags & FPEXC_OF) + { + if (env->fpsr & (1 << 7)) + { + /* Overflow exception allowed, set cause bit. */ + env->fpsr |= (1 << 12); + } + else + { + /* Set preservation bit. */ + env->fpsr |= 1 << 2; + } + } + + /* Handle div-by-zero flag. */ + if (flags & FPEXC_DZ) + { + if (env->fpsr & (1 << 8)) + { + /* Div-by-zero exception allowed, set cause bit. */ + env->fpsr |= (1 << 13); + } + else + { + /* Set preservation bit. */ + env->fpsr |= 1 << 3; + } + } + + /* Handle invalid flag. */ + if (flags & FPEXC_NV) + { + if (env->fpsr & (1 << 9)) + { + /* Invalid operation exception allowed, set cause bit. */ + env->fpsr |= (1 << 14); + } + else + { + /* Set preservation bit.
*/ + env->fpsr |= 1 << 4; + } + } +} + +/** + * FPU flags checks + **/ + +uint32_t HELPER(f32_is_normal)(CPURH850State *env, uint32_t frs1) +{ + return (uint32_t)float32_is_normal(frs1); +} + +uint32_t HELPER(f32_is_zero_or_normal)(CPURH850State *env, uint32_t frs1) +{ + return (uint32_t)float32_is_zero_or_normal(frs1); +} + +uint32_t HELPER(f32_is_infinity)(CPURH850State *env, uint32_t frs1) +{ + return (uint32_t)float32_is_infinity(frs1); +} + + + +uint64_t helper_fmadd_s(CPURH850State *env, uint64_t frs1, uint64_t frs2, + uint64_t frs3) +{ + return float32_muladd(frs1, frs2, frs3, 0, &env->fp_status); +} + +uint64_t helper_fmadd_d(CPURH850State *env, uint64_t frs1, uint64_t frs2, + uint64_t frs3) +{ + return float64_muladd(frs1, frs2, frs3, 0, &env->fp_status); +} + +uint64_t helper_fmsub_s(CPURH850State *env, uint64_t frs1, uint64_t frs2, + uint64_t frs3) +{ + return float32_muladd(frs1, frs2, frs3, float_muladd_negate_c, + &env->fp_status); +} + +uint64_t helper_fmsub_d(CPURH850State *env, uint64_t frs1, uint64_t frs2, + uint64_t frs3) +{ + return float64_muladd(frs1, frs2, frs3, float_muladd_negate_c, + &env->fp_status); +} + +uint64_t helper_fnmsub_s(CPURH850State *env, uint64_t frs1, uint64_t frs2, + uint64_t frs3) +{ + return float32_muladd(frs1, frs2, frs3, float_muladd_negate_product, + &env->fp_status); +} + +uint64_t helper_fnmsub_d(CPURH850State *env, uint64_t frs1, uint64_t frs2, + uint64_t frs3) +{ + return float64_muladd(frs1, frs2, frs3, float_muladd_negate_product, + &env->fp_status); +} + +uint64_t helper_fnmadd_s(CPURH850State *env, uint64_t frs1, uint64_t frs2, + uint64_t frs3) +{ + return float32_muladd(frs1, frs2, frs3, float_muladd_negate_c | + float_muladd_negate_product, &env->fp_status); +} + +uint64_t helper_fnmadd_d(CPURH850State *env, uint64_t frs1, uint64_t frs2, + uint64_t frs3) +{ + return float64_muladd(frs1, frs2, frs3, float_muladd_negate_c | + float_muladd_negate_product, &env->fp_status); +} + + +/** + * Floating-point simple precision helpers. + **/ + +uint32_t HELPER(fadd_s)(CPURH850State *env, uint32_t frs1, uint32_t frs2) +{ + return float32_add(frs1, frs2, &env->fp_status); +} + +uint32_t HELPER(fsub_s)(CPURH850State *env, uint32_t frs1, uint32_t frs2) +{ + return float32_sub(frs1, frs2, &env->fp_status); +} + +uint32_t HELPER(fmul_s)(CPURH850State *env, uint32_t frs1, uint32_t frs2) +{ + return float32_mul(frs1, frs2, &env->fp_status); +} + +uint32_t HELPER(fmax_s)(CPURH850State *env, uint32_t frs1, uint32_t frs2) +{ + return float32_maxnum(frs1, frs2, &env->fp_status); +} + +uint32_t HELPER(fmin_s)(CPURH850State *env, uint32_t frs1, uint32_t frs2) +{ + return float32_minnum(frs1, frs2, &env->fp_status); +} + +uint32_t HELPER(fdiv_s)(CPURH850State *env, uint32_t frs1, uint32_t frs2) +{ + return float32_div(frs1, frs2, &env->fp_status); +} + +uint32_t HELPER(fabs_s)(CPURH850State *env, uint32_t frs1) +{ + return float32_abs(frs1); +} + +uint32_t HELPER(fneg_s)(CPURH850State *env, uint32_t frs1) +{ + return (frs1^0x80000000); +} + +uint32_t HELPER(ftrnc_sw)(CPURH850State *env, uint32_t frs1) +{ + return float32_to_int32_round_to_zero(frs1, &env->fp_status); +} + +uint32_t HELPER(fceil_sw)(CPURH850State *env, uint32_t frs1) +{ + /* Convert to int32 and round to positive. */ + return float32_to_int32_scalbn(frs1, float_round_up, 0, &env->fp_status); +} + +uint32_t HELPER(ffloor_sw)(CPURH850State *env, uint32_t frs1) +{ + /* Convert to int32 and round to negative.
*/ + return float32_to_int32_scalbn(frs1, float_round_down, 0, &env->fp_status); +} + +uint32_t HELPER(fcvt_sw)(CPURH850State *env, uint32_t frs1) +{ + /* Convert to int32 and round based on fp_status. */ + return float32_to_int32(frs1, &env->fp_status); +} + +uint32_t HELPER(fcvt_ls)(CPURH850State *env, uint64_t frs1) +{ + /* Convert int64 to float32 and round based on fp_status. */ + return int64_to_float32(frs1, &env->fp_status); +} + +uint32_t HELPER(fcvt_hs)(CPURH850State *env, uint32_t frs1) +{ + /* Convert lower half of frs1 into float32. */ + return int16_to_float32((int16_t)(frs1&0xffff), &env->fp_status); +} + +uint32_t HELPER(fcvt_sh)(CPURH850State *env, uint32_t frs1) +{ + /* Convert float32 to int16_t, zero-extended. */ + return float32_to_int16(frs1, &env->fp_status) & 0xffff; +} + +uint32_t HELPER(fcvt_ws)(CPURH850State *env, uint32_t frs1) +{ + /* Convert to float32 and round based on fp_status. */ + return int32_to_float32(frs1, &env->fp_status); +} + +uint32_t HELPER(ftrnc_suw)(CPURH850State *env, uint32_t frs1) +{ + return float32_to_uint32_round_to_zero(frs1, &env->fp_status); +} + +uint32_t HELPER(fceil_suw)(CPURH850State *env, uint32_t frs1) +{ + /* Convert to uint32 and round to positive. */ + return float32_to_uint32_scalbn(frs1, float_round_up, 0, &env->fp_status); +} + +uint32_t HELPER(ffloor_suw)(CPURH850State *env, uint32_t frs1) +{ + /* Convert to uint32 and round to negative. */ + return float32_to_uint32_scalbn(frs1, float_round_down, 0, &env->fp_status); +} + +uint32_t HELPER(fcvt_suw)(CPURH850State *env, uint32_t frs1) +{ + /* Convert to uint32 and round based on fp_status. */ + return float32_to_uint32(frs1, &env->fp_status); +} + +uint32_t HELPER(fcvt_uws)(CPURH850State *env, uint32_t frs1) +{ + /* Convert from uint32 to float32 and round based on fp_status. */ + return uint32_to_float32(frs1, &env->fp_status); +} + +uint32_t HELPER(fcvt_uls)(CPURH850State *env, uint64_t frs1) +{ + /* Convert uint64 to float32 and round based on fp_status. */ + return uint64_to_float32(frs1, &env->fp_status); +} + +uint64_t HELPER(ftrnc_sl)(CPURH850State *env, uint32_t frs1) +{ + return float32_to_int64_round_to_zero(frs1, &env->fp_status); +} + +uint64_t HELPER(fceil_sl)(CPURH850State *env, uint32_t frs1) +{ + /* Convert float32 to int64 and round to upper value. */ + return float32_to_int64_scalbn(frs1, float_round_up, 0, &env->fp_status); +} + +uint64_t HELPER(ffloor_sl)(CPURH850State *env, uint32_t frs1) +{ + /* Convert float32 to int64 and round to lower value. */ + return float32_to_int64_scalbn(frs1, float_round_down, 0, &env->fp_status); +} + +uint64_t HELPER(fcvt_sl)(CPURH850State *env, uint32_t frs1) +{ + /* Convert float32 to int64. */ + return float32_to_int64(frs1, &env->fp_status); +} + +uint64_t HELPER(ftrnc_sul)(CPURH850State *env, uint32_t frs1) +{ + return float32_to_uint64_round_to_zero(frs1, &env->fp_status); +} + +uint64_t HELPER(fceil_sul)(CPURH850State *env, uint32_t frs1) +{ + /* Convert float32 to uint64 and round to upper value. */ + return float32_to_uint64_scalbn(frs1, float_round_up, 0, &env->fp_status); +} + +uint64_t HELPER(ffloor_sul)(CPURH850State *env, uint32_t frs1) +{ + /* Convert float32 to uint64 and round to lower value. */ + return float32_to_uint64_scalbn(frs1, float_round_down, 0, &env->fp_status); +} + +uint64_t HELPER(fcvt_sul)(CPURH850State *env, uint32_t frs1) +{ + /* Convert float32 to uint64.
*/ + return float32_to_uint64(frs1, &env->fp_status); +} + +uint32_t HELPER(fsqrt_s)(CPURH850State *env, uint32_t frs1) +{ + return float32_sqrt(frs1, &env->fp_status); +} + +uint32_t HELPER(frecip_s)(CPURH850State *env, uint32_t frs1) +{ + /* Compute 1/x (0x3f800000 = float32(1.0)). */ + return float32_div(0x3f800000, frs1, &env->fp_status); +} + +uint32_t HELPER(frsqrt_s)(CPURH850State *env, uint32_t frs1) +{ + /* Compute 1/sqrt(x). */ + return HELPER(frecip_s)(env, float32_sqrt(frs1, &env->fp_status)); +} + +uint32_t HELPER(f_is_nan_s)(CPURH850State *env, uint32_t frs1) +{ + /* Check if float32 is NaN. */ + return float32_is_any_nan(frs1); +} + +uint32_t helper_fle_s(CPURH850State *env, uint32_t frs1, uint32_t frs2) +{ + return float32_le(frs1, frs2, &env->fp_status); +} + +uint32_t helper_flt_s(CPURH850State *env, uint32_t frs1, uint32_t frs2) +{ + return float32_lt(frs1, frs2, &env->fp_status); +} + +uint32_t helper_feq_s(CPURH850State *env, uint32_t frs1, uint32_t frs2) +{ + return float32_eq_quiet(frs1, frs2, &env->fp_status); +} + + +uint32_t HELPER(fmaf_s)(CPURH850State *env, uint32_t frs1, uint32_t frs2, uint32_t frs3) +{ + /* Compute (frs1 * frs2) + frs3 */ + return float32_muladd(frs1, frs2, frs3, 0, &env->fp_status); +} + +uint32_t HELPER(fmsf_s)(CPURH850State *env, uint32_t frs1, uint32_t frs2, uint32_t frs3) +{ + /* Compute (frs1 * frs2) - frs3 */ + return float32_muladd(frs1, frs2, frs3, float_muladd_negate_c, &env->fp_status); +} + +uint32_t HELPER(fnmaf_s)(CPURH850State *env, uint32_t frs1, uint32_t frs2, uint32_t frs3) +{ + /* Compute -((frs1 * frs2) + frs3) */ + return float32_muladd(frs1, frs2, frs3, float_muladd_negate_result, &env->fp_status); +} + +uint32_t HELPER(fnmsf_s)(CPURH850State *env, uint32_t frs1, uint32_t frs2, uint32_t frs3) +{ + /* Compute -((frs1 * frs2) - frs3) */ + return float32_muladd(frs1, frs2, frs3, float_muladd_negate_c | float_muladd_negate_result, &env->fp_status); +} + + + +target_ulong helper_fcvt_w_s(CPURH850State *env, uint64_t frs1) +{ + return float32_to_int32(frs1, &env->fp_status); +} + +target_ulong helper_fcvt_wu_s(CPURH850State *env, uint64_t frs1) +{ + return (int32_t)float32_to_uint32(frs1, &env->fp_status); +} + +#if defined(TARGET_RH85064) +uint64_t helper_fcvt_l_s(CPURH850State *env, uint64_t frs1) +{ + return float32_to_int64(frs1, &env->fp_status); +} + +uint64_t helper_fcvt_lu_s(CPURH850State *env, uint64_t frs1) +{ + return float32_to_uint64(frs1, &env->fp_status); +} +#endif + +uint64_t helper_fcvt_s_w(CPURH850State *env, target_ulong rs1) +{ + return int32_to_float32((int32_t)rs1, &env->fp_status); +} + +uint64_t helper_fcvt_s_wu(CPURH850State *env, target_ulong rs1) +{ + return uint32_to_float32((uint32_t)rs1, &env->fp_status); +} + +#if defined(TARGET_RH85064) +uint64_t helper_fcvt_s_l(CPURH850State *env, uint64_t rs1) +{ + return int64_to_float32(rs1, &env->fp_status); +} + +uint64_t helper_fcvt_s_lu(CPURH850State *env, uint64_t rs1) +{ + return uint64_to_float32(rs1, &env->fp_status); +} +#endif + +target_ulong helper_fclass_s(uint64_t frs1) +{ + float32 f = frs1; + bool sign = float32_is_neg(f); + + if (float32_is_infinity(f)) { + return sign ? 1 << 0 : 1 << 7; + } else if (float32_is_zero(f)) { + return sign ? 1 << 3 : 1 << 4; + } else if (float32_is_zero_or_denormal(f)) { + return sign ? 1 << 2 : 1 << 5; + } else if (float32_is_any_nan(f)) { + float_status s = { 0 }; /* for snan_bit_is_one */ + return float32_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8; + } else { + return sign ?
1 << 1 : 1 << 6; + } +} + +/** + * Floating-point double precision helpers. + **/ + +uint64_t HELPER(fadd_d)(CPURH850State *env, uint64_t frs1, uint64_t frs2) +{ + return float64_add(frs1, frs2, &env->fp_status); +} + +uint64_t HELPER(fsub_d)(CPURH850State *env, uint64_t frs1, uint64_t frs2) +{ + return float64_sub(frs1, frs2, &env->fp_status); +} + +uint64_t HELPER(fmul_d)(CPURH850State *env, uint64_t frs1, uint64_t frs2) +{ + return float64_mul(frs1, frs2, &env->fp_status); +} + +uint64_t HELPER(fmax_d)(CPURH850State *env, uint64_t frs1, uint64_t frs2) +{ + return float64_maxnum(frs1, frs2, &env->fp_status); +} + +uint64_t HELPER(fmin_d)(CPURH850State *env, uint64_t frs1, uint64_t frs2) +{ + return float64_minnum(frs1, frs2, &env->fp_status); +} + +uint64_t HELPER(fdiv_d)(CPURH850State *env, uint64_t frs1, uint64_t frs2) +{ + return float64_div(frs1, frs2, &env->fp_status); +} + +uint64_t HELPER(fabs_d)(CPURH850State *env, uint64_t frs1) +{ + return float64_abs(frs1); +} + +uint64_t HELPER(fneg_d)(CPURH850State *env, uint64_t frs1) +{ + return (frs1 ^ 0x8000000000000000); +} + +uint32_t HELPER(ftrnc_dw)(CPURH850State *env, uint64_t frs1) +{ + return float64_to_int32_round_to_zero(frs1, &env->fp_status); +} + +uint32_t HELPER(fceil_dw)(CPURH850State *env, uint64_t frs1) +{ + /* Convert float64 to int32 and round to upper value. */ + return float64_to_int32_scalbn(frs1, float_round_up, 0, &env->fp_status); +} + +uint32_t HELPER(ffloor_dw)(CPURH850State *env, uint64_t frs1) +{ + /* Convert float64 to int32 and round to lower value. */ + return float64_to_int32_scalbn(frs1, float_round_down, 0, &env->fp_status); +} + +uint32_t HELPER(fcvt_dw)(CPURH850State *env, uint64_t frs1) +{ + /* Convert float64 to int32. */ + return float64_to_int32(frs1, &env->fp_status); +} + +uint32_t HELPER(ftrnc_duw)(CPURH850State *env, uint64_t frs1) +{ + return float64_to_uint32_round_to_zero(frs1, &env->fp_status); +} + +uint32_t HELPER(fceil_duw)(CPURH850State *env, uint64_t frs1) +{ + /* Convert float64 to uint32 and round to upper value. */ + return float64_to_uint32_scalbn(frs1, float_round_up, 0, &env->fp_status); +} + +uint32_t HELPER(ffloor_duw)(CPURH850State *env, uint64_t frs1) +{ + /* Convert float64 to uint32 and round to lower value. */ + return float64_to_uint32_scalbn(frs1, float_round_down, 0, &env->fp_status); +} + +uint32_t HELPER(fcvt_duw)(CPURH850State *env, uint64_t frs1) +{ + /* Convert float64 to uint32. */ + return float64_to_uint32(frs1, &env->fp_status); +} + +uint64_t HELPER(fcvt_wd)(CPURH850State *env, uint32_t frs1) +{ + /* Convert int32 to float64. */ + return int32_to_float64(frs1, &env->fp_status); +} + +uint64_t HELPER(fcvt_ld)(CPURH850State *env, uint64_t frs1) +{ + /* Convert int64 to float64. */ + return int64_to_float64(frs1, &env->fp_status); +} + +uint64_t HELPER(fcvt_sd)(CPURH850State *env, uint32_t frs1) +{ + /* Convert float32 to float64. */ + return float32_to_float64(frs1, &env->fp_status); +} + +uint64_t HELPER(fcvt_uwd)(CPURH850State *env, uint32_t frs1) +{ + /* Convert uint32 to float64. */ + return uint32_to_float64(frs1, &env->fp_status); +} + +uint64_t HELPER(fcvt_uld)(CPURH850State *env, uint64_t frs1) +{ + /* Convert uint64 to float64. */ + return uint64_to_float64(frs1, &env->fp_status); +} + + + +uint64_t HELPER(ftrnc_dl)(CPURH850State *env, uint64_t frs1) +{ + return float64_to_int64_round_to_zero(frs1, &env->fp_status); +} + +uint64_t HELPER(fceil_dl)(CPURH850State *env, uint64_t frs1) +{ + /* Convert float64 to int64 and round to upper value.
*/ + return float64_to_int64_scalbn(frs1, float_round_up, 0, &env->fp_status); +} + +uint64_t HELPER(ffloor_dl)(CPURH850State *env, uint64_t frs1) +{ + /* Convert float64 to int64 and round to lower value. */ + return float64_to_int64_scalbn(frs1, float_round_down, 0, &env->fp_status); +} + +uint64_t HELPER(fcvt_dl)(CPURH850State *env, uint64_t frs1) +{ + /* Convert float64 to int64. */ + return float64_to_int64(frs1, &env->fp_status); +} + +uint64_t HELPER(ftrnc_dul)(CPURH850State *env, uint64_t frs1) +{ + return float64_to_uint64_round_to_zero(frs1, &env->fp_status); +} + +uint64_t HELPER(fceil_dul)(CPURH850State *env, uint64_t frs1) +{ + /* Convert float64 to uint64 and round to upper value. */ + return float64_to_uint64_scalbn(frs1, float_round_up, 0, &env->fp_status); +} + +uint64_t HELPER(ffloor_dul)(CPURH850State *env, uint64_t frs1) +{ + /* Convert float64 to uint64 and round to lower value. */ + return float64_to_uint64_scalbn(frs1, float_round_down, 0, &env->fp_status); +} + +uint64_t HELPER(fcvt_dul)(CPURH850State *env, uint64_t frs1) +{ + /* Convert float64 to uint64. */ + return float64_to_uint64(frs1, &env->fp_status); +} + +uint64_t HELPER(fsqrt_d)(CPURH850State *env, uint64_t frs1) +{ + return float64_sqrt(frs1, &env->fp_status); +} + +uint64_t HELPER(frecip_d)(CPURH850State *env, uint64_t frs1) +{ + /* Compute 1/x (0x3ff0000000000000 = float64(1.0)). */ + return float64_div(0x3ff0000000000000, frs1, &env->fp_status); +} + +uint64_t HELPER(frsqrt_d)(CPURH850State *env, uint64_t frs1) +{ + /* Compute 1/sqrt(x). */ + return HELPER(frecip_d)(env, float64_sqrt(frs1, &env->fp_status)); +} + +uint32_t HELPER(f_is_nan_d)(CPURH850State *env, uint64_t frs1) +{ + /* Check if float64 is NaN. */ + return float64_is_any_nan(frs1); +} + + + +uint64_t helper_fcvt_s_d(CPURH850State *env, uint64_t rs1) +{ + return float64_to_float32(rs1, &env->fp_status); +} + +uint64_t helper_fcvt_d_s(CPURH850State *env, uint64_t rs1) +{ + return float32_to_float64(rs1, &env->fp_status); +} + +uint32_t helper_fle_d(CPURH850State *env, uint64_t frs1, uint64_t frs2) +{ + return float64_le(frs1, frs2, &env->fp_status); +} + +uint32_t helper_flt_d(CPURH850State *env, uint64_t frs1, uint64_t frs2) +{ + return float64_lt(frs1, frs2, &env->fp_status); +} + +uint32_t helper_feq_d(CPURH850State *env, uint64_t frs1, uint64_t frs2) +{ + return float64_eq_quiet(frs1, frs2, &env->fp_status); +} + +target_ulong helper_fcvt_w_d(CPURH850State *env, uint64_t frs1) +{ + return float64_to_int32(frs1, &env->fp_status); +} + +target_ulong helper_fcvt_wu_d(CPURH850State *env, uint64_t frs1) +{ + return (int32_t)float64_to_uint32(frs1, &env->fp_status); +} + +#if defined(TARGET_RH85064) +uint64_t helper_fcvt_l_d(CPURH850State *env, uint64_t frs1) +{ + return float64_to_int64(frs1, &env->fp_status); +} + +uint64_t helper_fcvt_lu_d(CPURH850State *env, uint64_t frs1) +{ + return float64_to_uint64(frs1, &env->fp_status); +} +#endif + +uint64_t helper_fcvt_d_w(CPURH850State *env, target_ulong rs1) +{ + return int32_to_float64((int32_t)rs1, &env->fp_status); +} + +uint64_t helper_fcvt_d_wu(CPURH850State *env, target_ulong rs1) +{ + return uint32_to_float64((uint32_t)rs1, &env->fp_status); +} + +#if defined(TARGET_RH85064) +uint64_t helper_fcvt_d_l(CPURH850State *env, uint64_t rs1) +{ + return int64_to_float64(rs1, &env->fp_status); +} + +uint64_t helper_fcvt_d_lu(CPURH850State *env, uint64_t rs1) +{ + return uint64_to_float64(rs1, &env->fp_status); +} +#endif + +target_ulong helper_fclass_d(uint64_t frs1) +{ + float64 f = frs1; + bool
sign = float64_is_neg(f); + + if (float64_is_infinity(f)) { + return sign ? 1 << 0 : 1 << 7; + } else if (float64_is_zero(f)) { + return sign ? 1 << 3 : 1 << 4; + } else if (float64_is_zero_or_denormal(f)) { + return sign ? 1 << 2 : 1 << 5; + } else if (float64_is_any_nan(f)) { + float_status s = { 0 }; /* for snan_bit_is_one */ + return float64_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8; + } else { + return sign ? 1 << 1 : 1 << 6; + } +} diff --git a/qemu/target/rh850/fpu_translate.c b/qemu/target/rh850/fpu_translate.c new file mode 100644 index 0000000000..2fd008177b --- /dev/null +++ b/qemu/target/rh850/fpu_translate.c @@ -0,0 +1,1557 @@ +#include "fpu_translate.h" +#include "instmap.h" + +extern TCGv_i32 cpu_ZF; + +/* Helpers */ +void fpu_load_i64(TCGContext *tcg_ctx, TCGv_i64 dst, int reg_n); +void fpu_load_i64_2(TCGContext *tcg_ctx, TCGv_i64 dst0, TCGv_i64 dst1, int reg_n0, int reg_n1); +void fpu_store_i64(TCGContext *tcg_ctx, int reg_n, TCGv_i64 src); + +/* Single-precision */ +void fpu_gen_sp_ir_3(CPURH850State *env, DisasContext *ctx, int operands, int op, int rs1, int rs2, int rs3); +void fpu_gen_sp_ir_2(CPURH850State *env, DisasContext *ctx, int operands, int op, int rs2, int rs3); +void fpu_gen_cmpf_s(CPURH850State *env, DisasContext *ctx, int rs1, int rs2, int fcond, int fcbit); +void fpu_gen_cmov_s(CPURH850State *env, DisasContext *ctx, int rs1, int rs2, int rs3, int fcbit); +void fpu_gen_trfsr(CPURH850State *env, DisasContext *ctx, int fcbit); +void fpu_gen_cat1_ir(CPURH850State *env, DisasContext *ctx, int op, int frs1, int frs2, int frs3); + + +/* Double precision */ +void fpu_gen_cmpf_d(CPURH850State *env, DisasContext *ctx, int rs1, int rs2, int fcond, int fcbit); +void fpu_gen_cmov_d(CPURH850State *env, DisasContext *ctx, int rs1, int rs2, int rs3, int fcbit); +void fpu_gen_dp_ir_3(CPURH850State *env, DisasContext *ctx, int operands, int op, int rs1, int rs2, int rs3); +void fpu_gen_dp_ir_2(CPURH850State *env, DisasContext *ctx, int operands, int op, int rs2, int rs3); + + +/** + * Helpers for 64-bit register load/store + **/ + +void fpu_load_i64(TCGContext *tcg_ctx, TCGv_i64 dst, int reg_n) +{ + TCGv_i32 rl = tcg_temp_local_new_i32(tcg_ctx); + TCGv_i32 rh = tcg_temp_local_new_i32(tcg_ctx); + + /* Read float64 from (reg_n/reg_n+1). */ + gen_get_gpr(tcg_ctx, rl, reg_n); + gen_get_gpr(tcg_ctx, rh, reg_n+1); + tcg_gen_concat_i32_i64(tcg_ctx, dst, rl, rh); + + /* Free temporary variables. */ + tcg_temp_free_i32(tcg_ctx, rl); + tcg_temp_free_i32(tcg_ctx, rh); +} + +void fpu_store_i64(TCGContext *tcg_ctx, int reg_n, TCGv_i64 src) +{ + TCGv_i32 rl = tcg_temp_local_new_i32(tcg_ctx); + TCGv_i32 rh = tcg_temp_local_new_i32(tcg_ctx); + TCGv_i64 shift = tcg_temp_local_new_i64(tcg_ctx); + + tcg_gen_movi_i64(tcg_ctx, shift, 32); + tcg_gen_extrl_i64_i32(tcg_ctx, rl, src); + tcg_gen_shr_i64(tcg_ctx, src, src, shift); + tcg_gen_extrl_i64_i32(tcg_ctx, rh, src); + gen_set_gpr(tcg_ctx, reg_n, rl); + gen_set_gpr(tcg_ctx, reg_n + 1, rh); + + /* Free temporary variables. */ + tcg_temp_free_i32(tcg_ctx, rl); + tcg_temp_free_i32(tcg_ctx, rh); +} + +void fpu_load_i64_2(TCGContext *tcg_ctx, TCGv_i64 dst0, TCGv_i64 dst1, int reg_n0, int reg_n1) +{ + TCGv_i32 rl = tcg_temp_local_new_i32(tcg_ctx); + TCGv_i32 rh = tcg_temp_local_new_i32(tcg_ctx); + + /* Read float64 from (reg_n0/reg_n0 + 1). */ + gen_get_gpr(tcg_ctx, rl, reg_n0); + gen_get_gpr(tcg_ctx, rh, reg_n0 + 1); + tcg_gen_concat_i32_i64(tcg_ctx, dst0, rl, rh); + + /* Read float64 from (reg_n1/reg_n1 + 1). 
*/ + gen_get_gpr(tcg_ctx, rl, reg_n1); + gen_get_gpr(tcg_ctx, rh, reg_n1 + 1); + tcg_gen_concat_i32_i64(tcg_ctx, dst1, rl, rh); + + /* Free temporary variables. */ + tcg_temp_free_i32(tcg_ctx, rl); + tcg_temp_free_i32(tcg_ctx, rh); +} + +/** + * Floating-point simple-precision IR generators. + **/ + +void fpu_gen_cat1_ir(CPURH850State *env, DisasContext *ctx, int op, int frs1, int frs2, int frs3) +{ + TCGContext *tcg_ctx = ctx->uc->tcg_ctx; + TCGv r1 = tcg_temp_local_new(tcg_ctx); + TCGv r2 = tcg_temp_local_new(tcg_ctx); + TCGv r3 = tcg_temp_local_new(tcg_ctx); + + /* Load register content from frs1, frs2 and frs3. */ + gen_get_gpr(tcg_ctx, r1, frs1); + gen_get_gpr(tcg_ctx, r2, frs2); + gen_get_gpr(tcg_ctx, r3, frs3); + + switch(op) + { + case OPC_RH850_FPU_FMAF_S: + gen_helper_fmaf_s(tcg_ctx, r3, tcg_ctx->cpu_env, r1, r2, r3); + break; + + case OPC_RH850_FPU_FMSF_S: + gen_helper_fmsf_s(tcg_ctx, r3, tcg_ctx->cpu_env, r1, r2, r3); + break; + + case OPC_RH850_FPU_FNMAF_S: + gen_helper_fnmaf_s(tcg_ctx, r3, tcg_ctx->cpu_env, r1, r2, r3); + break; + + case OPC_RH850_FPU_FNMSF_S: + gen_helper_fnmsf_s(tcg_ctx, r3, tcg_ctx->cpu_env, r1, r2, r3); + break; + + default: + /* Unknown instruction. */ + break; + } + + /* Store r3 register into frs3. */ + gen_set_gpr(tcg_ctx, frs3, r3); + + /* Free locals. */ + tcg_temp_free(tcg_ctx, r1); + tcg_temp_free(tcg_ctx, r2); + tcg_temp_free(tcg_ctx, r3); +} + + +void fpu_gen_sp_ir_2(CPURH850State *env, DisasContext *ctx, int operands, int op, int rs2, int rs3) +{ + TCGContext *tcg_ctx = ctx->uc->tcg_ctx; + + /* rs1, rs2 and rs3 for TCG */ + TCGv r2 = tcg_temp_local_new_i32(tcg_ctx); + TCGv r3 = tcg_temp_local_new_i32(tcg_ctx); + TCGv_i64 r3_64 = tcg_temp_local_new_i64(tcg_ctx); + + /* Load contents from registers. */ + switch(operands) + { + case FPU_TYPE_S: + { + /* Extract value of reg1 and reg2. */ + gen_get_gpr(tcg_ctx, r2, rs2); + + /* Apply operation. */ + switch(op) + { + case FPU_OP_ABS: + gen_helper_fabs_s(tcg_ctx, r3, tcg_ctx->cpu_env, r2); + break; + + case FPU_OP_NEG: + gen_helper_fneg_s(tcg_ctx, r3, tcg_ctx->cpu_env, r2); + break; + + case FPU_OP_SQRT: + gen_helper_fsqrt_s(tcg_ctx, r3, tcg_ctx->cpu_env, r2); + break; + + case FPU_OP_RECIP: + gen_helper_frecip_s(tcg_ctx, r3, tcg_ctx->cpu_env, r2); + break; + + case FPU_OP_RSQRT: + gen_helper_frsqrt_s(tcg_ctx, r3, tcg_ctx->cpu_env, r2); + break; + } + + /* Store result. */ + gen_set_gpr(tcg_ctx, rs3, r3); + } + break; + + case FPU_TYPE_SL: + { + /* Load simple-precision float. */ + gen_get_gpr(tcg_ctx, r2, rs2); + + /* Apply operation. */ + switch(op) + { + case FPU_OP_TRNC: + gen_helper_ftrnc_sl(tcg_ctx, r3_64, tcg_ctx->cpu_env, r2); + break; + + case FPU_OP_CEIL: + gen_helper_fceil_sl(tcg_ctx, r3_64, tcg_ctx->cpu_env, r2); + break; + + case FPU_OP_FLOOR: + gen_helper_ffloor_sl(tcg_ctx, r3_64, tcg_ctx->cpu_env, r2); + break; + + case FPU_OP_CVT: + gen_helper_fcvt_sl(tcg_ctx, r3_64, tcg_ctx->cpu_env, r2); + break; + } + + /* Store result as long. */ + fpu_store_i64(tcg_ctx, rs3, r3_64); + } + break; + + case FPU_TYPE_SUL: + { + /* Load simple-precision float. */ + gen_get_gpr(tcg_ctx, r2, rs2); + + /* Apply operation. 
*/ + switch(op) + { + case FPU_OP_TRNC: + gen_helper_ftrnc_sul(tcg_ctx, r3_64, tcg_ctx->cpu_env, r2); + break; + + case FPU_OP_CEIL: + gen_helper_fceil_sul(tcg_ctx, r3_64, tcg_ctx->cpu_env, r2); + break; + + case FPU_OP_FLOOR: + gen_helper_ffloor_sul(tcg_ctx, r3_64, tcg_ctx->cpu_env, r2); + break; + + case FPU_OP_CVT: + gen_helper_fcvt_sul(tcg_ctx, r3_64, tcg_ctx->cpu_env, r2); + break; + } + + /* Store result as long. */ + fpu_store_i64(tcg_ctx, rs3, r3_64); + } + break; + + + case FPU_TYPE_SW: + { + /* Extract value of reg1 and reg2. */ + gen_get_gpr(tcg_ctx, r2, rs2); + + /* Apply operation. */ + switch(op) + { + case FPU_OP_TRNC: + gen_helper_ftrnc_sw(tcg_ctx, r3, tcg_ctx->cpu_env, r2); + break; + + case FPU_OP_CEIL: + gen_helper_fceil_sw(tcg_ctx, r3, tcg_ctx->cpu_env, r2); + break; + + case FPU_OP_FLOOR: + gen_helper_ffloor_sw(tcg_ctx, r3, tcg_ctx->cpu_env, r2); + break; + + case FPU_OP_CVT: + gen_helper_fcvt_sw(tcg_ctx, r3, tcg_ctx->cpu_env, r2); + break; + } + + /* Store result. */ + gen_set_gpr(tcg_ctx, rs3, r3); + } + break; + + case FPU_TYPE_SUW: + { + /* Extract value of reg1 and reg2. */ + gen_get_gpr(tcg_ctx, r2, rs2); + + /* Apply operation. */ + switch(op) + { + case FPU_OP_TRNC: + gen_helper_ftrnc_suw(tcg_ctx, r3, tcg_ctx->cpu_env, r2); + break; + + case FPU_OP_CEIL: + gen_helper_fceil_suw(tcg_ctx, r3, tcg_ctx->cpu_env, r2); + break; + + case FPU_OP_FLOOR: + gen_helper_ffloor_suw(tcg_ctx, r3, tcg_ctx->cpu_env, r2); + break; + + case FPU_OP_CVT: + gen_helper_fcvt_suw(tcg_ctx, r3, tcg_ctx->cpu_env, r2); + break; + } + + /* Store result. */ + gen_set_gpr(tcg_ctx, rs3, r3); + } + break; + + case FPU_TYPE_LS: + { + /* Load content from register. */ + fpu_load_i64(tcg_ctx, r3_64, rs2); + + /* Apply operation. */ + if (op == FPU_OP_CVT) + { + gen_helper_fcvt_ls(tcg_ctx, r3, tcg_ctx->cpu_env, r3_64); + } + else + { + /* Unsupported operation. */ + } + + /* Store result into rs3. */ + gen_set_gpr(tcg_ctx, rs3, r3); + } + break; + + case FPU_TYPE_HS: + { + /* Extract value of reg1 and reg2. */ + gen_get_gpr(tcg_ctx, r2, rs2); + + /* Apply operation. */ + if (op == FPU_OP_CVT) + { + gen_helper_fcvt_hs(tcg_ctx, r3, tcg_ctx->cpu_env, r2); + } + else + { + /* Unsupported operation. */ + } + + /* Store result into rs3. */ + gen_set_gpr(tcg_ctx, rs3, r3); + } + break; + + case FPU_TYPE_WS: + { + /* Extract value of reg1 and reg2. */ + gen_get_gpr(tcg_ctx, r2, rs2); + + /* Apply operation. */ + if (op == FPU_OP_CVT) + { + gen_helper_fcvt_ws(tcg_ctx, r3, tcg_ctx->cpu_env, r2); + } + else + { + /* Unsupported operation. */ + } + + /* Store result into rs3. */ + gen_set_gpr(tcg_ctx, rs3, r3); + } + break; + + + case FPU_TYPE_SH: + { + /* Extract value of reg1 and reg2. */ + gen_get_gpr(tcg_ctx, r2, rs2); + + /* Apply operation. */ + if (op == FPU_OP_CVT) + { + gen_helper_fcvt_sh(tcg_ctx, r3, tcg_ctx->cpu_env, r2); + } + else + { + /* Unsupported operation. */ + } + + /* Store result into rs3. */ + gen_set_gpr(tcg_ctx, rs3, r3); + } + break; + + case FPU_TYPE_ULS: + { + /* Load content from register. */ + fpu_load_i64(tcg_ctx, r3_64, rs2); + + /* Apply operation. */ + if (op == FPU_OP_CVT) + { + gen_helper_fcvt_uls(tcg_ctx, r3, tcg_ctx->cpu_env, r3_64); + } + else + { + /* Unsupported operation. */ + } + + /* Store result into rs3. */ + gen_set_gpr(tcg_ctx, rs3, r3); + } + break; + + case FPU_TYPE_UWS: + { + /* Extract value of reg1 and reg2. */ + gen_get_gpr(tcg_ctx, r2, rs2); + + /* Apply operation. 
*/ + if (op == FPU_OP_CVT) + { + gen_helper_fcvt_uws(tcg_ctx, r3, tcg_ctx->cpu_env, r2); + } + else + { + /* Unsupported operation. */ + } + + /* Store result into rs3. */ + gen_set_gpr(tcg_ctx, rs3, r3); + } + break; + + } + + /* Mov softfloat flags into our register. */ + gen_helper_f_sync_fflags(tcg_ctx, tcg_ctx->cpu_env); + + /* Free temp. */ + tcg_temp_free(tcg_ctx, r2); + tcg_temp_free(tcg_ctx, r3); + tcg_temp_free_i64(tcg_ctx, r3_64); +} + +/** + * refactored + **/ + +void fpu_gen_sp_ir_3(CPURH850State *env, DisasContext *ctx, int operands, int op, int rs1, int rs2, int rs3) +{ + TCGContext *tcg_ctx = ctx->uc->tcg_ctx; + + /* rs1, rs2 and rs3 for TCG */ + TCGv r1 = tcg_temp_local_new_i32(tcg_ctx); + TCGv r2 = tcg_temp_local_new_i32(tcg_ctx); + TCGv r3 = tcg_temp_local_new_i32(tcg_ctx); + + /* Load contents from registers. */ + switch(operands) + { + case FPU_TYPE_S: + { + /* Extract value of reg1 and reg2. */ + gen_get_gpr(tcg_ctx, r1, rs1); + gen_get_gpr(tcg_ctx, r2, rs2); + } + break; + } + + /* Apply operation. */ + switch(op) + { + case FPU_OP_ADD: + gen_helper_fadd_s(tcg_ctx, r3, tcg_ctx->cpu_env, r1, r2); + break; + + case FPU_OP_DIV: + gen_helper_fdiv_s(tcg_ctx, r3, tcg_ctx->cpu_env, r2, r1); + break; + + case FPU_OP_SUB: + gen_helper_fsub_s(tcg_ctx, r3, tcg_ctx->cpu_env, r2, r1); + break; + + case FPU_OP_MAX: + gen_helper_fmax_s(tcg_ctx, r3, tcg_ctx->cpu_env, r1, r2); + break; + + case FPU_OP_MIN: + gen_helper_fmin_s(tcg_ctx, r3, tcg_ctx->cpu_env, r1, r2); + break; + + case FPU_OP_MUL: + gen_helper_fmul_s(tcg_ctx, r3, tcg_ctx->cpu_env, r1, r2); + break; + } + + /* Store result. */ + switch(operands) + { + case FPU_TYPE_S: + { + /* Set reg3. */ + gen_set_gpr(tcg_ctx, rs3, r3); + } + break; + } + + /* Mov softfloat flags into our register. */ + gen_helper_f_sync_fflags(tcg_ctx, tcg_ctx->cpu_env); + + /* Free temp. */ + tcg_temp_free(tcg_ctx, r1); + tcg_temp_free(tcg_ctx, r2); + tcg_temp_free(tcg_ctx, r3); +} + + +void fpu_gen_trfsr(CPURH850State *env, DisasContext *ctx, int fcbit) +{ + TCGContext *tcg_ctx = ctx->uc->tcg_ctx; + TCGv fpsr = tcg_temp_local_new(tcg_ctx); + TCGv mask = tcg_temp_local_new(tcg_ctx); + TCGv shift = tcg_temp_local_new(tcg_ctx); + TCGv one = tcg_const_i32(tcg_ctx, 1); + TCGv value = tcg_temp_local_new(tcg_ctx); + + /* Load fpsr and compute mask. */ + gen_get_spr(tcg_ctx, BANK_ID_BASIC_0, FPSR_IDX, fpsr); + tcg_gen_movi_i32(tcg_ctx, shift, 24 + fcbit); + tcg_gen_shl_i32(tcg_ctx, mask, one, shift); + + /* Extract CCn bit. */ + tcg_gen_and_i32(tcg_ctx, value, fpsr, mask); + tcg_gen_shr_i32(tcg_ctx, value, value, shift); + + /* Set Z flag. */ + tcg_gen_mov_i32(tcg_ctx, cpu_ZF, value); + gen_set_gpr(tcg_ctx, 1, value); + + /* Free locals. */ + tcg_temp_free(tcg_ctx, fpsr); + tcg_temp_free(tcg_ctx, mask); + tcg_temp_free(tcg_ctx, shift); + tcg_temp_free(tcg_ctx, one); + tcg_temp_free(tcg_ctx, value); +} + +void fpu_gen_cmov_s(CPURH850State *env, DisasContext *ctx, int rs1, int rs2, int rs3, int fcbit) +{ + TCGContext *tcg_ctx = ctx->uc->tcg_ctx; + TCGLabel *end, *otherwise; + TCGv r1 = tcg_temp_local_new(tcg_ctx); + TCGv r2 = tcg_temp_local_new(tcg_ctx); + TCGv final_shift = tcg_temp_local_new(tcg_ctx); + TCGv res = tcg_temp_local_new(tcg_ctx); + TCGv fpsr = tcg_temp_local_new(tcg_ctx); + + end = gen_new_label(tcg_ctx); + otherwise = gen_new_label(tcg_ctx); + + + /* Load register contents. */ + gen_get_gpr(tcg_ctx, r1, rs1); + gen_get_gpr(tcg_ctx, r2, rs2); + + /* Check if FPSR.CCn is set (with n=fcbit). 
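The CCn condition flags occupy FPSR bits 24..31, so the bit for a given fcbit is selected with a mask of 1 shifted left by (24 + fcbit), as computed below.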
*/ + gen_get_spr(tcg_ctx, BANK_ID_BASIC_0, FPSR_IDX, fpsr); + tcg_gen_movi_i32(tcg_ctx, res, 1); + tcg_gen_movi_i32(tcg_ctx, final_shift, 24 + fcbit); + tcg_gen_shl_i32(tcg_ctx, res, res, final_shift); + tcg_gen_and_i32(tcg_ctx, res, fpsr, res); + + /* If not set, r2 -> r3. */ + tcg_gen_brcondi_i32(tcg_ctx, TCG_COND_EQ, res, 0, otherwise); + + /* If set, do the move ! */ + gen_set_gpr(tcg_ctx, rs3, r1); + + tcg_gen_br(tcg_ctx, end); + + gen_set_label(tcg_ctx, otherwise); + + gen_set_gpr(tcg_ctx, rs3, r2); + + /* End. */ + gen_set_label(tcg_ctx, end); + + /* Free variables. */ + tcg_temp_free(tcg_ctx, r1); + tcg_temp_free(tcg_ctx, r2); + tcg_temp_free(tcg_ctx, final_shift); + tcg_temp_free(tcg_ctx, res); + tcg_temp_free(tcg_ctx, fpsr); +} + +void fpu_gen_cmpf_s(CPURH850State *env, DisasContext *ctx, int rs1, int rs2, int fcond, int fcbit) +{ + TCGContext *tcg_ctx = ctx->uc->tcg_ctx; + TCGLabel *handle_nan; + TCGLabel *end; + + end = gen_new_label(tcg_ctx); + handle_nan = gen_new_label(tcg_ctx); + + TCGv r1 = tcg_temp_local_new(tcg_ctx); + TCGv r2 = tcg_temp_local_new(tcg_ctx); + TCGv nan1 = tcg_temp_local_new(tcg_ctx); + TCGv nan2 = tcg_temp_local_new(tcg_ctx); + TCGv less = tcg_temp_local_new(tcg_ctx); + TCGv equal = tcg_temp_local_new(tcg_ctx); + TCGv unordered = tcg_temp_local_new(tcg_ctx); + TCGv res = tcg_temp_local_new(tcg_ctx); + TCGv final_shift = tcg_temp_local_new(tcg_ctx); + TCGv one = tcg_temp_local_new(tcg_ctx); + TCGv mask = tcg_temp_local_new(tcg_ctx); + + tcg_gen_movi_i32(tcg_ctx, one, 1); + + /* Load rs1 and rs2 registers. */ + gen_get_gpr(tcg_ctx, r1, rs1); + gen_get_gpr(tcg_ctx, r2, rs2); + + /* If r1 or r2 is a Nan, then error. */ + gen_helper_f_is_nan_s(tcg_ctx, nan1, tcg_ctx->cpu_env, r1); + gen_helper_f_is_nan_s(tcg_ctx, nan2, tcg_ctx->cpu_env, r2); + tcg_gen_brcond_i32(tcg_ctx, TCG_COND_EQ, nan1, one, handle_nan); + tcg_gen_brcond_i32(tcg_ctx, TCG_COND_EQ, nan2, one, handle_nan); + + gen_helper_flt_s(tcg_ctx, less, tcg_ctx->cpu_env, r2, r1); + gen_helper_feq_s(tcg_ctx, equal, tcg_ctx->cpu_env, r2, r1); + tcg_gen_movi_i32(tcg_ctx, unordered, 0); + tcg_gen_br(tcg_ctx, end); + + gen_set_label(tcg_ctx, handle_nan); + + tcg_gen_movi_i32(tcg_ctx, less, 0); + tcg_gen_movi_i32(tcg_ctx, equal, 0); + tcg_gen_movi_i32(tcg_ctx, unordered, 1); + if (fcond & 0x8) + { + /* Invalid operation detected. */ + /* TODO: raise exception ? */ + } + + /* This is the end =) */ + gen_set_label(tcg_ctx, end); + + /* Compute logical result. */ + tcg_gen_movi_i32(tcg_ctx, res, 0); + if (fcond & 1) + tcg_gen_or_i32(tcg_ctx, res, res, unordered); + if (fcond & 2) + tcg_gen_or_i32(tcg_ctx, res, res, equal); + if (fcond & 4) + tcg_gen_or_i32(tcg_ctx, res, res, less); + + /** + * Set CCn bit into FPSR (with n=fcbit). + * 1. Load FPSR into r1 + * 2. AND r1 with NOT bitmask for CCn + * 3. OR bitmask if res == 1 + * 4. Store r1 into FPSR + **/ + gen_get_spr(tcg_ctx, BANK_ID_BASIC_0, FPSR_IDX, r1); + tcg_gen_movi_i32(tcg_ctx, final_shift, 24 + fcbit); + tcg_gen_shl_i32(tcg_ctx, mask, one, final_shift); + tcg_gen_andc_tl(tcg_ctx, r1, r1, mask); + tcg_gen_shl_i32(tcg_ctx, res, res, final_shift); + tcg_gen_or_i32(tcg_ctx, r1, r1, res); + gen_set_spr(tcg_ctx, BANK_ID_BASIC_0, FPSR_IDX, r1); + + /* Free variables. 
*/ + tcg_temp_free(tcg_ctx, r1); + tcg_temp_free(tcg_ctx, r2); + tcg_temp_free(tcg_ctx, nan1); + tcg_temp_free(tcg_ctx, nan2); + tcg_temp_free(tcg_ctx, less); + tcg_temp_free(tcg_ctx, equal); + tcg_temp_free(tcg_ctx, unordered); + tcg_temp_free(tcg_ctx, final_shift); + tcg_temp_free(tcg_ctx, one); + tcg_temp_free(tcg_ctx, res); +} + + +/** + * Floating-point double-precision IR generators. + **/ + +void fpu_gen_dp_ir_2(CPURH850State *env, DisasContext *ctx, int operands, int op, int rs2, int rs3) +{ + TCGContext *tcg_ctx = ctx->uc->tcg_ctx; + /* rs1, rs2 and rs3 for TCG */ + TCGv_i64 r2 = tcg_temp_local_new_i64(tcg_ctx); + TCGv_i64 r3 = tcg_temp_local_new_i64(tcg_ctx); + TCGv r3_32 = tcg_temp_local_new_i32(tcg_ctx); + + /* Load contents from registers. */ + switch(operands) + { + case FPU_TYPE_D: + { + /* Extract value from register rs2. */ + fpu_load_i64(tcg_ctx, r2, rs2); + + /* Apply operation. */ + switch(op) + { + case FPU_OP_ABS: + gen_helper_fabs_d(tcg_ctx, r3, tcg_ctx->cpu_env, r2); + break; + + case FPU_OP_NEG: + gen_helper_fneg_d(tcg_ctx, r3, tcg_ctx->cpu_env, r2); + break; + + case FPU_OP_SQRT: + gen_helper_fsqrt_d(tcg_ctx, r3, tcg_ctx->cpu_env, r2); + break; + + case FPU_OP_RECIP: + gen_helper_frecip_d(tcg_ctx, r3, tcg_ctx->cpu_env, r2); + break; + + case FPU_OP_RSQRT: + gen_helper_frsqrt_d(tcg_ctx, r3, tcg_ctx->cpu_env, r2); + break; + } + + /* Store result. */ + fpu_store_i64(tcg_ctx, rs3, r3); + } + break; + + case FPU_TYPE_DL: + { + /* Extract value from register rs2. */ + fpu_load_i64(tcg_ctx, r2, rs2); + + /* Apply operation. */ + switch(op) + { + case FPU_OP_TRNC: + gen_helper_ftrnc_dl(tcg_ctx, r3, tcg_ctx->cpu_env, r2); + break; + + case FPU_OP_CEIL: + gen_helper_fceil_dl(tcg_ctx, r3, tcg_ctx->cpu_env, r2); + break; + + case FPU_OP_FLOOR: + gen_helper_ffloor_dl(tcg_ctx, r3, tcg_ctx->cpu_env, r2); + break; + + case FPU_OP_CVT: + gen_helper_fcvt_dl(tcg_ctx, r3, tcg_ctx->cpu_env, r2); + break; + + } + + /* Store result. */ + fpu_store_i64(tcg_ctx, rs3, r3); + } + break; + + case FPU_TYPE_DUL: + { + /* Extract value from register rs2. */ + fpu_load_i64(tcg_ctx, r2, rs2); + + /* Apply operation. */ + switch(op) + { + case FPU_OP_TRNC: + gen_helper_ftrnc_dul(tcg_ctx, r3, tcg_ctx->cpu_env, r2); + break; + + case FPU_OP_CEIL: + gen_helper_fceil_dul(tcg_ctx, r3, tcg_ctx->cpu_env, r2); + break; + + case FPU_OP_FLOOR: + gen_helper_ffloor_dul(tcg_ctx, r3, tcg_ctx->cpu_env, r2); + break; + + case FPU_OP_CVT: + gen_helper_fcvt_dul(tcg_ctx, r3, tcg_ctx->cpu_env, r2); + break; + + } + + /* Store result. */ + fpu_store_i64(tcg_ctx, rs3, r3); + } + break; + + + case FPU_TYPE_DW: + { + /* Extract value from register rs2. */ + fpu_load_i64(tcg_ctx, r2, rs2); + + /* Apply operation. */ + switch(op) + { + case FPU_OP_TRNC: + gen_helper_ftrnc_dw(tcg_ctx, r3_32, tcg_ctx->cpu_env, r2); + break; + + case FPU_OP_CEIL: + gen_helper_fceil_dw(tcg_ctx, r3_32, tcg_ctx->cpu_env, r2); + break; + + case FPU_OP_FLOOR: + gen_helper_ffloor_dw(tcg_ctx, r3_32, tcg_ctx->cpu_env, r2); + break; + + case FPU_OP_CVT: + gen_helper_fcvt_dw(tcg_ctx, r3_32, tcg_ctx->cpu_env, r2); + break; + + } + + /* Store result. */ + gen_set_gpr(tcg_ctx, rs3, r3_32); + } + break; + + case FPU_TYPE_DUW: + { + /* Extract value from register rs2. */ + fpu_load_i64(tcg_ctx, r2, rs2); + + /* Apply operation. 
*/ + switch(op) + { + case FPU_OP_TRNC: + gen_helper_ftrnc_duw(tcg_ctx, r3_32, tcg_ctx->cpu_env, r2); + break; + + case FPU_OP_CEIL: + gen_helper_fceil_duw(tcg_ctx, r3_32, tcg_ctx->cpu_env, r2); + break; + + case FPU_OP_FLOOR: + gen_helper_ffloor_duw(tcg_ctx, r3_32, tcg_ctx->cpu_env, r2); + break; + + case FPU_OP_CVT: + gen_helper_fcvt_duw(tcg_ctx, r3_32, tcg_ctx->cpu_env, r2); + break; + + } + + /* Store result. */ + gen_set_gpr(tcg_ctx, rs3, r3_32); + } + break; + + + case FPU_TYPE_LD: + { + /* Load content from register. */ + fpu_load_i64(tcg_ctx, r2, rs2); + + /* Apply operation. */ + if (op == FPU_OP_CVT) + { + gen_helper_fcvt_ld(tcg_ctx, r3, tcg_ctx->cpu_env, r2); + } + else + { + /* Unsupported operation. */ + } + + /* Store result. */ + fpu_store_i64(tcg_ctx, rs3, r3); + } + break; + + + case FPU_TYPE_WD: + { + /* Extract value of reg1 and reg2. */ + gen_get_gpr(tcg_ctx, r3_32, rs2); + + /* Apply operation. */ + if (op == FPU_OP_CVT) + { + gen_helper_fcvt_wd(tcg_ctx, r3, tcg_ctx->cpu_env, r3_32); + } + else + { + /* Unsupported operation. */ + } + + /* Store result. */ + fpu_store_i64(tcg_ctx, rs3, r3); + } + break; + + + case FPU_TYPE_SD: + { + /* Extract value of reg1 and reg2. */ + gen_get_gpr(tcg_ctx, r3_32, rs2); + + /* Apply operation. */ + if (op == FPU_OP_CVT) + { + gen_helper_fcvt_sd(tcg_ctx, r3, tcg_ctx->cpu_env, r3_32); + } + else + { + /* Unsupported operation. */ + } + + /* Store result. */ + fpu_store_i64(tcg_ctx, rs3, r3); + } + break; + + case FPU_TYPE_UWD: + { + /* Extract value of reg1 and reg2. */ + gen_get_gpr(tcg_ctx, r3_32, rs2); + + /* Apply operation. */ + if (op == FPU_OP_CVT) + { + gen_helper_fcvt_uwd(tcg_ctx, r3, tcg_ctx->cpu_env, r3_32); + } + else + { + /* Unsupported operation. */ + } + + /* Store result. */ + fpu_store_i64(tcg_ctx, rs3, r3); + } + break; + + case FPU_TYPE_ULD: + { + /* Load content from register. */ + fpu_load_i64(tcg_ctx, r2, rs2); + + /* Apply operation. */ + if (op == FPU_OP_CVT) + { + gen_helper_fcvt_uld(tcg_ctx, r3, tcg_ctx->cpu_env, r2); + } + else + { + /* Unsupported operation. */ + } + + /* Store result. */ + fpu_store_i64(tcg_ctx, rs3, r3); + } + break; + + } + + /* Mov softfloat flags into our register. */ + gen_helper_f_sync_fflags(tcg_ctx, tcg_ctx->cpu_env); + + /* Free temp. */ + tcg_temp_free_i64(tcg_ctx, r2); + tcg_temp_free_i64(tcg_ctx, r3); + tcg_temp_free_i32(tcg_ctx, r3_32); +} + + +void fpu_gen_dp_ir_3(CPURH850State *env, DisasContext *ctx, int operands, int op, int rs1, int rs2, int rs3) +{ + TCGContext *tcg_ctx = ctx->uc->tcg_ctx; + /* rs1, rs2 and rs3 for TCG */ + TCGv_i64 r1 = tcg_temp_local_new_i64(tcg_ctx); + TCGv_i64 r2 = tcg_temp_local_new_i64(tcg_ctx); + TCGv_i64 r3 = tcg_temp_local_new_i64(tcg_ctx); + + /* Load contents from registers. */ + switch(operands) + { + case FPU_TYPE_D: + { + /* Load float64 values from regpairs designed by rs1 and rs2. 
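Each double-precision operand lives in a general-purpose register pair: the low word in reg_n and the high word in reg_n+1, concatenated into a 64-bit value by fpu_load_i64_2().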
*/ + fpu_load_i64_2(tcg_ctx, r1, r2, rs1, rs2); + } + break; + } + + switch(op) + { + case FPU_OP_ADD: + gen_helper_fadd_d(tcg_ctx, r3, tcg_ctx->cpu_env, r1, r2); + break; + + case FPU_OP_DIV: + gen_helper_fdiv_d(tcg_ctx, r3, tcg_ctx->cpu_env, r2, r1); + break; + + case FPU_OP_SUB: + gen_helper_fsub_d(tcg_ctx, r3, tcg_ctx->cpu_env, r2, r1); + break; + + case FPU_OP_MAX: + gen_helper_fmax_d(tcg_ctx, r3, tcg_ctx->cpu_env, r1, r2); + break; + + case FPU_OP_MIN: + gen_helper_fmin_d(tcg_ctx, r3, tcg_ctx->cpu_env, r1, r2); + break; + + case FPU_OP_MUL: + gen_helper_fmul_d(tcg_ctx, r3, tcg_ctx->cpu_env, r1, r2); + break; + } + + switch(operands) + { + case FPU_TYPE_D: + { + /* Store result as float64 in regpair designed by rs3. */ + fpu_store_i64(tcg_ctx, rs3, r3); + } + break; + } + + /* Mov softfloat flags into our register. */ + gen_helper_f_sync_fflags(tcg_ctx, tcg_ctx->cpu_env); + + /* Free temp. */ + tcg_temp_free_i64(tcg_ctx, r1); + tcg_temp_free_i64(tcg_ctx, r2); + tcg_temp_free_i64(tcg_ctx, r3); +} + + +void fpu_gen_cmpf_d(CPURH850State *env, DisasContext *ctx, int rs1, int rs2, int fcond, int fcbit) +{ + TCGContext *tcg_ctx = ctx->uc->tcg_ctx; + TCGLabel *handle_nan; + TCGLabel *end; + + end = gen_new_label(tcg_ctx); + handle_nan = gen_new_label(tcg_ctx); + + TCGv_i64 r1 = tcg_temp_local_new_i64(tcg_ctx); + TCGv_i64 r2 = tcg_temp_local_new_i64(tcg_ctx); + TCGv nan1 = tcg_temp_local_new(tcg_ctx); + TCGv nan2 = tcg_temp_local_new(tcg_ctx); + TCGv less = tcg_temp_local_new(tcg_ctx); + TCGv equal = tcg_temp_local_new(tcg_ctx); + TCGv unordered = tcg_temp_local_new(tcg_ctx); + TCGv res = tcg_temp_local_new(tcg_ctx); + TCGv final_shift = tcg_temp_local_new(tcg_ctx); + TCGv one = tcg_temp_local_new(tcg_ctx); + TCGv mask = tcg_temp_local_new(tcg_ctx); + + tcg_gen_movi_i32(tcg_ctx, one, 1); + + /* Load rs1 and rs2 registers. */ + fpu_load_i64(tcg_ctx, r1, rs1); + fpu_load_i64(tcg_ctx, r2, rs2); + + /* If r1 or r2 is a Nan, then error. */ + gen_helper_f_is_nan_d(tcg_ctx, nan1, tcg_ctx->cpu_env, r1); + gen_helper_f_is_nan_d(tcg_ctx, nan2, tcg_ctx->cpu_env, r2); + tcg_gen_or_i32(tcg_ctx, nan1, nan1, nan2); + tcg_gen_brcond_i32(tcg_ctx, TCG_COND_EQ, nan1, one, handle_nan); + tcg_gen_brcond_i32(tcg_ctx, TCG_COND_EQ, nan2, one, handle_nan); + + gen_helper_flt_d(tcg_ctx, less, tcg_ctx->cpu_env, r2, r1); + gen_helper_feq_d(tcg_ctx, equal, tcg_ctx->cpu_env, r2, r1); + tcg_gen_movi_i32(tcg_ctx, unordered, 0); + tcg_gen_br(tcg_ctx, end); + + gen_set_label(tcg_ctx, handle_nan); + + tcg_gen_movi_i32(tcg_ctx, less, 0); + tcg_gen_movi_i32(tcg_ctx, equal, 0); + tcg_gen_movi_i32(tcg_ctx, unordered, 1); + if (fcond & 0x8) + { + /* Invalid operation detected. */ + /* TODO: raise exception ? */ + } + + /* This is the end =) */ + gen_set_label(tcg_ctx, end); + + /* Set FPSR.CCn */ + tcg_gen_movi_i32(tcg_ctx, res, 0); + if (fcond & 1) + tcg_gen_or_i32(tcg_ctx, res, res, unordered); + if (fcond & 2) + tcg_gen_or_i32(tcg_ctx, res, res, equal); + if (fcond & 4) + tcg_gen_or_i32(tcg_ctx, res, res, less); + + /** + * Set CCn bit into FPSR (with n=fcbit). + * 1. Load FPSR into r1 + * 2. AND r1 with NOT bitmask for CCn + * 3. OR bitmask if res == 1 + * 4. 
Store r1 into FPSR + **/ + gen_get_spr(tcg_ctx, BANK_ID_BASIC_0, FPSR_IDX, nan1); + tcg_gen_movi_i32(tcg_ctx, final_shift, 24 + fcbit); + tcg_gen_shl_i32(tcg_ctx, mask, one, final_shift); + tcg_gen_andc_tl(tcg_ctx, nan1, nan1, mask); + tcg_gen_shl_i32(tcg_ctx, res, res, final_shift); + tcg_gen_or_i32(tcg_ctx, nan1, nan1, res); + gen_set_spr(tcg_ctx, BANK_ID_BASIC_0, FPSR_IDX, nan1); + + /* Free variables. */ + tcg_temp_free_i64(tcg_ctx, r1); + tcg_temp_free_i64(tcg_ctx, r2); + tcg_temp_free(tcg_ctx, nan1); + tcg_temp_free(tcg_ctx, nan2); + tcg_temp_free(tcg_ctx, less); + tcg_temp_free(tcg_ctx, equal); + tcg_temp_free(tcg_ctx, unordered); + tcg_temp_free(tcg_ctx, final_shift); + tcg_temp_free(tcg_ctx, one); + tcg_temp_free(tcg_ctx, mask); +} + +void fpu_gen_cmov_d(CPURH850State *env, DisasContext *ctx, int rs1, int rs2, int rs3, int fcbit) +{ + TCGContext *tcg_ctx = ctx->uc->tcg_ctx; + TCGLabel *end, *otherwise; + TCGv_i64 r1 = tcg_temp_local_new_i64(tcg_ctx); + TCGv_i64 r2 = tcg_temp_local_new_i64(tcg_ctx); + TCGv final_shift = tcg_temp_local_new(tcg_ctx); + TCGv res = tcg_temp_local_new(tcg_ctx); + TCGv fpsr = tcg_temp_local_new(tcg_ctx); + + end = gen_new_label(tcg_ctx); + otherwise = gen_new_label(tcg_ctx); + + + /* Load register contents. */ + fpu_load_i64(tcg_ctx, r1, rs1); + fpu_load_i64(tcg_ctx, r2, rs2); + + /* Check if FPSR.CCn is set (with n=fcbit). */ + gen_get_spr(tcg_ctx, BANK_ID_BASIC_0, FPSR_IDX, fpsr); + tcg_gen_movi_i32(tcg_ctx, res, 1); + tcg_gen_movi_i32(tcg_ctx, final_shift, 24 + fcbit); + tcg_gen_shl_i32(tcg_ctx, res, res, final_shift); + tcg_gen_and_i32(tcg_ctx, res, fpsr, res); + + /* If not set, r2 -> r3. */ + tcg_gen_brcondi_i32(tcg_ctx, TCG_COND_EQ, res, 0, otherwise); + + /* If set, do the move ! */ + fpu_store_i64(tcg_ctx, rs3, r1); + + tcg_gen_br(tcg_ctx, end); + + gen_set_label(tcg_ctx, otherwise); + + fpu_store_i64(tcg_ctx, rs3, r2); + + /* End. */ + gen_set_label(tcg_ctx, end); + + /* Free variables. */ + tcg_temp_free_i64(tcg_ctx, r1); + tcg_temp_free_i64(tcg_ctx, r2); + tcg_temp_free(tcg_ctx, final_shift); + tcg_temp_free(tcg_ctx, res); + tcg_temp_free(tcg_ctx, fpsr); +} + + +/** + * Instruction decoding and IR generation. 
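+ * Category 0 groups the FPU instructions selected through MASK_OP_FORMAT_FI() and decoded in fpu_decode_cat0_instn() below; category 1 covers the fused multiply family (FMAF/FMSF/FNMAF/FNMSF) forwarded to fpu_gen_cat1_ir().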
+ **/ + +void fpu_decode_cat0_instn(CPURH850State *env, DisasContext *ctx) +{ + int rs1 = GET_RS1(ctx->opcode); + int rs2 = GET_RS2(ctx->opcode); + int rs3 = GET_RS3(ctx->opcode); + + switch(MASK_OP_FORMAT_FI(ctx->opcode)) + { + case OPC_RH850_FPU_GROUP_SW: + switch(rs1) + { + case OPC_RH850_FPU_TRNCF_SW: + fpu_gen_sp_ir_2(env, ctx, FPU_TYPE_SW, FPU_OP_TRNC, rs2, rs3); + break; + + case OPC_RH850_FPU_CEILF_SW: + fpu_gen_sp_ir_2(env, ctx, FPU_TYPE_SW, FPU_OP_CEIL, rs2, rs3); + break; + + case OPC_RH850_FPU_FLOORF_SW: + fpu_gen_sp_ir_2(env, ctx, FPU_TYPE_SW, FPU_OP_FLOOR, rs2, rs3); + break; + + case OPC_RH850_FPU_CVTF_SW: + fpu_gen_sp_ir_2(env, ctx, FPU_TYPE_SW, FPU_OP_CVT, rs2, rs3); + break; + + case OPC_RH850_FPU_TRNCF_SUW: + fpu_gen_sp_ir_2(env, ctx, FPU_TYPE_SUW, FPU_OP_TRNC, rs2, rs3); + break; + + case OPC_RH850_FPU_CEILF_SUW: + fpu_gen_sp_ir_2(env, ctx, FPU_TYPE_SUW, FPU_OP_CEIL, rs2, rs3); + break; + + case OPC_RH850_FPU_FLOORF_SUW: + fpu_gen_sp_ir_2(env, ctx, FPU_TYPE_SUW, FPU_OP_FLOOR, rs2, rs3); + break; + + case OPC_RH850_FPU_CVTF_SUW: + fpu_gen_sp_ir_2(env, ctx, FPU_TYPE_SUW, FPU_OP_CVT, rs2, rs3); + break; + } + break; + + case OPC_RH850_FPU_GROUP_DS: + switch(rs1) + { + case OPC_RH850_FPU_CVTF_WS: + fpu_gen_sp_ir_2(env, ctx, FPU_TYPE_WS, FPU_OP_CVT, rs2, rs3); + break; + + case OPC_RH850_FPU_CVTF_LS: + fpu_gen_sp_ir_2(env, ctx, FPU_TYPE_LS, FPU_OP_CVT, rs2, rs3); + break; + + case OPC_RH850_FPU_CVTF_HS: + fpu_gen_sp_ir_2(env, ctx, FPU_TYPE_HS, FPU_OP_CVT, rs2, rs3); + break; + + case OPC_RH850_FPU_CVTF_SH: + fpu_gen_sp_ir_2(env, ctx, FPU_TYPE_SH, FPU_OP_CVT, rs2, rs3); + break; + + case OPC_RH850_FPU_CVTF_UWS: + fpu_gen_sp_ir_2(env, ctx, FPU_TYPE_UWS, FPU_OP_CVT, rs2, rs3); + break; + + case OPC_RH850_FPU_CVTF_ULS: + fpu_gen_sp_ir_2(env, ctx, FPU_TYPE_ULS, FPU_OP_CVT, rs2, rs3); + break; + } + break; + + case OPC_RH850_FPU_GROUP_SL: + switch(rs1) + { + case OPC_RH850_FPU_TRNCF_SL: + fpu_gen_sp_ir_2(env, ctx, FPU_TYPE_SL, FPU_OP_TRNC, rs2, rs3); + break; + + case OPC_RH850_FPU_CEILF_SL: + fpu_gen_sp_ir_2(env, ctx, FPU_TYPE_SL, FPU_OP_CEIL, rs2, rs3); + break; + + case OPC_RH850_FPU_FLOORF_SL: + fpu_gen_sp_ir_2(env, ctx, FPU_TYPE_SL, FPU_OP_FLOOR, rs2, rs3); + break; + + case OPC_RH850_FPU_CVTF_SL: + fpu_gen_sp_ir_2(env, ctx, FPU_TYPE_SL, FPU_OP_CVT, rs2, rs3); + break; + + case OPC_RH850_FPU_TRNCF_SUL: + fpu_gen_sp_ir_2(env, ctx, FPU_TYPE_SUL, FPU_OP_TRNC, rs2, rs3); + break; + + case OPC_RH850_FPU_CEILF_SUL: + fpu_gen_sp_ir_2(env, ctx, FPU_TYPE_SUL, FPU_OP_CEIL, rs2, rs3); + break; + + case OPC_RH850_FPU_FLOORF_SUL: + fpu_gen_sp_ir_2(env, ctx, FPU_TYPE_SUL, FPU_OP_FLOOR, rs2, rs3); + break; + + case OPC_RH850_FPU_CVTF_SUL: + fpu_gen_sp_ir_2(env, ctx, FPU_TYPE_SUL, FPU_OP_CVT, rs2, rs3); + break; + } + break; + + case OPC_RH850_FPU_GROUP_ABSS: + switch(rs1) + { + case OPC_RH850_FPU_ABSF_S: + fpu_gen_sp_ir_2(env, ctx, FPU_TYPE_S, FPU_OP_ABS, rs2, rs3); + break; + + case OPC_RH850_FPU_NEGF_S: + fpu_gen_sp_ir_2(env, ctx, FPU_TYPE_S, FPU_OP_NEG, rs2, rs3); + break; + } + break; + + case OPC_RH850_FPU_GROUP_S: + switch(rs1) + { + case OPC_RH850_FPU_SQRTF_S: + fpu_gen_sp_ir_2(env, ctx, FPU_TYPE_S, FPU_OP_SQRT, rs2, rs3); + break; + + case OPC_RH850_FPU_RECIPF_S: + fpu_gen_sp_ir_2(env, ctx, FPU_TYPE_S, FPU_OP_RECIP, rs2, rs3); + break; + + case OPC_RH850_FPU_RSQRTF_S: + fpu_gen_sp_ir_2(env, ctx, FPU_TYPE_S, FPU_OP_RSQRT, rs2, rs3); + break; + } + break; + + case OPC_RH850_FPU_GROUP_DW: + switch(rs1) + { + case OPC_RH850_FPU_TRNCF_DW: + fpu_gen_dp_ir_2(env, ctx, FPU_TYPE_DW, 
FPU_OP_TRNC, rs2, rs3); + break; + + case OPC_RH850_FPU_CEILF_DW: + fpu_gen_dp_ir_2(env, ctx, FPU_TYPE_DW, FPU_OP_CEIL, rs2, rs3); + break; + + case OPC_RH850_FPU_FLOORF_DW: + fpu_gen_dp_ir_2(env, ctx, FPU_TYPE_DW, FPU_OP_FLOOR, rs2, rs3); + break; + + case OPC_RH850_FPU_CVTF_DW: + fpu_gen_dp_ir_2(env, ctx, FPU_TYPE_DW, FPU_OP_CVT, rs2, rs3); + break; + + case OPC_RH850_FPU_TRNCF_DUW: + fpu_gen_dp_ir_2(env, ctx, FPU_TYPE_DUW, FPU_OP_TRNC, rs2, rs3); + break; + + case OPC_RH850_FPU_CEILF_DUW: + fpu_gen_dp_ir_2(env, ctx, FPU_TYPE_DUW, FPU_OP_CEIL, rs2, rs3); + break; + + case OPC_RH850_FPU_FLOORF_DUW: + fpu_gen_dp_ir_2(env, ctx, FPU_TYPE_DUW, FPU_OP_FLOOR, rs2, rs3); + break; + + case OPC_RH850_FPU_CVTF_DUW: + fpu_gen_dp_ir_2(env, ctx, FPU_TYPE_DUW, FPU_OP_CVT, rs2, rs3); + break; + } + break; + + case OPC_RH850_FPU_GROUP_DD: + switch(rs1) + { + case OPC_RH850_FPU_CVTF_WD: + //fpu_gen_cvtf_wd(env, ctx, rs2, rs3); + fpu_gen_dp_ir_2(env, ctx, FPU_TYPE_WD, FPU_OP_CVT, rs2, rs3); + break; + + case OPC_RH850_FPU_CVTF_LD: + //fpu_gen_cvtf_ld(env, ctx, rs2, rs3); + fpu_gen_dp_ir_2(env, ctx, FPU_TYPE_LD, FPU_OP_CVT, rs2, rs3); + break; + + case OPC_RH850_FPU_CVTF_SD: + //fpu_gen_cvtf_sd(env, ctx, rs2, rs3); + fpu_gen_dp_ir_2(env, ctx, FPU_TYPE_SD, FPU_OP_CVT, rs2, rs3); + break; + + case OPC_RH850_FPU_CVTF_UWD: + //fpu_gen_cvtf_uwd(env, ctx, rs2, rs3); + fpu_gen_dp_ir_2(env, ctx, FPU_TYPE_UWD, FPU_OP_CVT, rs2, rs3); + break; + + case OPC_RH850_FPU_CVTF_ULD: + //fpu_gen_cvtf_uld(env, ctx, rs2, rs3); + fpu_gen_dp_ir_2(env, ctx, FPU_TYPE_ULD, FPU_OP_CVT, rs2, rs3); + break; + } + break; + + case OPC_RH850_FPU_GROUP_DL: + switch(rs1) + { + case OPC_RH850_FPU_TRNCF_DL: + fpu_gen_dp_ir_2(env, ctx, FPU_TYPE_DL, FPU_OP_TRNC, rs2, rs3); + break; + + case OPC_RH850_FPU_CEILF_DL: + fpu_gen_dp_ir_2(env, ctx, FPU_TYPE_DL, FPU_OP_CEIL, rs2, rs3); + break; + + case OPC_RH850_FPU_FLOORF_DL: + fpu_gen_dp_ir_2(env, ctx, FPU_TYPE_DL, FPU_OP_FLOOR, rs2, rs3); + break; + + case OPC_RH850_FPU_CVTF_DL: + fpu_gen_dp_ir_2(env, ctx, FPU_TYPE_DL, FPU_OP_CVT, rs2, rs3); + break; + + case OPC_RH850_FPU_TRNCF_DUL: + fpu_gen_dp_ir_2(env, ctx, FPU_TYPE_DUL, FPU_OP_TRNC, rs2, rs3); + break; + + case OPC_RH850_FPU_CEILF_DUL: + fpu_gen_dp_ir_2(env, ctx, FPU_TYPE_DUL, FPU_OP_CEIL, rs2, rs3); + break; + + case OPC_RH850_FPU_FLOORF_DUL: + fpu_gen_dp_ir_2(env, ctx, FPU_TYPE_DUL, FPU_OP_FLOOR, rs2, rs3); + break; + + case OPC_RH850_FPU_CVTF_DUL: + fpu_gen_dp_ir_2(env, ctx, FPU_TYPE_DUL, FPU_OP_CVT, rs2, rs3); + break; + } + break; + + case OPC_RH850_FPU_GROUP_ABSD: + switch(rs1) + { + case OPC_RH850_FPU_ABSF_D: + fpu_gen_dp_ir_2(env, ctx, FPU_TYPE_D, FPU_OP_ABS, rs2, rs3); + break; + + case OPC_RH850_FPU_NEGF_D: + fpu_gen_dp_ir_2(env, ctx, FPU_TYPE_D, FPU_OP_NEG, rs2, rs3); + break; + } + break; + + case OPC_RH850_FPU_GROUP_D: + switch(rs1) + { + case OPC_RH850_FPU_SQRTF_D: + fpu_gen_dp_ir_2(env, ctx, FPU_TYPE_D, FPU_OP_SQRT, rs2, rs3); + break; + + case OPC_RH850_FPU_RECIPF_D: + fpu_gen_dp_ir_2(env, ctx, FPU_TYPE_D, FPU_OP_RECIP, rs2, rs3); + break; + + case OPC_RH850_FPU_RSQRTF_D: + fpu_gen_dp_ir_2(env, ctx, FPU_TYPE_D, FPU_OP_RSQRT, rs2, rs3); + break; + } + break; + + case OPC_RH850_FPU_ADDF_S: + fpu_gen_sp_ir_3(env, ctx, FPU_TYPE_S, FPU_OP_ADD, rs1, rs2, rs3); + break; + + case OPC_RH850_FPU_ADDF_D: + /* rs1, rs2 and rs3 must have bit 0 set to 0. */ + if ((rs1 & 1) || (rs2 & 1) || (rs3 & 1)) + { + /* TODO: Invalid instruction, must trigger exception. 
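Double-precision operands are even/odd register pairs, so an odd register designator (bit 0 set) is a reserved encoding here.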
*/ + } + else + fpu_gen_dp_ir_3(env, ctx, FPU_TYPE_D, FPU_OP_ADD, rs1, rs2, rs3); + break; + + case OPC_RH850_FPU_SUBF_S: + fpu_gen_sp_ir_3(env, ctx, FPU_TYPE_S, FPU_OP_SUB, rs1, rs2, rs3); + break; + + case OPC_RH850_FPU_SUBF_D: + fpu_gen_dp_ir_3(env, ctx, FPU_TYPE_D, FPU_OP_SUB, rs1, rs2, rs3); + break; + + case OPC_RH850_FPU_MULF_S: + fpu_gen_sp_ir_3(env, ctx, FPU_TYPE_S, FPU_OP_MUL, rs1, rs2, rs3); + break; + + case OPC_RH850_FPU_MULF_D: + fpu_gen_dp_ir_3(env, ctx, FPU_TYPE_D, FPU_OP_MUL, rs1, rs2, rs3); + break; + + case OPC_RH850_FPU_MAXF_S: + fpu_gen_sp_ir_3(env, ctx, FPU_TYPE_S, FPU_OP_MAX, rs1, rs2, rs3); + break; + + case OPC_RH850_FPU_MAXF_D: + fpu_gen_dp_ir_3(env, ctx, FPU_TYPE_D, FPU_OP_MAX, rs1, rs2, rs3); + break; + + case OPC_RH850_FPU_MINF_S: + fpu_gen_sp_ir_3(env, ctx, FPU_TYPE_S, FPU_OP_MIN, rs1, rs2, rs3); + break; + + case OPC_RH850_FPU_MINF_D: + fpu_gen_dp_ir_3(env, ctx, FPU_TYPE_D, FPU_OP_MIN, rs1, rs2, rs3); + break; + + case OPC_RH850_FPU_DIVF_S: + fpu_gen_sp_ir_3(env, ctx, FPU_TYPE_S, FPU_OP_DIV, rs1, rs2, rs3); + break; + + case OPC_RH850_FPU_DIVF_D: + fpu_gen_dp_ir_3(env, ctx, FPU_TYPE_D, FPU_OP_DIV, rs1, rs2, rs3); + break; + + + default: + switch(ctx->opcode & (0x70 << 16)) + { + case OPC_RH850_FPU_CMOV_S_OR_TRFSR: + + /* If reg1==reg2==reg3==0, then it is a TRSFR instruction. */ + if ((rs1 == 0) && (rs2 == 0) && (rs3 == 0)) + { + fpu_gen_trfsr(env, ctx, (ctx->opcode & (0xe << 16))>>17 ); + } + else + { + /* Call generator with fcbit. */ + fpu_gen_cmov_s(env, ctx, rs1, rs2, rs3, (ctx->opcode & (0xe << 16))>>17 ); + } + break; + + case OPC_RH850_FPU_CMOV_D: + /* Call generator with fcbit. */ + fpu_gen_cmov_d(env, ctx, rs1, rs2, rs3, (ctx->opcode & (0xe << 16))>>17 ); + break; + + case OPC_RH850_FPU_CMP_S: + /* Call generator with fcond (rs3) and fcbit. */ + fpu_gen_cmpf_s(env, ctx, rs1, rs2, rs3, (ctx->opcode & (0xe << 16))>>17 ); + break; + + case OPC_RH850_FPU_CMP_D: + /* Call generator with fcond (rs3) and fcbit. */ + fpu_gen_cmpf_d(env, ctx, rs1, rs2, rs3, (ctx->opcode & (0xe << 16))>>17 ); + break; + + default: + /* Unknown inst. */ + break; + } + break; + } +} + +void fpu_decode_cat1_instn(CPURH850State *env, DisasContext *ctx) +{ + int rs1 = GET_RS1(ctx->opcode); + int rs2 = GET_RS2(ctx->opcode); + int rs3 = GET_RS3(ctx->opcode); + + fpu_gen_cat1_ir(env, ctx, MASK_OP_FORMAT_FI(ctx->opcode), rs1, rs2, rs3); +} + +/** + * Initialize FPU. + **/ + +void rh850_fpu_translate_init(void) +{ +} \ No newline at end of file diff --git a/qemu/target/rh850/fpu_translate.h b/qemu/target/rh850/fpu_translate.h new file mode 100644 index 0000000000..b21af6759f --- /dev/null +++ b/qemu/target/rh850/fpu_translate.h @@ -0,0 +1,41 @@ +/* + * QEMU RH850 CPU + * + * Copyright (c) 2016-2017 Sagar Karandikar, sagark@eecs.berkeley.edu + * Copyright (c) 2017-2018 SiFive, Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2 or later, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see . 
+ */ + +#ifndef RH850_FPU_H +#define RH850_FPU_H + +#include "qemu/osdep.h" +#include "cpu.h" +#include "exec/exec-all.h" +#include "tcg/tcg-op.h" +#include "translate.h" +#include "fpu_translate.h" +#include "exec/cpu_ldst.h" +#include "exec/exec-all.h" +#include "exec/helper-proto.h" +#include "exec/helper-gen.h" +#include "exec/translator.h" +#include "translate.h" + +void fpu_decode_cat0_instn(CPURH850State *env, DisasContext *ctx); +void fpu_decode_cat1_instn(CPURH850State *env, DisasContext *ctx); +void fpu_init(CPURH850State *env); +void rh850_fpu_translate_init(void); + +#endif /* RH850_FPU_H */ \ No newline at end of file diff --git a/qemu/target/rh850/gdbstub.c b/qemu/target/rh850/gdbstub.c new file mode 100644 index 0000000000..2abc97fb64 --- /dev/null +++ b/qemu/target/rh850/gdbstub.c @@ -0,0 +1,169 @@ +/* + * RH850 GDB Server Stub + * + * Copyright (c) 2019-2020 Marko Klopcic, iSYSTEM Labs + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2 or later, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see . + */ + +#include "qemu/osdep.h" +#include "qemu-common.h" +#include "exec/gdbstub.h" +#include "cpu.h" + +/* Mapping of winIDEA register index to env->sysBasicRegs() index. (see mail + * from Matic 2019-05-06 and isystem/doc/v850-tdep.c) + QEMU idx wI idx + 32, // eipc 0 + 33, // eipsw 1 + 34, // fepc 2 + 35, // fepsw 3 + 37, // psw 4 + 128, // fpsr 5 + 129, // fpepc 6 + 130, // fpst 7 + 131, // fpcc 8 + 132, // fpcfg 9 + 133, // fpec 10 + 44, // SESR N/A + 45, // EIIC 11 + 46, // FEIC 12 + 48, // CTPC 13 + 49, // CTPSW 14 + 52, // CTBP 15 + 60, // EIWR 16 + 61, // FEWR 17 + 63, // BSEL 18 + 150, // mcfg0 19 + 152, // RBASE 20 + 153, // EBASE 21 + 154, // intbp 22 + 155, // mctl 23 + 156, // pid 24 + 161, // sccfg 25 + 162, // scbp 26 + 182, // htcfg0 27 + 188, // mea 28 + 189, // asid 29 + 190 // mei 30 +*/ +#define BANK_MASK 0xf0000 +#define BANK_SHIFT 16 +#define SRI(selID, regID) (((selID) << BANK_SHIFT) | (regID)) +#define SRI0(regID) (regID) +#define SRI1(regID) SRI(1, (regID)) +#define SRI2(regID) SRI(2, (regID)) + +typedef int IdxType; +const IdxType winIdeaRegIdx2qemuSysRegIdx[] = { +// 0 1 2 3 4 5 6 7 8 9 +// --------------------------------------------- +-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 0 +-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 1 +-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 2 + +-1, -1, SRI0(EIPC_IDX), SRI0(EIPSW_IDX),SRI0(FEPC_IDX),SRI0(FEPSW_IDX),-1, SRI0(PSW_IDX), -1, -1, // 3 +-1, -1, -1, -1, -1, SRI0(EIIC_IDX),SRI0(FEIC_IDX),-1,SRI0(CTPC_IDX),SRI0(CTPSW_IDX), // 4 +-1, -1, SRI0(CTBP_IDX), -1, -1, -1, -1, -1, -1, -1, // 5 + +SRI0(EIWR_IDX),SRI0(FEWR_IDX),-1,SRI0(BSEL_IDX), -1, -1, -1, -1, -1, -1, // 6 +-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 7 +-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 8 + +-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 9 +-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 10 +-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 11 + +-1, -1, -1, -1, -1, -1, -1, -1, SRI0(FPSR_IDX), SRI0(FEPC_IDX), // 12 +SRI0(FPST_IDX),SRI0(FPCC_IDX),SRI0(FPCFG_IDX),SRI0(FPEC_IDX), -1,-1, -1, -1, -1, -1, // 
13 +-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 14 + +SRI1(MCFG0_IDX1),-1,SRI1(RBASE_IDX1),SRI1(EBASE_IDX1),SRI1(INTBP_IDX1),SRI1(MCTL_IDX1),SRI1(PID_IDX1),-1,-1, -1, // 15 +-1, SRI1(SCCFG_IDX1), SRI1(SCBP_IDX1), -1, -1, -1, -1, -1, -1, -1, // 16 +-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 17 + +-1, -1,SRI2(HTCFG0_IDX2), -1, -1, -1, -1, -1,SRI2(MEA_IDX2),SRI2(ASID_IDX2), // 18 +SRI2(MEI_IDX2), -1, -1, -1, -1, -1, -1, -1, -1, -1, // 19 +}; + +const int NUM_GDB_REGS = sizeof(winIdeaRegIdx2qemuSysRegIdx) / sizeof(IdxType); + +int rh850_cpu_gdb_read_register(CPUState *cs, uint8_t *mem_buf, int n) +{ + RH850CPU *cpu = RH850_CPU(cs); + CPURH850State *env = &cpu->env; + + if (n < 32) { + return gdb_get_regl(mem_buf, env->gpRegs[n]); //gpr is now supposed to be progRegs + } else if (n == 64) { + return gdb_get_regl(mem_buf, env->pc); + } else if (n < NUM_GDB_REGS) { + int sysRegIdx = winIdeaRegIdx2qemuSysRegIdx[n]; + if (sysRegIdx >= 0) { + int selID = sysRegIdx >> BANK_SHIFT; + int regID = sysRegIdx & ~BANK_MASK; + if (selID == BANK_ID_BASIC_0 && regID == PSW_IDX) { + int psw = env->Z_flag | (env->S_flag << 1) | (env->OV_flag << 2) | (env->CY_flag << 3); + psw |= (env->SAT_flag << 4) | (env->ID_flag << 5) | (env->EP_flag << 6); + psw |= (env->NP_flag << 7) | (env->EBV_flag << 15) | (env->CU0_flag << 16); + psw |= (env->CU1_flag << 17) | (env->CU2_flag << 18) | (env->UM_flag << 30); + return gdb_get_regl(mem_buf, psw); + } else { + return gdb_get_regl(mem_buf, env->systemRegs[selID][regID]); // eipc, eipsw, fepc, fepsw, psw, ... + } + } + } + + *((uint32_t *)mem_buf) = 0xBAD0BAD0; + return 4; // registers in slots not set above are ignored +} + +int rh850_cpu_gdb_write_register(CPUState *cs, uint8_t *mem_buf, int n) +{ + RH850CPU *cpu = RH850_CPU(cs); + CPURH850State *env = &cpu->env; + // at the moment our GDB server has different indices for writing single register + // will fix this if batch write will have to be supported or interfacing + // to other GDB servers for RH850 will be needed. + if (n > 0 && n < 32) { // skip R0, because it is always 0 + env->gpRegs[n] = ldtul_p(mem_buf); + } else if (n == 64) { + env->pc = ldtul_p(mem_buf); + } else if (n < NUM_GDB_REGS) { + int sysRegIdx = winIdeaRegIdx2qemuSysRegIdx[n]; + if (sysRegIdx >= 0) { + int selID = sysRegIdx >> BANK_SHIFT; + int regID = sysRegIdx & ~BANK_MASK; + if (selID == BANK_ID_BASIC_0 && regID == PSW_IDX) { + int psw = ldtul_p(mem_buf); + env->Z_flag = psw & 1; + env->S_flag = (psw >> 1) & 1; + env->OV_flag = (psw >> 2) & 1; + env->CY_flag = (psw >> 3) & 1; + env->SAT_flag = (psw >> 4) & 1; + env->ID_flag = (psw >> 5) & 1; + env->EP_flag = (psw >> 6) & 1; + env->NP_flag = (psw >> 7) & 1; + env->EBV_flag = (psw >> 15) & 1; + env->CU0_flag = (psw >> 16) & 1; + env->CU1_flag = (psw >> 17) & 1; + env->CU2_flag = (psw >> 18) & 1; + env->UM_flag = (psw >> 30) & 1; + } else { + env->systemRegs[selID][regID] = ldtul_p(mem_buf); // eipc, eipsw, fepc, fepsw, psw, ... + } + } + } + + return sizeof(target_ulong); +} diff --git a/qemu/target/rh850/helper.c b/qemu/target/rh850/helper.c new file mode 100644 index 0000000000..ee171f0dbb --- /dev/null +++ b/qemu/target/rh850/helper.c @@ -0,0 +1,539 @@ +/* + * RH850 emulation helpers for qemu. + * + * Copyright (c) 2016-2017 Sagar Karandikar, sagark@eecs.berkeley.edu + * Copyright (c) 2017-2018 SiFive, Inc. + * Copyright (c) 2018-2019 iSYSTEM Labs d.o.o. 
+ * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2 or later, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include "qemu/osdep.h" +#include "qemu/log.h" +#include "cpu.h" +#include "exec/exec-all.h" + +#define RH850_DEBUG_INTERRUPT 0 + +int rh850_cpu_mmu_index(CPURH850State *env, bool ifetch) +{ + return 0; +} + +#ifndef CONFIG_USER_ONLY +/* + * Return RH850 IRQ number if an interrupt should be taken, else -1. + * Used in cpu-exec.c + * + * Adapted from Spike's processor_t::take_interrupt() + */ + +#if 0 /* Not used */ +static int rh850_cpu_hw_interrupts_pending(CPURH850State *env) +{ + + return EXCP_NONE; +} +#endif +#endif + +uint32_t psw2int(CPURH850State * env); +uint32_t mem_deref_4(CPUState * cs, uint32_t addr); + + +uint32_t psw2int(CPURH850State * env) +{ + uint32_t ret = 0; + ret |= env->UM_flag<<30; + ret |= env->CU0_flag<<16; + ret |= env->CU1_flag<<17; + ret |= env->CU2_flag<<18; + ret |= env->EBV_flag<<15; + ret |= env->NP_flag<<7; + ret |= env->EP_flag<<6; + ret |= env->ID_flag<<5; + ret |= env->SAT_flag<<4; + ret |= env->CY_flag<<3; + ret |= env->OV_flag<<2; + ret |= env->S_flag<<1; + ret |= env->Z_flag; + + return ret; +} + +/* + * RH850 interrupt handler. + **/ + +bool rh850_cpu_exec_interrupt(CPUState *cs, int interrupt_request) +{ +#if !defined(CONFIG_USER_ONLY) + RH850CPU *cpu = RH850_CPU(cs); + CPURH850State *env = &cpu->env; + + //qemu_log("[cpu] exec_interrupt: got interrupt_req=%08x\n", interrupt_request); + + /* Handle FENMI interrupt. */ + if (interrupt_request == RH850_INT_FENMI) + { + /* Set exception info. */ + cs->exception_index = RH850_EXCP_FENMI; + env->exception_cause = 0xE0; + env->exception_priority = 1; + + /* Acknowledge interrupt. */ + rh850_cpu_do_interrupt(cs); + } + else if (interrupt_request == RH850_INT_FEINT) + { + if (!(env->systemRegs[BANK_ID_BASIC_2][PMR_IDX2] & (1 << env->exception_priority))) + { + /* Set exception info. */ + cs->exception_index = RH850_EXCP_FEINT; + env->exception_cause = 0xF0; + env->exception_priority = 3; + + /* Acknowledge interrupt. */ + rh850_cpu_do_interrupt(cs); + } + } + else if (interrupt_request == RH850_EXCP_EIINT) + { + //qemu_log("exec_interrupt got RH850_EXCP_EIINT\n"); + + /* Get interrupt request number. */ + //int intn = env->exception_cause & 0xfff; + int priority = 4; + + //qemu_log("[cpu] exec_interrupt: got interrupt_req=%08x\n", interrupt_request); + + /* Check if interrupt priority is not masked (through PMR). */ + if (!(env->systemRegs[BANK_ID_BASIC_2][PMR_IDX2] & (1 << priority))) + { + cs->exception_index = RH850_EXCP_EIINT; + //env->exception_cause = 0x1000 | (intn); + //env->exception_dv = !(interrupt_request & RH850_INT_TAB_REF); + env->exception_priority = priority; + + /* Acknowledge interrupt. */ + rh850_cpu_do_interrupt(cs); + } + else + { + //qemu_log("[cpu] interrupt priority is masked\n"); + } + } +#endif + + /* Interrupt request has been processed. 
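The pending request flag is cleared unconditionally below and the function always returns false; any actual exception dispatch has already been performed by rh850_cpu_do_interrupt() above.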
*/ + cs->interrupt_request = 0; + return false; +} + +#if !defined(CONFIG_USER_ONLY) + + +static int get_physical_address(CPURH850State *env, hwaddr *physical, + int *prot, target_ulong addr, + int access_type, int mmu_idx) +{ + + /* + * There is no memory virtualization in RH850 (at least for the targeted SoC) + * Address resolution is straightforward + */ + *physical = addr; + *prot = PAGE_READ | PAGE_WRITE | PAGE_EXEC; + return TRANSLATE_SUCCESS; + +} + +static void raise_mmu_exception(CPURH850State *env, target_ulong address, + MMUAccessType access_type) +{ + CPUState *cs = CPU(rh850_env_get_cpu(env)); + int page_fault_exceptions = RH850_EXCP_INST_PAGE_FAULT; + switch (access_type) { + case MMU_INST_FETCH: + cs->exception_index = page_fault_exceptions ? + RH850_EXCP_INST_PAGE_FAULT : RH850_EXCP_INST_ACCESS_FAULT; + break; + case MMU_DATA_LOAD: + cs->exception_index = page_fault_exceptions ? + RH850_EXCP_LOAD_PAGE_FAULT : RH850_EXCP_LOAD_ACCESS_FAULT; + break; + case MMU_DATA_STORE: + cs->exception_index = page_fault_exceptions ? + RH850_EXCP_STORE_PAGE_FAULT : RH850_EXCP_STORE_AMO_ACCESS_FAULT; + break; + default: + g_assert_not_reached(); + } + env->badaddr = address; +} + +hwaddr rh850_cpu_get_phys_page_debug(CPUState *cs, vaddr addr) +{ + RH850CPU *cpu = RH850_CPU(cs); + hwaddr phys_addr; + int prot; + int mmu_idx = cpu_mmu_index(&cpu->env, false); + + if (get_physical_address(&cpu->env, &phys_addr, &prot, addr, 0, mmu_idx)) { + return -1; + } + return phys_addr; +} + +void rh850_cpu_do_unaligned_access(CPUState *cs, vaddr addr, + MMUAccessType access_type, int mmu_idx, + uintptr_t retaddr) +{ + RH850CPU *cpu = RH850_CPU(cs); + CPURH850State *env = &cpu->env; + switch (access_type) { + case MMU_INST_FETCH: + cs->exception_index = RH850_EXCP_INST_ADDR_MIS; + break; + case MMU_DATA_LOAD: + cs->exception_index = RH850_EXCP_LOAD_ADDR_MIS; + break; + case MMU_DATA_STORE: + cs->exception_index = RH850_EXCP_STORE_AMO_ADDR_MIS; + break; + default: + g_assert_not_reached(); + } + env->badaddr = addr; + //qemu_log_mask(CPU_LOG_INT, "%s\n", __func__); + do_raise_exception_err(env, cs->exception_index, retaddr); +} + +#endif + +int rh850_cpu_handle_mmu_fault(CPUState *cs, vaddr address, int size, + int rw, int mmu_idx) +{ + + + /* + * TODO: Add check to system register concerning MPU configuratuon MPLA, MPUA + * + */ + RH850CPU *cpu = RH850_CPU(cs); + CPURH850State *env = &cpu->env; +#if !defined(CONFIG_USER_ONLY) + hwaddr pa = 0; + int prot; +#endif + int ret = TRANSLATE_FAIL; + qemu_log_mask(CPU_LOG_MMU, + "%s pc " TARGET_FMT_lx " ad %" VADDR_PRIx " rw %d mmu_idx \ + %d\n", __func__, env->pc, address, rw, mmu_idx); + +#if !defined(CONFIG_USER_ONLY) + + ret = get_physical_address(env, &pa, &prot, address, rw, mmu_idx); + qemu_log_mask(CPU_LOG_MMU, + "%s address=%" VADDR_PRIx " ret %d physical " TARGET_FMT_plx + " prot %d\n", __func__, address, ret, pa, prot); + if (ret == TRANSLATE_SUCCESS) { + tlb_set_page(cs, address & TARGET_PAGE_MASK, pa & TARGET_PAGE_MASK, + prot, mmu_idx, TARGET_PAGE_SIZE); + } else if (ret == TRANSLATE_FAIL) { + raise_mmu_exception(env, address, rw); + } +#else + switch (rw) { + case MMU_INST_FETCH: + cs->exception_index = RH850_EXCP_INST_PAGE_FAULT; + break; + case MMU_DATA_LOAD: + cs->exception_index = RH850_EXCP_LOAD_PAGE_FAULT; + break; + case MMU_DATA_STORE: + cs->exception_index = RH850_EXCP_STORE_PAGE_FAULT; + break; + } +#endif + return ret; +} + + +uint32_t mem_deref_4(CPUState * cs, uint32_t addr){ + uint8_t * buf = g_malloc(4); + uint32_t ret_dword = 0; + 
cpu_memory_rw_debug(cs, addr, buf, 4, false); + + ret_dword |= buf[3] << 24; + ret_dword |= buf[2] << 16; + ret_dword |= buf[1] << 8; + ret_dword |= buf[0]; + g_free(buf); + return ret_dword; +} + + +void rh850_cpu_do_interrupt(CPUState *cs) +{ + + //qemu_log("[cpu] rh850_cpu_do_interrupt()\n"); + //qemu_log_mask(CPU_LOG_INT, "%s\n", __func__); +#if !defined(CONFIG_USER_ONLY) + uint32_t intbp; + RH850CPU *cpu = RH850_CPU(cs); + CPURH850State *env = &cpu->env; + + uint32_t direct_vector_ba; + qemu_log_mask(CPU_LOG_INT, "%s: entering switch\n", __func__); + switch (cs->exception_index) { + case RH850_EXCP_FETRAP: + + qemu_log_mask(CPU_LOG_INT, "%s: entering FETRAP handler\n", __func__); + // store PSW to FEPSW (and update env->EBV_flag) + env->systemRegs[BANK_ID_BASIC_0][FEPSW_IDX] = psw2int(env); + // store PC to FEPC + env->systemRegs[BANK_ID_BASIC_0][FEPC_IDX] = env->pc+2; + // Set Exception Cause + env->systemRegs[BANK_ID_BASIC_0][FEIC_IDX] = env->exception_cause; + + qemu_log_mask(CPU_LOG_INT, "%s, saved pc : %x\n", __func__,env->pc); + + // update PSW + env->UM_flag = 0; + env->NP_flag = 1; + env->EP_flag = 1; + env->ID_flag = 1; + + // modify PC, keep RBASE or EBASE bits 9 to 31 (discard bits 0 to 8) + if (env->EBV_flag) + direct_vector_ba = env->systemRegs[BANK_ID_BASIC_1][EBASE_IDX1] & 0xFFFFFE00; + else + direct_vector_ba = env->systemRegs[BANK_ID_BASIC_1][RBASE_IDX1] & 0xFFFFFE00; + + qemu_log_mask(CPU_LOG_INT, "%s: direct vector addr : %x \n", __func__,direct_vector_ba); + env->pc = direct_vector_ba + 0x30; + break; + + case RH850_EXCP_TRAP: + qemu_log_mask(CPU_LOG_INT, "%s: entering TRAP handler\n", __func__); + // store PSW to EIPSW + env->systemRegs[BANK_ID_BASIC_0][EIPSW_IDX] = psw2int(env); + // store PC to EIPC + env->systemRegs[BANK_ID_BASIC_0][EIPC_IDX] = env->pc+4; + // Set Exception Cause + env->systemRegs[BANK_ID_BASIC_0][EIIC_IDX] = env->exception_cause; + + env->UM_flag = 0; + env->EP_flag = 1; + env->ID_flag = 1; + + // modify PC, keep RBASE or EBASE bits 9 to 31 (discard bits 0 to 8) + if (env->EBV_flag) + direct_vector_ba = env->systemRegs[BANK_ID_BASIC_1][EBASE_IDX1] & 0xFFFFFE00; + else + direct_vector_ba = env->systemRegs[BANK_ID_BASIC_1][RBASE_IDX1] & 0xFFFFFE00; + + if (env->exception_cause < 0x50) { + env->pc = direct_vector_ba + 0x40; + } else { + env->pc = direct_vector_ba + 0x50; + } + break; + + case RH850_EXCP_RIE: + //qemu_log("%s: entering RIE handler\n", __func__); + // store PSW to FEPSW + env->systemRegs[BANK_ID_BASIC_0][FEPSW_IDX] = psw2int(env); + // store PC to FEPC + env->systemRegs[BANK_ID_BASIC_0][FEPC_IDX] = env->pc; + // Set Exception Cause + env->systemRegs[BANK_ID_BASIC_0][FEIC_IDX] = env->exception_cause; + //qemu_log("%s, saved pc : %x\n", __func__,env->pc); + // update PSW + + env->UM_flag = 0; + env->NP_flag = 1; + env->EP_flag = 1; + env->ID_flag = 1; + + // modify PC, keep RBASE or EBASE bits 9 to 31 (discard bits 0 to 8) + if (env->EBV_flag) + direct_vector_ba = env->systemRegs[BANK_ID_BASIC_1][EBASE_IDX1] & 0xFFFFFE00; + else + direct_vector_ba = env->systemRegs[BANK_ID_BASIC_1][RBASE_IDX1] & 0xFFFFFE00; + + //qemu_log("%s: direct vector addr : %x \n", __func__,direct_vector_ba); + env->pc = direct_vector_ba + 0x60; + //qemu_log("%s: pc : 0x%08x \n", __func__, direct_vector_ba+0x60); + break; + + case RH850_EXCP_SYSCALL: + qemu_log_mask(CPU_LOG_INT, "%s: entering SYSCALL handler\n", __func__); + uint32_t syscall_cfg = env->systemRegs[BANK_ID_BASIC_1][SCCFG_IDX1] & 0xff; + uint32_t syscall_number = env->exception_cause - 0x8000; + 
uint32_t syscall_bp = env->systemRegs[BANK_ID_BASIC_1][SCBP_IDX1]; + uint32_t handler_offset=0, deref_addr=0; + + if (syscall_number <= syscall_cfg) { + deref_addr = syscall_bp + (syscall_number<<2); + } else { + + deref_addr = syscall_bp; + } + + qemu_log_mask(CPU_LOG_INT, "%s syscall_cfg_size = %d\n", __func__,syscall_cfg); + qemu_log_mask(CPU_LOG_INT, "%s syscall_bp = %d\n", __func__,syscall_bp); + qemu_log_mask(CPU_LOG_INT, "%s syscall_num = %d\n", __func__,syscall_number); + qemu_log_mask(CPU_LOG_INT, "%s deref_addr = 0x%x\n", __func__,deref_addr); + handler_offset = mem_deref_4(cs,deref_addr); + qemu_log_mask(CPU_LOG_INT, "%s handler offset = %x\n", __func__,handler_offset); + + // store PSW to EIPSW + env->systemRegs[BANK_ID_BASIC_0][EIPSW_IDX] = psw2int(env); + // store PC to EIPC + env->systemRegs[BANK_ID_BASIC_0][EIPC_IDX] = env->pc+4; + // Set Exception Cause + env->systemRegs[BANK_ID_BASIC_0][EIIC_IDX] = env->exception_cause; + + env->UM_flag = 0; + env->EP_flag = 1; + env->ID_flag = 1; + + // modify PC + env->pc = syscall_bp + handler_offset; + qemu_log_mask(CPU_LOG_INT, "%s: moving pc to = 0x%x\n", __func__,env->pc); + + break; + + case RH850_EXCP_FEINT: + //qemu_log("[cpu] entering FEINT handler\n"); + // store PSW to FEPSW + env->systemRegs[BANK_ID_BASIC_0][FEPSW_IDX] = psw2int(env); + // store PC to FEPC + env->systemRegs[BANK_ID_BASIC_0][FEPC_IDX] = env->pc; + // Set Exception Cause + env->systemRegs[BANK_ID_BASIC_0][FEIC_IDX] = env->exception_cause; + + /* Update PSW. */ + env->UM_flag = 0; + env->ID_flag = 1; + env->NP_flag = 1; + env->EP_flag = 0; + + /* Direct vector. */ + if (env->EBV_flag) + direct_vector_ba = env->systemRegs[BANK_ID_BASIC_1][EBASE_IDX1]; + else + direct_vector_ba = env->systemRegs[BANK_ID_BASIC_1][RBASE_IDX1]; + + /* Redirect to FEINT exception handler. */ + env->pc = (direct_vector_ba & 0xFFFFFF00) + 0xF0; + //qemu_log("%s: moving pc to = 0x%x\n", __func__,env->pc); + break; + + case RH850_EXCP_FENMI: + //qemu_log("[cpu] entering FENMI handler\n"); + // store PSW to FEPSW + env->systemRegs[BANK_ID_BASIC_0][FEPSW_IDX] = psw2int(env); + // store PC to FEPC + env->systemRegs[BANK_ID_BASIC_0][FEPC_IDX] = env->pc; + // Set Exception Cause + env->systemRegs[BANK_ID_BASIC_0][FEIC_IDX] = env->exception_cause; + + /* Update PSW. */ + env->UM_flag = 0; + env->ID_flag = 1; + env->NP_flag = 1; + env->EP_flag = 0; + + /* Direct vector. */ + if (env->EBV_flag) + direct_vector_ba = env->systemRegs[BANK_ID_BASIC_1][EBASE_IDX1]; + else + direct_vector_ba = env->systemRegs[BANK_ID_BASIC_1][RBASE_IDX1]; + + /* Redirect to FENMI exception handler. */ + env->pc = (direct_vector_ba & 0xFFFFFF00) + 0xE0; + break; + + case RH850_EXCP_EIINT: + //qemu_log("[cpu] entering EIINT handler\n"); + //qemu_log_mask(CPU_LOG_INT, "%s: entering EIINT handler\n", __func__); + + // store PSW to EIPSW + env->systemRegs[BANK_ID_BASIC_0][EIPSW_IDX] = psw2int(env); + // store PC to EIPC + env->systemRegs[BANK_ID_BASIC_0][EIPC_IDX] = env->pc; + // Set Exception Cause + env->systemRegs[BANK_ID_BASIC_0][EIIC_IDX] = env->exception_cause; + // Set priority to ISPR + env->systemRegs[BANK_ID_BASIC_2][ISPR_IDX2] |= (1 << env->exception_priority); + + /* Set PSW.ID (disable further EI exceptions). */ + env->ID_flag = 1; + + /* Clear PSW.EP (we are processing an interrupt). */ + env->EP_flag = 0; + + /* Modify PC based on dispatch method (direct vector or table reference). 
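Table reference fetches the handler address from INTBP + 4 * channel; direct vector jumps to RBASE/EBASE + 0x100, adding priority * 16 when the RINT bit is clear (one handler per priority level).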
*/ + if (!env->exception_dv) + { + //qemu_log("[cpu] dispatch EIINT (table reference) for IRQ %d\n", env->exception_cause&0x1ff); + /* Table reference, first read INTBP value. */ + intbp = env->systemRegs[BANK_ID_BASIC_1][INTBP_IDX1]; + //qemu_log("[cpu] INTBP=0x%08x\n", intbp); + + /* Compute address of interrupt handler (based on channel). */ + env->pc = mem_deref_4(cs, intbp + 4*(env->exception_cause & 0x1ff)); + //qemu_log("[cpu] PC=0x%08x\n", env->pc); + } + else + { + //qemu_log("[cpu] dispatch EIINT (direct vector) for IRQ %d\n", env->exception_cause&0x1ff); + //qemu_log("[cpu] exception priority=%d\n", env->exception_priority); + /* Direct vector. */ + if (env->EBV_flag) + direct_vector_ba = env->systemRegs[BANK_ID_BASIC_1][EBASE_IDX1]; + else + direct_vector_ba = env->systemRegs[BANK_ID_BASIC_1][RBASE_IDX1]; + //qemu_log("[cpu] Direct vector Base Address = 0x%08x\n", direct_vector_ba); + + /* Is RINT bit set ? */ + if (direct_vector_ba & 1) + { + //qemu_log("[cpu] RINT bit set\n"); + /* Reduced vector (one handler for any priority). */ + env->pc = (direct_vector_ba & 0xFFFFFF00) + 0x100; + } + else + { + //qemu_log("[cpu] RINT bit NOT set\n"); + /* One handler per priority level. */ + env->pc = (direct_vector_ba & 0xFFFFFF00) + 0x100 + (env->exception_priority<<4); + } + //qemu_log("[cpu] PC=0x%08x\n", env->pc); + } + break; + } + +#endif + cs->exception_index = EXCP_NONE; /* mark handled to qemu */ +} diff --git a/qemu/target/rh850/helper.h b/qemu/target/rh850/helper.h new file mode 100644 index 0000000000..24c9fa5865 --- /dev/null +++ b/qemu/target/rh850/helper.h @@ -0,0 +1,157 @@ +DEF_HELPER_4(uc_tracecode, void, i32, i32, ptr, i64) +DEF_HELPER_6(uc_traceopcode, void, ptr, i64, i64, i32, ptr, i64) +DEF_HELPER_1(uc_rh850_exit, void, env) + +/* Exceptions */ +DEF_HELPER_2(raise_exception, noreturn, env, i32) +DEF_HELPER_3(raise_exception_with_cause, noreturn, env, i32, i32) + + +/* Floating Point - rounding mode */ +DEF_HELPER_FLAGS_2(set_rounding_mode, TCG_CALL_NO_WG, void, env, i32) + +/* Floating Point - fused */ +DEF_HELPER_FLAGS_4(fmadd_s, TCG_CALL_NO_RWG, i64, env, i64, i64, i64) +DEF_HELPER_FLAGS_4(fmadd_d, TCG_CALL_NO_RWG, i64, env, i64, i64, i64) +DEF_HELPER_FLAGS_4(fmsub_s, TCG_CALL_NO_RWG, i64, env, i64, i64, i64) +DEF_HELPER_FLAGS_4(fmsub_d, TCG_CALL_NO_RWG, i64, env, i64, i64, i64) +DEF_HELPER_FLAGS_4(fnmsub_s, TCG_CALL_NO_RWG, i64, env, i64, i64, i64) +DEF_HELPER_FLAGS_4(fnmsub_d, TCG_CALL_NO_RWG, i64, env, i64, i64, i64) +DEF_HELPER_FLAGS_4(fnmadd_s, TCG_CALL_NO_RWG, i64, env, i64, i64, i64) +DEF_HELPER_FLAGS_4(fnmadd_d, TCG_CALL_NO_RWG, i64, env, i64, i64, i64) + +/* Floating Point - Single Precision */ +DEF_HELPER_FLAGS_2(f32_is_normal, TCG_CALL_NO_RWG, i32, env, i32) +DEF_HELPER_FLAGS_2(f32_is_zero_or_normal, TCG_CALL_NO_RWG, i32, env, i32) +DEF_HELPER_FLAGS_2(f32_is_infinity, TCG_CALL_NO_RWG, i32, env, i32) +DEF_HELPER_FLAGS_1(f_sync_fflags, TCG_CALL_NO_RWG, void, env) + +DEF_HELPER_FLAGS_3(fadd_s, TCG_CALL_NO_RWG, i32, env, i32, i32) +DEF_HELPER_FLAGS_3(fsub_s, TCG_CALL_NO_RWG, i32, env, i32, i32) +DEF_HELPER_FLAGS_3(fmul_s, TCG_CALL_NO_RWG, i32, env, i32, i32) +DEF_HELPER_FLAGS_3(fmax_s, TCG_CALL_NO_RWG, i32, env, i32, i32) +DEF_HELPER_FLAGS_3(fmin_s, TCG_CALL_NO_RWG, i32, env, i32, i32) +DEF_HELPER_FLAGS_3(fdiv_s, TCG_CALL_NO_RWG, i32, env, i32, i32) +DEF_HELPER_FLAGS_2(fabs_s, TCG_CALL_NO_RWG, i32, env, i32) +DEF_HELPER_FLAGS_2(fneg_s, TCG_CALL_NO_RWG, i32, env, i32) +DEF_HELPER_FLAGS_2(ftrnc_sw, TCG_CALL_NO_RWG, i32, env, i32) +DEF_HELPER_FLAGS_2(fceil_sw, 
TCG_CALL_NO_RWG, i32, env, i32) +DEF_HELPER_FLAGS_2(ffloor_sw, TCG_CALL_NO_RWG, i32, env, i32) +DEF_HELPER_FLAGS_2(fcvt_sw, TCG_CALL_NO_RWG, i32, env, i32) +DEF_HELPER_FLAGS_2(ftrnc_suw, TCG_CALL_NO_RWG, i32, env, i32) +DEF_HELPER_FLAGS_2(fceil_suw, TCG_CALL_NO_RWG, i32, env, i32) +DEF_HELPER_FLAGS_2(ffloor_suw, TCG_CALL_NO_RWG, i32, env, i32) +DEF_HELPER_FLAGS_2(fcvt_suw, TCG_CALL_NO_RWG, i32, env, i32) +DEF_HELPER_FLAGS_2(fcvt_ws, TCG_CALL_NO_RWG, i32, env, i32) +DEF_HELPER_FLAGS_2(fcvt_ls, TCG_CALL_NO_RWG, i32, env, i64) +DEF_HELPER_FLAGS_2(fcvt_hs, TCG_CALL_NO_RWG, i32, env, i32) +DEF_HELPER_FLAGS_2(fcvt_sh, TCG_CALL_NO_RWG, i32, env, i32) +DEF_HELPER_FLAGS_2(fcvt_uws, TCG_CALL_NO_RWG, i32, env, i32) +DEF_HELPER_FLAGS_2(fcvt_uls, TCG_CALL_NO_RWG, i32, env, i64) +DEF_HELPER_FLAGS_2(ftrnc_sl, TCG_CALL_NO_RWG, i64, env, i32) +DEF_HELPER_FLAGS_2(fceil_sl, TCG_CALL_NO_RWG, i64, env, i32) +DEF_HELPER_FLAGS_2(ffloor_sl, TCG_CALL_NO_RWG, i64, env, i32) +DEF_HELPER_FLAGS_2(fcvt_sl, TCG_CALL_NO_RWG, i64, env, i32) +DEF_HELPER_FLAGS_2(ftrnc_sul, TCG_CALL_NO_RWG, i64, env, i32) +DEF_HELPER_FLAGS_2(fceil_sul, TCG_CALL_NO_RWG, i64, env, i32) +DEF_HELPER_FLAGS_2(ffloor_sul, TCG_CALL_NO_RWG, i64, env, i32) +DEF_HELPER_FLAGS_2(fcvt_sul, TCG_CALL_NO_RWG, i64, env, i32) +DEF_HELPER_FLAGS_2(fsqrt_s, TCG_CALL_NO_RWG, i32, env, i32) +DEF_HELPER_FLAGS_2(frecip_s, TCG_CALL_NO_RWG, i32, env, i32) +DEF_HELPER_FLAGS_2(frsqrt_s, TCG_CALL_NO_RWG, i32, env, i32) + +DEF_HELPER_FLAGS_2(f_is_nan_s, TCG_CALL_NO_RWG, i32, env, i32) +DEF_HELPER_FLAGS_3(fle_s, TCG_CALL_NO_RWG, i32, env, i32, i32) +DEF_HELPER_FLAGS_3(flt_s, TCG_CALL_NO_RWG, i32, env, i32, i32) +DEF_HELPER_FLAGS_3(feq_s, TCG_CALL_NO_RWG, i32, env, i32, i32) +DEF_HELPER_FLAGS_2(fcvt_w_s, TCG_CALL_NO_RWG, tl, env, i64) +DEF_HELPER_FLAGS_2(fcvt_wu_s, TCG_CALL_NO_RWG, tl, env, i64) + +DEF_HELPER_FLAGS_4(fmaf_s, TCG_CALL_NO_RWG, i32, env, i32, i32, i32) +DEF_HELPER_FLAGS_4(fmsf_s, TCG_CALL_NO_RWG, i32, env, i32, i32, i32) +DEF_HELPER_FLAGS_4(fnmaf_s, TCG_CALL_NO_RWG, i32, env, i32, i32, i32) +DEF_HELPER_FLAGS_4(fnmsf_s, TCG_CALL_NO_RWG, i32, env, i32, i32, i32) + + + + +#if defined(TARGET_RH85064) +DEF_HELPER_FLAGS_2(fcvt_l_s, TCG_CALL_NO_RWG, tl, env, i64) +DEF_HELPER_FLAGS_2(fcvt_lu_s, TCG_CALL_NO_RWG, tl, env, i64) +#endif +DEF_HELPER_FLAGS_2(fcvt_s_w, TCG_CALL_NO_RWG, i64, env, tl) +DEF_HELPER_FLAGS_2(fcvt_s_wu, TCG_CALL_NO_RWG, i64, env, tl) +#if defined(TARGET_RH85064) +DEF_HELPER_FLAGS_2(fcvt_s_l, TCG_CALL_NO_RWG, i64, env, tl) +DEF_HELPER_FLAGS_2(fcvt_s_lu, TCG_CALL_NO_RWG, i64, env, tl) +#endif +DEF_HELPER_FLAGS_1(fclass_s, TCG_CALL_NO_RWG_SE, tl, i64) + +/* Floating Point - Double Precision */ +DEF_HELPER_FLAGS_3(fadd_d, TCG_CALL_NO_RWG, i64, env, i64, i64) +DEF_HELPER_FLAGS_3(fsub_d, TCG_CALL_NO_RWG, i64, env, i64, i64) +DEF_HELPER_FLAGS_3(fmul_d, TCG_CALL_NO_RWG, i64, env, i64, i64) +DEF_HELPER_FLAGS_3(fmax_d, TCG_CALL_NO_RWG, i64, env, i64, i64) +DEF_HELPER_FLAGS_3(fmin_d, TCG_CALL_NO_RWG, i64, env, i64, i64) +DEF_HELPER_FLAGS_3(fdiv_d, TCG_CALL_NO_RWG, i64, env, i64, i64) +DEF_HELPER_FLAGS_2(fabs_d, TCG_CALL_NO_RWG, i64, env, i64) +DEF_HELPER_FLAGS_2(fneg_d, TCG_CALL_NO_RWG, i64, env, i64) + +DEF_HELPER_FLAGS_2(ftrnc_dw, TCG_CALL_NO_RWG, i32, env, i64) +DEF_HELPER_FLAGS_2(fceil_dw, TCG_CALL_NO_RWG, i32, env, i64) +DEF_HELPER_FLAGS_2(ffloor_dw, TCG_CALL_NO_RWG, i32, env, i64) +DEF_HELPER_FLAGS_2(fcvt_dw, TCG_CALL_NO_RWG, i32, env, i64) +DEF_HELPER_FLAGS_2(ftrnc_duw, TCG_CALL_NO_RWG, i32, env, i64) +DEF_HELPER_FLAGS_2(fceil_duw, TCG_CALL_NO_RWG, i32, 
env, i64) +DEF_HELPER_FLAGS_2(ffloor_duw, TCG_CALL_NO_RWG, i32, env, i64) +DEF_HELPER_FLAGS_2(fcvt_duw, TCG_CALL_NO_RWG, i32, env, i64) +DEF_HELPER_FLAGS_2(fcvt_wd, TCG_CALL_NO_RWG, i64, env, i32) +DEF_HELPER_FLAGS_2(fcvt_ld, TCG_CALL_NO_RWG, i64, env, i64) +DEF_HELPER_FLAGS_2(fcvt_sd, TCG_CALL_NO_RWG, i64, env, i32) +DEF_HELPER_FLAGS_2(fcvt_uwd, TCG_CALL_NO_RWG, i64, env, i32) +DEF_HELPER_FLAGS_2(fcvt_uld, TCG_CALL_NO_RWG, i64, env, i64) +DEF_HELPER_FLAGS_2(ftrnc_dl, TCG_CALL_NO_RWG, i64, env, i64) +DEF_HELPER_FLAGS_2(fceil_dl, TCG_CALL_NO_RWG, i64, env, i64) +DEF_HELPER_FLAGS_2(ffloor_dl, TCG_CALL_NO_RWG, i64, env, i64) +DEF_HELPER_FLAGS_2(fcvt_dl, TCG_CALL_NO_RWG, i64, env, i64) +DEF_HELPER_FLAGS_2(ftrnc_dul, TCG_CALL_NO_RWG, i64, env, i64) +DEF_HELPER_FLAGS_2(fceil_dul, TCG_CALL_NO_RWG, i64, env, i64) +DEF_HELPER_FLAGS_2(ffloor_dul, TCG_CALL_NO_RWG, i64, env, i64) +DEF_HELPER_FLAGS_2(fcvt_dul, TCG_CALL_NO_RWG, i64, env, i64) +DEF_HELPER_FLAGS_2(fsqrt_d, TCG_CALL_NO_RWG, i64, env, i64) +DEF_HELPER_FLAGS_2(frecip_d, TCG_CALL_NO_RWG, i64, env, i64) +DEF_HELPER_FLAGS_2(frsqrt_d, TCG_CALL_NO_RWG, i64, env, i64) + +DEF_HELPER_FLAGS_2(f_is_nan_d, TCG_CALL_NO_RWG, i32, env, i64) +DEF_HELPER_FLAGS_3(fle_d, TCG_CALL_NO_RWG, i32, env, i64, i64) +DEF_HELPER_FLAGS_3(flt_d, TCG_CALL_NO_RWG, i32, env, i64, i64) +DEF_HELPER_FLAGS_3(feq_d, TCG_CALL_NO_RWG, i32, env, i64, i64) + + + +DEF_HELPER_FLAGS_2(fcvt_s_d, TCG_CALL_NO_RWG, i64, env, i64) +DEF_HELPER_FLAGS_2(fcvt_d_s, TCG_CALL_NO_RWG, i64, env, i64) +DEF_HELPER_FLAGS_2(fcvt_w_d, TCG_CALL_NO_RWG, tl, env, i64) +DEF_HELPER_FLAGS_2(fcvt_wu_d, TCG_CALL_NO_RWG, tl, env, i64) +#if defined(TARGET_RH85064) +DEF_HELPER_FLAGS_2(fcvt_l_d, TCG_CALL_NO_RWG, tl, env, i64) +DEF_HELPER_FLAGS_2(fcvt_lu_d, TCG_CALL_NO_RWG, tl, env, i64) +#endif +DEF_HELPER_FLAGS_2(fcvt_d_w, TCG_CALL_NO_RWG, i64, env, tl) +DEF_HELPER_FLAGS_2(fcvt_d_wu, TCG_CALL_NO_RWG, i64, env, tl) +#if defined(TARGET_RH85064) +DEF_HELPER_FLAGS_2(fcvt_d_l, TCG_CALL_NO_RWG, i64, env, tl) +DEF_HELPER_FLAGS_2(fcvt_d_lu, TCG_CALL_NO_RWG, i64, env, tl) +#endif +DEF_HELPER_FLAGS_1(fclass_d, TCG_CALL_NO_RWG_SE, tl, i64) + +/* Special functions */ +//DEF_HELPER_3(csrrw, tl, env, tl, tl) +//DEF_HELPER_4(csrrs, tl, env, tl, tl, tl) +//DEF_HELPER_4(csrrc, tl, env, tl, tl, tl) +#ifndef CONFIG_USER_ONLY +//DEF_HELPER_2(sret, tl, env, tl) +//DEF_HELPER_2(mret, tl, env, tl) +//DEF_HELPER_1(wfi, void, env) +DEF_HELPER_1(tlb_flush, void, env) +#endif diff --git a/qemu/target/rh850/instmap.h b/qemu/target/rh850/instmap.h new file mode 100644 index 0000000000..2cbf2aed2f --- /dev/null +++ b/qemu/target/rh850/instmap.h @@ -0,0 +1,624 @@ +/* + * RH850 emulation for qemu: Instruction decode helpers + * + * Copyright (c) 2016-2017 Sagar Karandikar, sagark@eecs.berkeley.edu + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2 or later, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see . 
+ */ + +#ifndef _RH850_INSTMAP_H +#define _RH850_INSTMAP_H + +enum{ + /*SIGNED INT*/ + COND_RH850_BGE = 1110, + COND_RH850_BGT = 1111, + COND_RH850_BLE = 0111, + COND_RH850_BLT = 0110, + /*UNSIGNED INT*/ + COND_RH850_BH = 1011, + COND_RH850_BL = 0001, + COND_RH850_BNH = 0011, + COND_RH850_BNL = 1001, + /*COMMON*/ + COND_RH850_BE = 0010, + COND_RH850_BNE = 1010, + /*OTHERS*/ + COND_RH850_BC = 0001, + COND_RH850_BF = 1010, + COND_RH850_BN = 0100, + COND_RH850_BNC = 1001, + COND_RH850_BNV = 1000, + COND_RH850_BNZ = 1010, + COND_RH850_BP = 1100, + COND_RH850_BR = 0101, + COND_RH850_BSA = 1101, + COND_RH850_BT = 0010, + COND_RH850_BV = 0000, + COND_RH850_BZ = 0010, +}; + +#define MASK_OP_MAJOR(op) (op & (0x3F << 5)) // the major opcode in rh850 is at bits 10-5 +enum { + /* FORMAT I */ // unique opcodes and grouped instructions + OPC_RH850_16bit_0 = (0x0 << 5), // group with opcode 0x0 (nop, synci, synce, syncm, syncp, mov) + OPC_RH850_NOT_reg1_reg2 = (0x1 << 5), + OPC_RH850_16bit_2 = (0x2 << 5), // group with opcode 0x2 (rie, switch, divh, fetrap) + OPC_RH850_16bit_3 = (0x3 << 5), // group with opcode 0x3 (jmp,sld.bu,sld.hu) + OPC_RH850_16bit_4 = (0x4 << 5), // group with opcode 0x4 (zyb, satsub) + OPC_RH850_16bit_5 = (0x5 << 5), // group with opcode 0x5 (sxb, satsub) + OPC_RH850_16bit_6 = (0x6 << 5), // group with opcode 0x6 (zyh, satadd) + OPC_RH850_16bit_7 = (0x7 << 5), // group with opcode 0x7 (sxh, mulh) + OPC_RH850_OR_reg1_reg2 = (0x8 << 5), + OPC_RH850_XOR_reg1_reg2 = (0x9 << 5), + OPC_RH850_AND_reg1_reg2 = (0xA << 5), + OPC_RH850_TST_reg1_reg2 = (0xB << 5), + OPC_RH850_SUBR_reg1_reg2 = (0xC << 5), + OPC_RH850_SUB_reg1_reg2 = (0xD << 5), + OPC_RH850_ADD_reg1_reg2 = (0xE << 5), + OPC_RH850_CMP_reg1_reg2 = (0xF << 5), + + /* FORMAT II */ + OPC_RH850_16bit_16 = (0x10 << 5), // group with opcode 0x10 (mov,callt) + OPC_RH850_16bit_17 = (0x11 << 5), // group with opcode 0x11 (callt, satadd) + OPC_RH850_ADD_imm5_reg2= (0x12 << 5), // group with opcode 0x12 (add) + OPC_RH850_CMP_imm5_reg2 = (0x13 << 5), // group with opcode 0x13 (cmp) + OPC_RH850_SHR_imm5_reg2 = (0x14 << 5), + OPC_RH850_SAR_imm5_reg2 = (0x15 << 5), + OPC_RH850_SHL_imm5_reg2 = (0x16 << 5), + OPC_RH850_MULH_imm5_reg2 = (0x17 << 5), + + /*FORMAT III */ + OPC_RH850_BCOND = (0xB << 7), // different mask! (bits 10-7) + + /* FORMAT IV */ // different mask! (bits 10-7) + OPC_RH850_16bit_SLDB = (0x6 << 5), + OPC_RH850_16bit_SLDH = (0x8 << 5), + OPC_RH850_16bit_IV10 = (0xA << 5), // group with opcode 0xA (sld.w,sst.w) + OPC_RH850_16bit_SSTB = (0x7 << 5), + OPC_RH850_16bit_SSTH = (0x9 << 5), + + /* FORMAT VI */ + OPC_RH850_ADDI_imm16_reg1_reg2 = (0x30 << 5), + OPC_RH850_ANDI_imm16_reg1_reg2 = (0x36 << 5), + OPC_RH850_MOVEA = (0x31 << 5), // this is also MOV 3, which is 48 bit + OPC_RH850_MOVHI_imm16_reg1_reg2 = (0x32 << 5), + OPC_RH850_ORI_imm16_reg1_reg2 = (0x34 << 5), + OPC_RH850_SATSUBI_imm16_reg1_reg2= (0x33 << 5), + OPC_RH850_XORI_imm16_reg1_reg2 = (0x35 << 5), + + + /* FORMAT VII */ + + OPC_RH850_LOOP = (0x37 << 5), //same as MULHI in format VI !!!! + + OPC_RH850_LDB = (0x38 << 5), + OPC_RH850_LDH_LDW = (0x39 << 5), + OPC_RH850_STB = (0x3A << 5), + OPC_RH850_STH_STW = (0x3B << 5), //the store halfword and store word instructions differ on LSB displacement bit 16 (0=ST.H, 1=ST.W) (format VII) + + OPC_RH850_ST_LD_0 = (0x3C << 5), //5 instructions share this opcode, sub-op bits 11-15 are 0, inst. 
differ in sub-op bits 16-19 (ST.B2=D, ST.W2=F) (format XIV) + OPC_RH850_ST_LD_1 = (0x3D << 5), //5 instructions share this opcode, sub-op bits 11-15 are 0, inst. differ in sub-op bits 16-19 (ST.DW=F, ST.H2=D) (format XIV) + //OPC_RH850_LDHU = (0x3F << 5), //bits 11-15 are not all 0 + + OPC_RH850_32bit_1 = (0x3F << 5), // 111111 + + + + + OPC_RH850_BIT_MANIPULATION_2 = (0x3E << 5), + + OPC_RH850_FORMAT_V_XIII = (0x1E << 6), + + + OPC_RH850_MULH1 = (0x7 << 5), + OPC_RH850_MULH2 = (0x17 << 5), + + +}; + +enum{ + OPC_RH850_SET1_reg2_reg1 = 0, + OPC_RH850_NOT1_reg2_reg1 = 2, + OPC_RH850_CLR1_reg2_reg1 = 4, + OPC_RH850_TST1_reg2_reg1 = 6, +}; + +enum{ + OPC_RH850_SET1_bit3_disp16_reg1 = 1, + OPC_RH850_NOT1_bit3_disp16_reg1 = 3, + OPC_RH850_CLR1_bit3_disp16_reg1 = 5, + OPC_RH850_TST1_bit3_disp16_reg1 = 7, +}; + +enum{ + OPC_RH850_MOV_reg1_reg2 = 1, + OPC_RH850_MOV_imm5_reg2 = 2, + OPC_RH850_MOV_imm32_reg1 = 3, + OPC_RH850_MOVEA_imm16_reg1_reg2 = 4, +}; + +enum{ + OPC_RH850_SATADD_reg1_reg2 = 1, + OPC_RH850_SATADD_imm5_reg2 = 2, + OPC_RH850_SATADD_reg1_reg2_reg3 = 3, + OPC_RH850_SATSUB_reg1_reg2 = 4, + OPC_RH850_SATSUB_reg1_reg2_reg3 = 5, + OPC_RH850_SATSUBR_reg1_reg2 = 6, +}; + +enum{ + OPC_RH850_MUL_reg1_reg2_reg3 = 1, + OPC_RH850_MUL_imm9_reg2_reg3 = 2, + OPC_RH850_MULH_reg1_reg2 = 3, + //OPC_RH850_MULH_imm5_reg2 = 4, + OPC_RH850_MULHI_imm16_reg1_reg2 = 5, + OPC_RH850_MULU_reg1_reg2_reg3 = 8, + OPC_RH850_MULU_imm9_reg2_reg3 = 9, +}; + +enum{ + OPC_RH850_ADF_cccc_reg1_reg2_reg3 = 10, + OPC_RH850_SBF_cccc_reg1_reg2_reg3 = 11, + OPC_RH850_DIVH_reg1_reg2 = 12, +}; + +enum{ //enum for gen_data_manipulation cases + OPC_RH850_SHR_reg1_reg2 = 111, + OPC_RH850_SHR_reg1_reg2_reg3 = 222, + OPC_RH850_CMOV_cccc_reg1_reg2_reg3 = 333, + OPC_RH850_CMOV_cccc_imm5_reg2_reg3 = 444, + OPC_RH850_ROTL_reg1_reg2_reg3 = 445, + OPC_RH850_ROTL_imm5_reg2_reg3 = 446, + OPC_RH850_SAR_reg1_reg2 = 447, + OPC_RH850_SAR_reg1_reg2_reg3 = 448, + OPC_RH850_SASF_cccc_reg2 = 449, + OPC_RH850_SETF_cccc_reg2 = 450, + OPC_RH850_SHL_reg1_reg2 = 451, + OPC_RH850_SHL_reg1_reg2_reg3 = 453, + OPC_RH850_SXB_reg1 = 454, + OPC_RH850_SXH_reg1 = 455, + OPC_RH850_ZXB_reg1 = 456, + OPC_RH850_ZXH_reg1 = 457, + + + +}; + +enum{ + OPC_RH850_LDSR_reg2_regID_selID = 1, + OPC_RH850_STSR_regID_reg2_selID = 2, + //check for unintentional matching + OPC_RH850_PREPARE_list12_imm5 = 12, + OPC_RH850_PREPARE_list12_imm5_sp = 13, + OPC_RH850_RIE = 3, + OPC_RH850_CALLT_imm6 = 4, + OPC_RH850_CAXI_reg1_reg2_reg3 = 5, + OPC_RH850_DISPOSE_imm5_list12 = 7, + OPC_RH850_DISPOSE_imm5_list12_reg1 = 8, + OPC_RH850_FETRAP_vector4 = 15, + OPC_RH850_SWITCH_reg1 = 10, +}; + +enum{ // magic numbers for branch opcodes + OPC_RH850_JR_imm22 = 0, + OPC_RH850_JR_imm32 = 1, + OPC_RH850_JARL_disp22_reg2 = 2, + OPC_RH850_JARL_disp32_reg1 = 3, //48-bit + OPC_RH850_JARL_reg1_reg3 = 4, + OPC_RH850_JMP_reg1 = 5, + OPC_RH850_JMP_disp32_reg1 = 6, + +}; + + +#define MASK_OP_FORMAT_I_0(op) (MASK_OP_MAJOR(op) | (op & (0x1F << 11)) | (op & (0x1F << 0))) +enum { + OPC_RH850_NOP = OPC_RH850_16bit_0 | (0x0 << 11) | (0x0 << 0), + OPC_RH850_SYNCI = OPC_RH850_16bit_0 | (0x0 << 11) | (0x1C << 0), + OPC_RH850_SYNCE = OPC_RH850_16bit_0 | (0x0 << 11) | (0x1D << 0), + OPC_RH850_SYNCM = OPC_RH850_16bit_0 | (0x0 << 11) | (0x1E << 0), + OPC_RH850_SYNCP = OPC_RH850_16bit_0 | (0x0 << 11) | (0x1F << 0) +}; + + + +#define MASK_OP_ST_LD0(op) (MASK_OP_MAJOR(op) | (op & (0x1F << 11)) | (op & (0xF << 16))) +enum { + + OPC_RH850_LDB2 = OPC_RH850_ST_LD_0 | (0x00 << 11 ) | (0x5 << 16), + OPC_RH850_LDH2 = 
OPC_RH850_ST_LD_0 | (0x00 << 11 ) | (0x7 << 16), + OPC_RH850_LDW2 = OPC_RH850_ST_LD_0 | (0x00 << 11 ) | (0x9 << 16), + OPC_RH850_STB2 = OPC_RH850_ST_LD_0 | (0x00 << 11 ) | (0xD << 16), //sub-op bits 11-15 are 0, inst. differ in sub-op bits 16-19 (ST.B2=D, ST.W2=F) (format XIV) + OPC_RH850_STW2 = OPC_RH850_ST_LD_0 | (0x00 << 11 ) | (0xF << 16), + +}; +#define MASK_OP_ST_LD1(op) (MASK_OP_MAJOR(op) | (op & (0x1F << 11)) | (op & (0xF << 16))) +enum { + + OPC_RH850_LDBU2 = OPC_RH850_ST_LD_1 | (0x00 << 11 ) | (0x5 << 16), + OPC_RH850_LDHU2 = OPC_RH850_ST_LD_1 | (0x00 << 11 ) | (0x7 << 16), + OPC_RH850_LDDW = OPC_RH850_ST_LD_1 | (0x00 << 11 ) | (0x9 << 16), + OPC_RH850_STDW = OPC_RH850_ST_LD_1 | (0x00 << 11 ) | (0xF << 16), + OPC_RH850_STH2 = OPC_RH850_ST_LD_1 | (0x00 << 11 ) | (0xD << 16), +}; + +#define MASK_OP_32BIT_SUB(op) (op & (0xF << 23)) +enum { + OPC_RH850_LDSR_RIE_SETF_STSR = (0x0 << 23), + OPC_RH850_FORMAT_IX = (0x1 << 23), // 0001 + OPC_RH850_FORMAT_X = (0x2 << 23), // 0010 + OPC_RH850_MUL_INSTS = (0x4 << 23), // 0100 this is also for SASF + OPC_RH850_FORMAT_XI = (0x5 << 23), // 0101 + OPC_RH850_FORMAT_XII = (0x6 << 23), // 0110 + OPC_RH850_ADDIT_ARITH = (0x7 << 23), // 0111 + OPC_RH850_FORMAT_FI_CAT0 = (0x8 << 23), // 1000 used for floating-point instructions + OPC_RH850_FORMAT_FI_CAT1 = (0x9 << 23) // 1001 used for specific FPU instructions +}; + +#define MASK_OP_FORMAT_IX(op) (op & (0x3 << 21)) //0001 on b26-b23 +enum { + OPC_RH850_BINS_0 = (0x0 << 21), //BINS0,SHR, SHR2 + OPC_RH850_BINS_1 = (0x1 << 21), //BINS1,SAR,SAR2 + OPC_RH850_BINS_2 = (0x2 << 21), //BINS2,SHL, SHL2, ROTL, ROTL2 + OPC_RH850_BIT_MANIPULATION = (0x3 << 21), //clr1, set, tst1, not1, caxi in format IX +}; + +#define MASK_OP_FORMAT_X(op) (op & (0xFFF << 11)) //0010 on b26-b23 +enum { + OPC_RH850_CTRET = (0x880 << 11), + OPC_RH850_DI = (0xC00 << 11), + OPC_RH850_EI = (0XC10 << 11), + OPC_RH850_EIRET = (0X900 << 11), + OPC_RH850_FERET = (0X940 << 11), + OPC_RH850_HALT = (0X400 << 11), + OPC_RH850_JARL3 = (0XC18 << 11), + OPC_RH850_SNOOZE = (0x401 << 11), + OPC_RH850_SYSCALL = (0xC1A << 11), + OPC_RH850_TRAP = (0x000 << 11), + OPC_RH850_PREF = (0xC1B << 11), + OPC_RH850_POPSP_rh_rt = (0xC0C << 11), + OPC_RH850_PUSHSP_rh_rt = (0xC08 << 11), + //don't forget CACHE + OPC_RH850_CLL = (0xC1F << 11), + +}; + +#define MASK_OP_FORMAT_XI(op) (op & (0x7F << 16)) +enum { + OPC_RH850_DIVH_reg1_reg2_reg3 = 0x0, + OPC_RH850_DIVHU_reg1_reg2_reg3 = 0x2, + OPC_RH850_DIV_reg1_reg2_reg3 = 0x40, + OPC_RH850_DIVQ = 0x7C, + OPC_RH850_DIVQU = 0x7E, + OPC_RH850_DIVU_reg1_reg2_reg3 = 0x42 +}; + +#define MASK_OP_FORMAT_XII(op) (op & (0x3 << 17)) +enum { + OPC_RH850_BSW_reg2_reg3 = (0x0 << 0), + OPC_RH850_BSH_reg2_reg3 = (0x1 << 0), + OPC_RH850_HSW_reg2_reg3 = (0x2 << 0), + OPC_RH850_HSH_reg2_reg3 = (0x3 << 0), + // SCHOL, SCHOR, SCH1L, SCH1R + OPC_RH850_SCH0R_reg2_reg3 = (0x0 << 0), + OPC_RH850_SCH1R_reg2_reg3 = (0x1 << 0), //this is also STCW + OPC_RH850_SCH0L_reg2_reg3 = (0x2 << 0), + OPC_RH850_SCH1L_reg2_reg3 = (0x3 << 0), + + +}; + +#define MASK_ADDIT_ARITH_OP(op) (op & (0x3 << 21)) +enum { + OPC_RH850_SBF_SATSUB = 0x0, + OPC_RH850_ADF_SATADD3 = 0x1, + OPC_RH850_MAC_reg1_reg2_reg3_reg4 = 0x2, + OPC_RH850_MACU_reg1_reg2_reg3_reg4 = 0x3, +}; + +/* + * FPU instruction format (F:I) + */ + +enum { + FPU_TYPE_S, + FPU_TYPE_D, + FPU_TYPE_LS, + FPU_TYPE_LD, + FPU_TYPE_DL, + FPU_TYPE_SD, + FPU_TYPE_SL, + FPU_TYPE_DW, + FPU_TYPE_WD, + FPU_TYPE_HS, + FPU_TYPE_SH, + FPU_TYPE_SW, + FPU_TYPE_WS, + FPU_TYPE_DUW, + FPU_TYPE_SUW, + FPU_TYPE_UWD, + 
FPU_TYPE_UWS, + FPU_TYPE_ULD, + FPU_TYPE_ULS, + FPU_TYPE_SUL, + FPU_TYPE_DUL +}; + +enum { + FPU_OP_ABS, + FPU_OP_ADD, + FPU_OP_CEIL, + FPU_OP_CVT, + FPU_OP_DIV, + FPU_OP_FLOOR, + FPU_OP_CMOV, + FPU_OP_CMP, + FPU_OP_MAX, + FPU_OP_MIN, + FPU_OP_MUL, + FPU_OP_NEG, + FPU_OP_RECIP, + FPU_OP_RSQRT, + FPU_OP_SQRT, + FPU_OP_SUB, + FPU_OP_TRNC +}; + +#define MASK_OP_FORMAT_FI(op) (op & (0x7F << 16)) +enum { + OPC_RH850_FPU_CMOV_S_OR_TRFSR = 0x00 << 16, + OPC_RH850_FPU_CMOV_D = 0x10 << 16, + OPC_RH850_FPU_CMP_S = 0x20 << 16, + OPC_RH850_FPU_CMP_D = 0x30 << 16, + OPC_RH850_FPU_GROUP_CMPD= 0x30 << 16, + OPC_RH850_FPU_GROUP_SW = 0x40 << 16, + OPC_RH850_FPU_GROUP_DS = 0x42 << 16, + OPC_RH850_FPU_GROUP_SL = 0x44 << 16, + OPC_RH850_FPU_GROUP_ABSS = 0x48 << 16, + OPC_RH850_FPU_GROUP_S = 0x4E << 16, + OPC_RH850_FPU_GROUP_DW = 0x50 << 16, + OPC_RH850_FPU_GROUP_DD = 0x52 << 16, + OPC_RH850_FPU_GROUP_DL = 0x54 << 16, + OPC_RH850_FPU_GROUP_ABSD = 0x58 << 16, + OPC_RH850_FPU_GROUP_D = 0x5E << 16, + OPC_RH850_FPU_ADDF_S = 0x60 << 16, + OPC_RH850_FPU_SUBF_S = 0x62 << 16, + OPC_RH850_FPU_MULF_S = 0x64 << 16, + OPC_RH850_FPU_MAXF_S = 0x68 << 16, + OPC_RH850_FPU_MINF_S = 0x6A << 16, + OPC_RH850_FPU_DIVF_S = 0x6E << 16, + OPC_RH850_FPU_ADDF_D = 0x70 << 16, + OPC_RH850_FPU_SUBF_D = 0x72 << 16, + OPC_RH850_FPU_MULF_D = 0x74 << 16, + OPC_RH850_FPU_MAXF_D = 0x78 << 16, + OPC_RH850_FPU_MINF_D = 0x7A << 16, + OPC_RH850_FPU_DIVF_D = 0x7E << 16 +}; + +/* OPC_RH850_FPU_GROUP_CMPS/D, variant defined by cond reg3. */ +enum { + OPC_RH850_FPU_CMPS_F = 0x20, + OPC_RH850_FPU_CMPS_UN, + OPC_RH850_FPU_CMPS_EQ, + OPC_RH850_FPU_CMPS_UEQ, + OPC_RH850_FPU_CMPS_OLT, + OPC_RH850_FPU_CMPS_ULT, + OPC_RH850_FPU_CMPS_OLE, + OPC_RH850_FPU_CMPS_ULE, + OPC_RH850_FPU_CMPS_SF, + OPC_RH850_FPU_CMPS_NGLE, + OPC_RH850_FPU_CMPS_SEQ, + OPC_RH850_FPU_CMPS_NGL, + OPC_RH850_FPU_CMPS_LT, + OPC_RH850_FPU_CMPS_NGE, + OPC_RH850_FPU_CMPS_LE, + OPC_RH850_FPU_CMPS_NGT +}; + +/* OPC_RH850_FPU_GROUP_SW, variant defined by reg1 */ +enum { + OPC_RH850_FPU_TRNCF_SW=0x1, + OPC_RH850_FPU_CEILF_SW, + OPC_RH850_FPU_FLOORF_SW, + OPC_RH850_FPU_CVTF_SW, + OPC_RH850_FPU_TRNCF_SUW=0x11, + OPC_RH850_FPU_CEILF_SUW, + OPC_RH850_FPU_FLOORF_SUW, + OPC_RH850_FPU_CVTF_SUW=0x14 +}; + +/* OPC_RH850_FPU_GROUP_DS, variant defined by reg1 */ +enum { + OPC_RH850_FPU_CVTF_WS=0x0, + OPC_RH850_FPU_CVTF_LS, + OPC_RH850_FPU_CVTF_HS, + OPC_RH850_FPU_CVTF_SH, + OPC_RH850_FPU_CVTF_UWS=0x10, + OPC_RH850_FPU_CVTF_ULS +}; + +/* OPC_RH850_FPU_GROUP_SL, variant defined by reg1 */ +enum { + OPC_RH850_FPU_TRNCF_SL = 0x1, + OPC_RH850_FPU_CEILF_SL, + OPC_RH850_FPU_FLOORF_SL, + OPC_RH850_FPU_CVTF_SL, + OPC_RH850_FPU_TRNCF_SUL = 0x11, + OPC_RH850_FPU_CEILF_SUL, + OPC_RH850_FPU_FLOORF_SUL, + OPC_RH850_FPU_CVTF_SUL +}; + +/* OPC_RH850_FPU_GROUP_ABSS, variant defined by reg1 */ +enum { + OPC_RH850_FPU_ABSF_S = 0x0, + OPC_RH850_FPU_NEGF_S, +}; + +/* OPC_RH850_FPU_GROUP_S, variant defined by reg1 */ +enum { + OPC_RH850_FPU_SQRTF_S = 0x0, + OPC_RH850_FPU_RECIPF_S, + OPC_RH850_FPU_RSQRTF_S +}; + + +/* OPC_RH850_FPU_GROUP_DW, variant defined by reg1 */ +enum { + OPC_RH850_FPU_TRNCF_DW = 0x1, + OPC_RH850_FPU_CEILF_DW, + OPC_RH850_FPU_FLOORF_DW, + OPC_RH850_FPU_CVTF_DW, + OPC_RH850_FPU_TRNCF_DUW = 0x11, + OPC_RH850_FPU_CEILF_DUW, + OPC_RH850_FPU_FLOORF_DUW, + OPC_RH850_FPU_CVTF_DUW +}; + +/* OPC_RH850_FPU_GROUP_DD, variant defined by reg1 */ +enum { + OPC_RH850_FPU_CVTF_WD = 0x00, + OPC_RH850_FPU_CVTF_LD, + OPC_RH850_FPU_CVTF_SD, + OPC_RH850_FPU_CVTF_UWD = 0x10, + OPC_RH850_FPU_CVTF_ULD +}; + +/* 
OPC_RH850_FPU_GROUP_DL, variant defined by reg1 */ +enum { + OPC_RH850_FPU_TRNCF_DL = 0x1, + OPC_RH850_FPU_CEILF_DL, + OPC_RH850_FPU_FLOORF_DL, + OPC_RH850_FPU_CVTF_DL, + OPC_RH850_FPU_TRNCF_DUL = 0x11, + OPC_RH850_FPU_CEILF_DUL, + OPC_RH850_FPU_FLOORF_DUL, + OPC_RH850_FPU_CVTF_DUL +}; + +/* OPC_RH850_FPU_GROUP_ABSD, variant defined by reg1 */ +enum { + OPC_RH850_FPU_ABSF_D = 0x0, + OPC_RH850_FPU_NEGF_D, +}; + +/* OPC_RH850_FPU_GROUP_D, variant defined by reg1 */ +enum { + OPC_RH850_FPU_SQRTF_D = 0x0, + OPC_RH850_FPU_RECIPF_D, + OPC_RH850_FPU_RSQRTF_D +}; + +/* Format F:I with category=1 */ +enum { + OPC_RH850_FPU_FMAF_S = 0x60 << 16, + OPC_RH850_FPU_FMSF_S = 0x62 << 16, + OPC_RH850_FPU_FNMAF_S = 0x64 << 16, + OPC_RH850_FPU_FNMSF_S = 0x66 << 16 +}; + +#define MASK_OP_FORMAT_V_FORMAT_XIII(op) (op & (0x1F << 6)) + + +enum { + operation_LDL_W = 0, + operation_STC_W = 1, + operation_CLL = 2, +}; + + + +////////////////////////////////////////////////////////// +////////////////////////////////////////////////////////// +////////////////////////////////////////////////////////// +////////////////////////////////////////////////////////// +////////////////////////////////////////////////////////// +////////////////////////////////////////////////////////// + + +#define GET_B_IMM(inst) ((extract32(inst, 8, 4) << 1) \ + | (extract32(inst, 25, 6) << 5) \ + | (extract32(inst, 7, 1) << 11) \ + | (sextract64(inst, 31, 1) << 12)) + +#define GET_STORE_IMM(inst) ((extract32(inst, 7, 5)) \ + | (sextract64(inst, 25, 7) << 5)) + +#define GET_JAL_IMM(inst) ((extract32(inst, 21, 10) << 1) \ + | (extract32(inst, 20, 1) << 11) \ + | (extract32(inst, 12, 8) << 12) \ + | (sextract64(inst, 31, 1) << 20)) + + +#define GET_RS1(inst) extract32(inst, 0, 5) //appropriate for RH850 +#define GET_RS2(inst) extract32(inst, 11, 5) //appropriate for RH850 +#define GET_RS3(inst) extract32(inst, 27, 5) //appropriate for RH850 +#define GET_DISP(inst) (extract32(inst, 20, 7) | (sextract32(inst, 32, 16) << 7 ) ) //b47-b32 + b26-b20 + + +#define GET_RM(inst) extract32(inst, 12, 3) +#define GET_RD(inst) extract32(inst, 7, 5) +#define GET_IMM(inst) sextract64(inst, 20, 12) +#define GET_IMM_32(inst) sextract64(inst, 16, 32) + +/* RVC decoding macros */ +#define GET_C_IMM(inst) (extract32(inst, 2, 5) \ + | (sextract64(inst, 12, 1) << 5)) +#define GET_C_ZIMM(inst) (extract32(inst, 2, 5) \ + | (extract32(inst, 12, 1) << 5)) +#define GET_C_ADDI4SPN_IMM(inst) ((extract32(inst, 6, 1) << 2) \ + | (extract32(inst, 5, 1) << 3) \ + | (extract32(inst, 11, 2) << 4) \ + | (extract32(inst, 7, 4) << 6)) +#define GET_C_ADDI16SP_IMM(inst) ((extract32(inst, 6, 1) << 4) \ + | (extract32(inst, 2, 1) << 5) \ + | (extract32(inst, 5, 1) << 6) \ + | (extract32(inst, 3, 2) << 7) \ + | (sextract64(inst, 12, 1) << 9)) +#define GET_C_LWSP_IMM(inst) ((extract32(inst, 4, 3) << 2) \ + | (extract32(inst, 12, 1) << 5) \ + | (extract32(inst, 2, 2) << 6)) +#define GET_C_LDSP_IMM(inst) ((extract32(inst, 5, 2) << 3) \ + | (extract32(inst, 12, 1) << 5) \ + | (extract32(inst, 2, 3) << 6)) +#define GET_C_SWSP_IMM(inst) ((extract32(inst, 9, 4) << 2) \ + | (extract32(inst, 7, 2) << 6)) +#define GET_C_SDSP_IMM(inst) ((extract32(inst, 10, 3) << 3) \ + | (extract32(inst, 7, 3) << 6)) +#define GET_C_LW_IMM(inst) ((extract32(inst, 6, 1) << 2) \ + | (extract32(inst, 10, 3) << 3) \ + | (extract32(inst, 5, 1) << 6)) +#define GET_C_LD_IMM(inst) ((extract32(inst, 10, 3) << 3) \ + | (extract32(inst, 5, 2) << 6)) +#define GET_C_J_IMM(inst) ((extract32(inst, 3, 3) << 1) \ + | 
(extract32(inst, 11, 1) << 4) \ + | (extract32(inst, 2, 1) << 5) \ + | (extract32(inst, 7, 1) << 6) \ + | (extract32(inst, 6, 1) << 7) \ + | (extract32(inst, 9, 2) << 8) \ + | (extract32(inst, 8, 1) << 10) \ + | (sextract64(inst, 12, 1) << 11)) +#define GET_C_B_IMM(inst) ((extract32(inst, 3, 2) << 1) \ + | (extract32(inst, 10, 2) << 3) \ + | (extract32(inst, 2, 1) << 5) \ + | (extract32(inst, 5, 2) << 6) \ + | (sextract64(inst, 12, 1) << 8)) +#define GET_C_SIMM3(inst) extract32(inst, 10, 3) +#define GET_C_RD(inst) GET_RD(inst) +#define GET_C_RS1(inst) GET_RD(inst) +#define GET_C_RS2(inst) extract32(inst, 2, 5) +#define GET_C_RS1S(inst) (8 + extract32(inst, 7, 3)) +#define GET_C_RS2S(inst) (8 + extract32(inst, 2, 3)) + +#endif /* _RH850_INSTMAP_H */ \ No newline at end of file diff --git a/qemu/target/rh850/op_helper.c b/qemu/target/rh850/op_helper.c new file mode 100644 index 0000000000..36e272e7a0 --- /dev/null +++ b/qemu/target/rh850/op_helper.c @@ -0,0 +1,89 @@ +/* + * RH850 Emulation Helpers for QEMU. + * + * Copyright (c) 2016-2017 Sagar Karandikar, sagark@eecs.berkeley.edu + * Copyright (c) 2017-2018 SiFive, Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2 or later, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see . + */ + +#include "qemu/osdep.h" +#include "qemu/log.h" +#include "cpu.h" +#include "exec/exec-all.h" +#include "exec/helper-proto.h" + +/* Exceptions processing helpers */ +void QEMU_NORETURN do_raise_exception_err(CPURH850State *env, + uint32_t exception, uintptr_t pc) +{ + CPUState *cs = CPU(rh850_env_get_cpu(env)); + qemu_log_mask(CPU_LOG_INT, "%s: %d\n", __func__, exception); + cs->exception_index = exception; + cpu_loop_exit_restore(cs, pc); +} + +void QEMU_NORETURN do_raise_exception_err_with_cause(CPURH850State *env, + uint32_t exception, uint32_t cause, uintptr_t pc) +{ + CPUState *cs = CPU(rh850_env_get_cpu(env)); + //qemu_log_mask(CPU_LOG_INT, "%s: %d\n", __func__, exception); + cs->exception_index = exception; + env->exception_cause = cause; + cpu_loop_exit_restore(cs, pc); +} + + +void helper_raise_exception(CPURH850State *env, uint32_t exception) +{ + do_raise_exception_err(env, exception, 0); +} + +void helper_raise_exception_with_cause(CPURH850State *env, uint32_t exception, uint32_t cause) +{ + do_raise_exception_err_with_cause(env, exception, cause, 0); +} + +target_ulong csr_read_helper(CPURH850State *env, target_ulong csrno) +{ + return 0; +} + +#ifndef CONFIG_USER_ONLY + +/* iothread_mutex must be held */ +void rh850_set_local_interrupt(RH850CPU *cpu, target_ulong mask, int value) +{ +} + +void rh850_set_mode(CPURH850State *env, target_ulong newpriv) +{ +} + +void helper_tlb_flush(CPURH850State *env) +{ + RH850CPU *cpu = rh850_env_get_cpu(env); + CPUState *cs = CPU(cpu); + tlb_flush(cs); +} + +void helper_uc_rh850_exit(CPURH850State *env) +{ + CPUState *cs = CPU(env); + + cs->exception_index = EXCP_HLT; + cs->halted = 1; + cpu_loop_exit(cs); +} + +#endif /* !CONFIG_USER_ONLY */ diff --git a/qemu/target/rh850/pmp.c b/qemu/target/rh850/pmp.c new file mode 100644 index 
0000000000..8f98659d3a --- /dev/null +++ b/qemu/target/rh850/pmp.c @@ -0,0 +1,379 @@ +/* + * QEMU RH850 PMP (Physical Memory Protection) + * + * Author: Daire McNamara, daire.mcnamara@emdalo.com + * Ivan Griffin, ivan.griffin@emdalo.com + * + * This provides a RH850 Physical Memory Protection implementation + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2 or later, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see . + */ + +/* + * PMP (Physical Memory Protection) is as-of-yet unused and needs testing. + */ + +#include "qemu/osdep.h" +#include "qemu/log.h" +#include "cpu.h" +#include "qemu-common.h" + +#ifndef CONFIG_USER_ONLY + +#define RH850_DEBUG_PMP 0 +#define PMP_DEBUG(fmt, ...) \ + do { \ + if (RH850_DEBUG_PMP) { \ + qemu_log_mask(LOG_TRACE, "%s: " fmt "\n", __func__, ##__VA_ARGS__);\ + } \ + } while (0) + +static void pmp_write_cfg(CPURH850State *env, uint32_t addr_index, + uint8_t val); +static uint8_t pmp_read_cfg(CPURH850State *env, uint32_t addr_index); +static void pmp_update_rule(CPURH850State *env, uint32_t pmp_index); + +/* + * Accessor method to extract address matching type 'a field' from cfg reg + */ +static inline uint8_t pmp_get_a_field(uint8_t cfg) +{ + uint8_t a = cfg >> 3; + return a & 0x3; +} + +/* + * Check whether a PMP is locked or not. + */ +static inline int pmp_is_locked(CPURH850State *env, uint32_t pmp_index) +{ + + if (env->pmp_state.pmp[pmp_index].cfg_reg & PMP_LOCK) { + return 1; + } + + /* Top PMP has no 'next' to check */ + if ((pmp_index + 1u) >= MAX_RH850_PMPS) { + return 0; + } + + /* In TOR mode, need to check the lock bit of the next pmp + * (if there is a next) + */ + const uint8_t a_field = + pmp_get_a_field(env->pmp_state.pmp[pmp_index + 1].cfg_reg); + if ((env->pmp_state.pmp[pmp_index + 1u].cfg_reg & PMP_LOCK) && + (PMP_AMATCH_TOR == a_field)) { + return 1; + } + + return 0; +} + +/* + * Count the number of active rules. + */ +static inline uint32_t pmp_get_num_rules(CPURH850State *env) +{ + return env->pmp_state.num_rules; +} + +/* + * Accessor to get the cfg reg for a specific PMP/HART + */ +static inline uint8_t pmp_read_cfg(CPURH850State *env, uint32_t pmp_index) +{ + if (pmp_index < MAX_RH850_PMPS) { + return env->pmp_state.pmp[pmp_index].cfg_reg; + } + + return 0; +} + + +/* + * Accessor to set the cfg reg for a specific PMP/HART + * Bounds checks and relevant lock bit. + */ +static void pmp_write_cfg(CPURH850State *env, uint32_t pmp_index, uint8_t val) +{ + if (pmp_index < MAX_RH850_PMPS) { + if (!pmp_is_locked(env, pmp_index)) { + env->pmp_state.pmp[pmp_index].cfg_reg = val; + pmp_update_rule(env, pmp_index); + } else { + PMP_DEBUG("ignoring write - locked"); + } + } else { + PMP_DEBUG("ignoring write - out of bounds"); + } +} + +static void pmp_decode_napot(target_ulong a, target_ulong *sa, target_ulong *ea) +{ + /* + aaaa...aaa0 8-byte NAPOT range + aaaa...aa01 16-byte NAPOT range + aaaa...a011 32-byte NAPOT range + ... 
+ aa01...1111 2^XLEN-byte NAPOT range + a011...1111 2^(XLEN+1)-byte NAPOT range + 0111...1111 2^(XLEN+2)-byte NAPOT range + 1111...1111 Reserved + */ + if (a == -1) { + *sa = 0u; + *ea = -1; + return; + } else { + target_ulong t1 = ctz64(~a); + target_ulong base = (a & ~(((target_ulong)1 << t1) - 1)) << 3; + target_ulong range = ((target_ulong)1 << (t1 + 3)) - 1; + *sa = base; + *ea = base + range; + } +} + + +/* Convert cfg/addr reg values here into simple 'sa' --> start address and 'ea' + * end address values. + * This function is called relatively infrequently whereas the check that + * an address is within a pmp rule is called often, so optimise that one + */ +static void pmp_update_rule(CPURH850State *env, uint32_t pmp_index) +{ + int i; + + env->pmp_state.num_rules = 0; + + uint8_t this_cfg = env->pmp_state.pmp[pmp_index].cfg_reg; + target_ulong this_addr = env->pmp_state.pmp[pmp_index].addr_reg; + target_ulong prev_addr = 0u; + target_ulong sa = 0u; + target_ulong ea = 0u; + + if (pmp_index >= 1u) { + prev_addr = env->pmp_state.pmp[pmp_index - 1].addr_reg; + } + + switch (pmp_get_a_field(this_cfg)) { + case PMP_AMATCH_OFF: + sa = 0u; + ea = -1; + break; + + case PMP_AMATCH_TOR: + sa = prev_addr << 2; /* shift up from [xx:0] to [xx+2:2] */ + ea = (this_addr << 2) - 1u; + break; + + case PMP_AMATCH_NA4: + sa = this_addr << 2; /* shift up from [xx:0] to [xx+2:2] */ + ea = (this_addr + 4u) - 1u; + break; + + case PMP_AMATCH_NAPOT: + pmp_decode_napot(this_addr, &sa, &ea); + break; + + default: + sa = 0u; + ea = 0u; + break; + } + + env->pmp_state.addr[pmp_index].sa = sa; + env->pmp_state.addr[pmp_index].ea = ea; + + for (i = 0; i < MAX_RH850_PMPS; i++) { + const uint8_t a_field = + pmp_get_a_field(env->pmp_state.pmp[i].cfg_reg); + if (PMP_AMATCH_OFF != a_field) { + env->pmp_state.num_rules++; + } + } +} + +static int pmp_is_in_range(CPURH850State *env, int pmp_index, target_ulong addr) +{ + int result = 0; + + if ((addr >= env->pmp_state.addr[pmp_index].sa) + && (addr <= env->pmp_state.addr[pmp_index].ea)) { + result = 1; + } else { + result = 0; + } + + return result; +} + + +/* + * Public Interface + */ + +/* + * Check if the address has required RWX privs to complete desired operation + */ +bool pmp_hart_has_privs(CPURH850State *env, target_ulong addr, + target_ulong size, pmp_priv_t privs) +{ + int i = 0; + int ret = -1; + target_ulong s = 0; + target_ulong e = 0; + pmp_priv_t allowed_privs = 0; + + /* Short cut if no rules */ + if (0 == pmp_get_num_rules(env)) { + return true; + } + + /* 1.10 draft priv spec states there is an implicit order + from low to high */ + for (i = 0; i < MAX_RH850_PMPS; i++) { + s = pmp_is_in_range(env, i, addr); + e = pmp_is_in_range(env, i, addr + size); + + /* partially inside */ + if ((s + e) == 1) { + PMP_DEBUG("pmp violation - access is partially inside"); + ret = 0; + break; + } + + /* fully inside */ + const uint8_t a_field = + pmp_get_a_field(env->pmp_state.pmp[i].cfg_reg); + if ((s + e) == 2) { + if (PMP_AMATCH_OFF == a_field) { + return 1; + } + + allowed_privs = PMP_READ | PMP_WRITE | PMP_EXEC; + if ((env->priv != PRV_M) || pmp_is_locked(env, i)) { + allowed_privs &= env->pmp_state.pmp[i].cfg_reg; + } + + if ((privs & allowed_privs) == privs) { + ret = 1; + break; + } else { + ret = 0; + break; + } + } + } + + /* No rule matched */ + if (ret == -1) { + if (env->priv == PRV_M) { + ret = 1; /* Privileged spec v1.10 states if no PMP entry matches an + * M-Mode access, the access succeeds */ + } else { + ret = 0; /* Other modes are not allowed to 
succeed if they don't + * match a rule, but there are rules. We've checked for + * no rule earlier in this function. */ + } + } + + return ret == 1 ? true : false; +} + + +/* + * Handle a write to a pmpcfg CSP + */ +void pmpcfg_csr_write(CPURH850State *env, uint32_t reg_index, + target_ulong val) +{ + int i; + uint8_t cfg_val; + + PMP_DEBUG("hart " TARGET_FMT_ld ": reg%d, val: 0x" TARGET_FMT_lx, + env->mhartid, reg_index, val); + + if ((reg_index & 1) && (sizeof(target_ulong) == 8)) { + PMP_DEBUG("ignoring write - incorrect address"); + return; + } + + for (i = 0; i < sizeof(target_ulong); i++) { + cfg_val = (val >> 8 * i) & 0xff; + pmp_write_cfg(env, (reg_index * sizeof(target_ulong)) + i, + cfg_val); + } +} + + +/* + * Handle a read from a pmpcfg CSP + */ +target_ulong pmpcfg_csr_read(CPURH850State *env, uint32_t reg_index) +{ + int i; + target_ulong cfg_val = 0; + uint8_t val = 0; + + for (i = 0; i < sizeof(target_ulong); i++) { + val = pmp_read_cfg(env, (reg_index * sizeof(target_ulong)) + i); + cfg_val |= (val << (i * 8)); + } + + PMP_DEBUG("hart " TARGET_FMT_ld ": reg%d, val: 0x" TARGET_FMT_lx, + env->mhartid, reg_index, cfg_val); + + return cfg_val; +} + + +/* + * Handle a write to a pmpaddr CSP + */ +void pmpaddr_csr_write(CPURH850State *env, uint32_t addr_index, + target_ulong val) +{ + PMP_DEBUG("hart " TARGET_FMT_ld ": addr%d, val: 0x" TARGET_FMT_lx, + env->mhartid, addr_index, val); + + if (addr_index < MAX_RH850_PMPS) { + if (!pmp_is_locked(env, addr_index)) { + env->pmp_state.pmp[addr_index].addr_reg = val; + pmp_update_rule(env, addr_index); + } else { + PMP_DEBUG("ignoring write - locked"); + } + } else { + PMP_DEBUG("ignoring write - out of bounds"); + } +} + + +/* + * Handle a read from a pmpaddr CSP + */ +target_ulong pmpaddr_csr_read(CPURH850State *env, uint32_t addr_index) +{ + PMP_DEBUG("hart " TARGET_FMT_ld ": addr%d, val: 0x" TARGET_FMT_lx, + env->mhartid, addr_index, + env->pmp_state.pmp[addr_index].addr_reg); + if (addr_index < MAX_RH850_PMPS) { + return env->pmp_state.pmp[addr_index].addr_reg; + } else { + PMP_DEBUG("ignoring read - out of bounds"); + return 0; + } +} + +#endif diff --git a/qemu/target/rh850/pmp.h b/qemu/target/rh850/pmp.h new file mode 100644 index 0000000000..e6e43e8241 --- /dev/null +++ b/qemu/target/rh850/pmp.h @@ -0,0 +1,64 @@ +/* + * QEMU RH850 PMP (Physical Memory Protection) + * + * Author: Daire McNamara, daire.mcnamara@emdalo.com + * Ivan Griffin, ivan.griffin@emdalo.com + * + * This provides a RH850 Physical Memory Protection interface + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2 or later, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see . 
+ */ + +#ifndef _RH850_PMP_H_ +#define _RH850_PMP_H_ + +typedef enum { + PMP_READ = 1 << 0, + PMP_WRITE = 1 << 1, + PMP_EXEC = 1 << 2, + PMP_LOCK = 1 << 7 +} pmp_priv_t; + +typedef enum { + PMP_AMATCH_OFF, /* Null (off) */ + PMP_AMATCH_TOR, /* Top of Range */ + PMP_AMATCH_NA4, /* Naturally aligned four-byte region */ + PMP_AMATCH_NAPOT /* Naturally aligned power-of-two region */ +} pmp_am_t; + +typedef struct { + target_ulong addr_reg; + uint8_t cfg_reg; +} pmp_entry_t; + +typedef struct { + target_ulong sa; + target_ulong ea; +} pmp_addr_t; + +typedef struct { + pmp_entry_t pmp[MAX_RH850_PMPS]; + pmp_addr_t addr[MAX_RH850_PMPS]; + uint32_t num_rules; +} pmp_table_t; + +void pmpcfg_csr_write(CPURH850State *env, uint32_t reg_index, + target_ulong val); +target_ulong pmpcfg_csr_read(CPURH850State *env, uint32_t reg_index); +void pmpaddr_csr_write(CPURH850State *env, uint32_t addr_index, + target_ulong val); +target_ulong pmpaddr_csr_read(CPURH850State *env, uint32_t addr_index); +bool pmp_hart_has_privs(CPURH850State *env, target_ulong addr, + target_ulong size, pmp_priv_t priv); + +#endif diff --git a/qemu/target/rh850/register_indices.h b/qemu/target/rh850/register_indices.h new file mode 100644 index 0000000000..20fcea8cae --- /dev/null +++ b/qemu/target/rh850/register_indices.h @@ -0,0 +1,63 @@ +/* + * register_indices.h + * + * Created on: Jun 18, 2018 + * + */ + +#ifndef TARGET_RH850_REGISTER_INDICES_H_ +#define TARGET_RH850_REGISTER_INDICES_H_ + + +// BANK ID 0, sys basic regs +#define EIPC_IDX 0 +#define EIPSW_IDX 1 +#define FEPC_IDX 2 +#define FEPSW_IDX 3 +#define PSW_IDX 5 //program status word +// sysFpuRegs indices +#define FPSR_IDX 6 //floating-point configuration/status <---write the bit defines +#define FPEPC_IDX 7 //floating point exception PC +#define FPST_IDX 8 +#define FPCC_IDX 9 +#define FPCFG_IDX 10 +#define FPEC_IDX 11 + +#define EIIC_IDX 13 //EI level exception cause +#define FEIC_IDX 14 //FI level exception cause +#define CTPC_IDX 16 +#define CTPSW_IDX 17 +#define CTBP_IDX 20 +#define EIWR_IDX 28 +#define FEWR_IDX 29 +#define BSEL_IDX 31 + +// BANK ID 1, sys basic regs +#define MCFG0_IDX1 0 //machine configuration +#define RBASE_IDX1 2 //reset vector base address (if psw.ebv==0, this is also exception vector) +#define EBASE_IDX1 3 //exception handler vector address +#define INTBP_IDX1 4 +#define MCTL_IDX1 5 //CPU control +#define PID_IDX1 6 //processor ID +#define SCCFG_IDX1 11 // SYSCALL config +#define SCBP_IDX1 12 // SYSCALL base pointer + +// BANK ID 2, sys basic regs +#define HTCFG0_IDX2 0 //thread configuration +#define MEA_IDX2 6 //memory error address (when misaligned or MPU) +#define ASID_IDX2 7 //memory error address (when misaligned or MPU) +#define MEI_IDX2 8 //memory error info (info about instruction that caused exception) + +// BANK ID 1, 2 sysInterruptRegs indices +#define FPIPR_IDX1 7 +#define ISPR_IDX2 10 +#define PMR_IDX2 11 +#define ICSR_IDX2 12 //interrupt control status register +#define INTCFG_IDX2 13 //interrupt function setting + + +// BANK ID 5, 6, 7 system MPU regs indices +#define MPM_IDX5 0 //memory protection operation mode + + +#endif /* TARGET_RH850_REGISTER_INDICES_H_ */ diff --git a/qemu/target/rh850/translate.c b/qemu/target/rh850/translate.c new file mode 100644 index 0000000000..7081656c95 --- /dev/null +++ b/qemu/target/rh850/translate.c @@ -0,0 +1,5190 @@ +/* + * RH850 emulation for qemu: main translation routines. + * + * Copyright (c) 2018 iSYSTEM Labs d.o.o. 
+ * Copyright (c) 2023 Quarkslab + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2 or later, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see . + */ + +#include "qemu/osdep.h" +#include "cpu.h" +#include "exec/exec-all.h" +#include "tcg/tcg-op.h" +#include "tcg/tcg-op-gvec.h" +#include "qemu/log.h" +#include "qemu/host-utils.h" +#include "exec/cpu_ldst.h" +#include "exec/gen-icount.h" +#include "exec/helper-proto.h" +#include "exec/helper-gen.h" +#include "exec/translator.h" + +#include "instmap.h" + +#include "unicorn/platform.h" +#include "uc_priv.h" +#include "translate.h" +#include "fpu_translate.h" + +/* + * Unicorn: Special disas state for exiting in the middle of tb. + */ + +/* We are not using a goto_tb (for whatever reason), but have updated + the PC (for whatever reason), so there's no need to do it again on + exiting the TB. */ +#define DISAS_PC_UPDATED DISAS_TARGET_0 + +/* We have emitted one or more goto_tb. No fixup required. */ +#define DISAS_GOTO_TB DISAS_TARGET_1 + +/* We have updated the PC and CC values. */ +#define DISAS_PC_CC_UPDATED DISAS_TARGET_2 + +/* We are exiting the TB, but have neither emitted a goto_tb, nor + updated the PC for the next instruction to be executed. */ +#define DISAS_PC_STALE DISAS_TARGET_3 + +/* We are exiting the TB to the main loop. */ +#define DISAS_PC_STALE_NOCHAIN DISAS_TARGET_4 + +#define DISAS_UNICORN_HALT DISAS_TARGET_11 + +/* global register indices */ +static TCGv cpu_gpr[NUM_GP_REGS]; +static TCGv cpu_pc; +static TCGv cpu_sysRegs[NUM_SYS_REG_BANKS][MAX_SYS_REGS_IN_BANK]; +// static TCGv_i64 cpu_fpr[32]; /* assume F and D extensions */ +static TCGv cpu_sysDatabuffRegs[1], cpu_LLbit, cpu_LLAddress; +static TCGv load_res; +static TCGv load_val; + +// PSW register flags. These are for temporary use only during +// calculations. Before usage they should be set from PSW and +// stored back to PSW after changes. +// TODO: since PSW as a register is rarely used - only when ld/str sys reg and +// on some branches (TRAP, ...) it makes sense to compose/decompose PSW +// on these occcasions and not have PSW stored in registers below. +TCGv_i32 cpu_ZF, cpu_SF, cpu_OVF, cpu_CYF, cpu_SATF, cpu_ID, cpu_EP, cpu_NP, + cpu_EBV, cpu_CU0, cpu_CU1, cpu_CU2, cpu_UM; + + +/** Const, RH850 does not have MMU. */ +const int MEM_IDX = 0; + +/* is_jmp field values */ +#define DISAS_INDIRECT_JUMP DISAS_TARGET_0 /* only pc was modified dynamically */ +#define DISAS_EXIT_TB DISAS_TARGET_1 /* cpu state was modified dynamically */ +#define DISAS_TB_EXIT_ALREADY_GENERATED DISAS_TARGET_2 +#define CASE_OP_32_64(X) case X + +/* Possible conditions for tests. 
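+   These enumerators encode the 4-bit "cccc" condition field used by the
+   Bcond, CMOVcc, SETF/SASF and ADF/SBF translations below;
+   condition_satisfied() evaluates the selected condition from the cached
+   PSW flag variables (cpu_ZF, cpu_SF, cpu_OVF, cpu_CYF, cpu_SATF).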
*/ +enum { + V_COND = 0, /* OV = 1 */ + C_COND = 1, /* CY = 1 */ + Z_COND = 2, /* Z = 1 */ + NH_COND = 3, /* (CY or Z) = 1 */ + S_COND = 4, /* S = 1 */ + T_COND = 5, /* Always */ + LT_COND = 6, /* (S xor OV) = 1 */ + LE_COND = 7, /* ((S xor OV) or Z) = 1 */ + + NV_COND = 8, /* OV = 0 */ + NC_COND = 9, /* CY = 0 */ + NZ_COND = 10, /* Z = 0 */ + H_COND = 11, /* (CY or Z) = 0 */ + NS_COND = 12, /* S = 0 */ + SA_COND = 13, /* SAT = 1 */ + GE_COND = 14, /* (S xor OV) = 0 */ + GT_COND = 15, /* ((S xor OV) or Z) = 0 */ +}; + +// Enumeration for Cache operations. +enum { + CHBII = 0x0, + CIBII = 0x20, + CFALI = 0x40, + CISTI = 0x60, + CILDI = 0x61, + CLL = 0x7e, +}; + +enum { + OPC_RH850_BINS = 123456, +}; + + +static void gen_exception_debug(DisasContext *dc) +{ + TCGContext *tcg_ctx = dc->uc->tcg_ctx; + + TCGv_i32 helper_tmp = tcg_const_i32(tcg_ctx, EXCP_DEBUG); + gen_helper_raise_exception(tcg_ctx, tcg_ctx->cpu_env, helper_tmp); + tcg_temp_free_i32(tcg_ctx, helper_tmp); + + dc->base.is_jmp = DISAS_TB_EXIT_ALREADY_GENERATED; +} + +static void gen_exception_halt(DisasContext *dc) +{ + TCGContext *tcg_ctx = dc->uc->tcg_ctx; + + TCGv_i32 helper_tmp = tcg_const_i32(tcg_ctx, EXCP_HLT); + gen_helper_raise_exception(tcg_ctx, tcg_ctx->cpu_env, helper_tmp); + tcg_temp_free_i32(tcg_ctx, helper_tmp); + + dc->base.is_jmp = DISAS_TB_EXIT_ALREADY_GENERATED; +} + + +static void gen_goto_tb_imm(DisasContext *ctx, int n, target_ulong dest) +{ + TCGContext *tcg_ctx = ctx->uc->tcg_ctx; + + if (unlikely(ctx->base.singlestep_enabled)) { + tcg_gen_movi_tl(tcg_ctx, cpu_pc, dest); + gen_exception_debug(ctx); + } else { + tcg_gen_goto_tb(tcg_ctx, n); + tcg_gen_movi_tl(tcg_ctx, cpu_pc, dest); + tcg_gen_exit_tb(tcg_ctx, ctx->base.tb, n); + } +} + + +/* Wrapper for getting reg values - need to check of reg is zero since + * cpu_gpr[0] is not actually allocated + */ +void gen_get_gpr(TCGContext *tcg_ctx, TCGv t, int reg_num) +{ + if (reg_num == 0) { + tcg_gen_movi_tl(tcg_ctx, t, 0); + } else { + tcg_gen_mov_tl(tcg_ctx, t, cpu_gpr[reg_num]); + } + +} + + +/* Wrapper for setting system register values. */ + +void gen_set_spr(TCGContext *tcg_ctx, int bank_id, int reg_id, TCGv t) +{ + tcg_gen_mov_tl(tcg_ctx, cpu_sysRegs[bank_id][reg_id], t); +} + +/* Wrapper for gettint sysreg values. */ +void gen_get_spr(TCGContext *tcg_ctx, int bank_id, int reg_id, TCGv t) +{ + tcg_gen_mov_tl(tcg_ctx, t, cpu_sysRegs[bank_id][reg_id]); +} + +/* Wrapper for setting reg values - need to check of reg is zero since + * cpu_gpr[0] is not actually allocated. this is more for safety purposes, + * since we usually avoid calling the OP_TYPE_gen function if we see a write to + * $zero + */ +void gen_set_gpr(TCGContext *tcg_ctx, int reg_num_dst, TCGv t) +{ + if (reg_num_dst != 0) { + tcg_gen_mov_tl(tcg_ctx, cpu_gpr[reg_num_dst], t); + } +} + + +/** + * gen_goto_tb_rl() is a customized version of gen_goto_tb() that is able to + * move PC into a specified register before updating PC. V850e3 JARL/JR insts. + * work this way :). 
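+ * The emitted sequence is:
+ *     GR[reg] <- PC + insn_size     (link register update)
+ *     PC      <- dest               (branch target)
+ * followed by tcg_gen_exit_tb(), or a debug exception when single-stepping.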
+ **/ + +static void gen_goto_tb_rl(DisasContext *ctx, int n, int reg, int insn_size, uint32_t dest) +{ + TCGContext *tcg_ctx = ctx->uc->tcg_ctx; + TCGv reg_value = tcg_temp_new_i32(tcg_ctx); + + if (unlikely(ctx->base.singlestep_enabled)) + { + + /* GR[reg] <- PC + insn_size */ + tcg_gen_movi_i32(tcg_ctx, reg_value, ctx->pc); + tcg_gen_addi_i32(tcg_ctx, reg_value, reg_value, insn_size); + gen_set_gpr(tcg_ctx, reg, reg_value); + + /* PC <- dest */ + tcg_gen_movi_i32(tcg_ctx, cpu_pc, dest); + + tcg_temp_free_i32(tcg_ctx, reg_value); + + /* Generate exception. */ + gen_exception_debug(ctx); + } + else + { + tcg_gen_goto_tb(tcg_ctx, n); + + /* GR[reg] <- PC + insn_size */ + tcg_gen_movi_i32(tcg_ctx, reg_value, ctx->pc); + tcg_gen_addi_i32(tcg_ctx, reg_value, reg_value, insn_size); + gen_set_gpr(tcg_ctx, reg, reg_value); + + /* PC <- dest */ + tcg_gen_movi_i32(tcg_ctx, cpu_pc, dest); + + tcg_temp_free_i32(tcg_ctx, reg_value); + + tcg_gen_exit_tb(tcg_ctx, ctx->base.tb, n); + } +} + + +static inline void tcgv_to_flags(TCGContext *tcg_ctx, TCGv reg) +{ + TCGv temp = tcg_temp_new_i32(tcg_ctx); + tcg_gen_mov_i32(tcg_ctx, temp, reg); + tcg_gen_andi_i32(tcg_ctx, cpu_ZF, temp, 0x1); + tcg_gen_shri_i32(tcg_ctx, temp, temp, 0x1); + tcg_gen_andi_i32(tcg_ctx, cpu_SF, temp, 0x1); + tcg_gen_shri_i32(tcg_ctx, temp, temp, 0x1); + tcg_gen_andi_i32(tcg_ctx, cpu_OVF, temp, 0x1); + tcg_gen_shri_i32(tcg_ctx, temp, temp, 0x1); + tcg_gen_andi_i32(tcg_ctx, cpu_CYF, temp, 0x1); + tcg_gen_shri_i32(tcg_ctx, temp, temp, 0x1); + tcg_gen_andi_i32(tcg_ctx, cpu_SATF, temp, 0x1); + + tcg_gen_shri_i32(tcg_ctx, temp, temp, 0x1); + tcg_gen_andi_i32(tcg_ctx, cpu_ID, temp, 0x1); + + tcg_gen_shri_i32(tcg_ctx, temp, temp, 0x1); + tcg_gen_andi_i32(tcg_ctx, cpu_EP, temp, 0x1); + + tcg_gen_shri_i32(tcg_ctx, temp, temp, 0x1); + tcg_gen_andi_i32(tcg_ctx, cpu_NP, temp, 0x1); + + tcg_gen_shri_i32(tcg_ctx, temp, temp, 0x8); + tcg_gen_andi_i32(tcg_ctx, cpu_EBV, temp, 0x1); + + tcg_gen_shri_i32(tcg_ctx, temp, temp, 0x1); + tcg_gen_andi_i32(tcg_ctx, cpu_CU0, temp, 0x1); + + tcg_gen_shri_i32(tcg_ctx, temp, temp, 0x1); + tcg_gen_andi_i32(tcg_ctx, cpu_CU1, temp, 0x1); + + tcg_gen_shri_i32(tcg_ctx, temp, temp, 0x1); + tcg_gen_andi_i32(tcg_ctx, cpu_CU2, temp, 0x1); + + tcg_gen_shri_i32(tcg_ctx, temp, temp, 0x12); + tcg_gen_andi_i32(tcg_ctx, cpu_UM, temp, 0x1); + + tcg_temp_free(tcg_ctx, temp); +} + + +static void tcgv_to_flags_z_cy_ov_s_sat(TCGContext *tcg_ctx, TCGv reg) +{ + TCGv temp = tcg_temp_new_i32(tcg_ctx); + tcg_gen_mov_i32(tcg_ctx, temp, reg); + tcg_gen_andi_i32(tcg_ctx, cpu_ZF, temp, 0x1); + tcg_gen_shri_i32(tcg_ctx, temp, temp, 0x1); + tcg_gen_andi_i32(tcg_ctx, cpu_SF, temp, 0x1); + tcg_gen_shri_i32(tcg_ctx, temp, temp, 0x1); + tcg_gen_andi_i32(tcg_ctx, cpu_OVF, temp, 0x1); + tcg_gen_shri_i32(tcg_ctx, temp, temp, 0x1); + tcg_gen_andi_i32(tcg_ctx, cpu_CYF, temp, 0x1); + tcg_gen_shri_i32(tcg_ctx, temp, temp, 0x1); + tcg_gen_andi_i32(tcg_ctx, cpu_SATF, temp, 0x1); + tcg_temp_free(tcg_ctx, temp); +} + + +static void flags_to_tcgv_id_ep_np_ebv_cu_um(TCGContext *tcg_ctx, TCGv reg) +{ + // Set flags in PSW to 0 so we can OR with new state + tcg_gen_andi_i32(tcg_ctx, reg, reg, 0xbff87f1f); + + TCGv temp = tcg_temp_new_i32(tcg_ctx); + + tcg_gen_shli_i32(tcg_ctx, temp, cpu_ID, 0x5); + tcg_gen_or_i32(tcg_ctx, reg, reg,temp); + + tcg_gen_shli_i32(tcg_ctx, temp, cpu_EP, 0x6); + tcg_gen_or_i32(tcg_ctx, reg, reg,temp); + + tcg_gen_shli_i32(tcg_ctx, temp, cpu_NP, 0x7); + tcg_gen_or_i32(tcg_ctx, reg, reg,temp); + + tcg_gen_shli_i32(tcg_ctx, temp, 
cpu_EBV, 0xF); + tcg_gen_or_i32(tcg_ctx, reg, reg,temp); + + tcg_gen_shli_i32(tcg_ctx, temp, cpu_CU0, 0x10); + tcg_gen_or_i32(tcg_ctx, reg, reg,temp); + + tcg_gen_shli_i32(tcg_ctx, temp, cpu_CU1, 0x11); + tcg_gen_or_i32(tcg_ctx, reg, reg,temp); + + tcg_gen_shli_i32(tcg_ctx, temp, cpu_CU2, 0x12); + tcg_gen_or_i32(tcg_ctx, reg, reg,temp); + + tcg_gen_shli_i32(tcg_ctx, temp, cpu_UM, 0x1E); + tcg_gen_or_i32(tcg_ctx, reg, reg,temp); + + tcg_temp_free(tcg_ctx, temp); +} + + +static void flags_to_tcgv_z_cy_ov_s_sat(TCGContext *tcg_ctx, TCGv reg) +{ + // update psw register, first reset flags before ORing new values + tcg_gen_andi_i32(tcg_ctx, reg, reg, 0xffffffe0); + TCGv temp = tcg_temp_new_i32(tcg_ctx); + tcg_gen_or_i32(tcg_ctx, reg, reg, cpu_ZF); + tcg_gen_shli_i32(tcg_ctx, temp, cpu_SF, 0x1); + tcg_gen_or_i32(tcg_ctx, reg,reg,temp); + tcg_gen_shli_i32(tcg_ctx, temp, cpu_OVF, 0x2); + tcg_gen_or_i32(tcg_ctx, reg,reg,temp); + tcg_gen_shli_i32(tcg_ctx, temp, cpu_CYF, 0x3); + tcg_gen_or_i32(tcg_ctx, reg,reg,temp); + tcg_gen_shli_i32(tcg_ctx, temp, cpu_SATF, 0x4); + tcg_gen_or_i32(tcg_ctx, reg,reg,temp); + tcg_temp_free(tcg_ctx, temp); +} + + +static void flags_to_tcgv(TCGContext *tcg_ctx, TCGv reg) +{ + flags_to_tcgv_z_cy_ov_s_sat(tcg_ctx, reg); + flags_to_tcgv_id_ep_np_ebv_cu_um(tcg_ctx, reg); +} + + +static TCGv condition_satisfied(TCGContext *tcg_ctx, int cond) +{ + TCGv condResult = tcg_temp_new_i32(tcg_ctx); + tcg_gen_movi_i32(tcg_ctx, condResult, 0x0); + + switch(cond) { + case GE_COND: + tcg_gen_xor_i32(tcg_ctx, condResult, cpu_SF, cpu_OVF); + tcg_gen_not_i32(tcg_ctx, condResult, condResult); + tcg_gen_andi_i32(tcg_ctx, condResult, condResult, 0x1); + break; + case GT_COND: + tcg_gen_xor_i32(tcg_ctx, condResult, cpu_SF, cpu_OVF); + tcg_gen_or_i32(tcg_ctx, condResult, condResult, cpu_ZF); + tcg_gen_not_i32(tcg_ctx, condResult, condResult); + tcg_gen_andi_i32(tcg_ctx, condResult, condResult, 0x1); + break; + case LE_COND: + tcg_gen_xor_i32(tcg_ctx, condResult, cpu_SF, cpu_OVF); + tcg_gen_or_i32(tcg_ctx, condResult, condResult, cpu_ZF); + break; + case LT_COND: + tcg_gen_xor_i32(tcg_ctx, condResult, cpu_SF, cpu_OVF); + break; + + case H_COND: + tcg_gen_or_i32(tcg_ctx, condResult, cpu_CYF, cpu_ZF); + tcg_gen_not_i32(tcg_ctx, condResult, condResult); + tcg_gen_andi_i32(tcg_ctx, condResult, condResult, 0x1); + break; + case NH_COND: + tcg_gen_or_i32(tcg_ctx, condResult, cpu_CYF, cpu_ZF); + break; + + case NS_COND: + tcg_gen_not_i32(tcg_ctx, condResult, cpu_SF); + tcg_gen_andi_i32(tcg_ctx, condResult, condResult, 0x1); + break; + + case S_COND: + tcg_gen_mov_i32(tcg_ctx, condResult, cpu_SF); + break; + + case C_COND: + tcg_gen_mov_i32(tcg_ctx, condResult, cpu_CYF); + break; + + case NC_COND: + tcg_gen_not_i32(tcg_ctx, condResult, cpu_CYF); + tcg_gen_andi_i32(tcg_ctx, condResult, condResult, 0x1); + break; + case NV_COND: + tcg_gen_not_i32(tcg_ctx, condResult, cpu_OVF); + tcg_gen_andi_i32(tcg_ctx, condResult, condResult, 0x1); + break; + case NZ_COND: + tcg_gen_not_i32(tcg_ctx, condResult, cpu_ZF); + tcg_gen_andi_i32(tcg_ctx, condResult, condResult, 0x1); + break; + + case SA_COND: + tcg_gen_mov_i32(tcg_ctx, condResult, cpu_SATF); + break; + case T_COND: + tcg_gen_movi_i32(tcg_ctx, condResult, 0x1); + break; + case V_COND: + tcg_gen_mov_i32(tcg_ctx, condResult, cpu_OVF); + break; + case Z_COND: + tcg_gen_mov_i32(tcg_ctx, condResult, cpu_ZF); + break; + } + + return condResult; +} + +static void gen_flags_on_add(TCGContext *tcg_ctx, TCGv_i32 t0, TCGv_i32 t1) +{ + TCGLabel *cont; + TCGLabel *end; + + 
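+    /* PSW flags for the addition "t0 + t1":
+     *   CY = unsigned carry out of bit 31, obtained from the widening
+     *        add2 below (high words forced to zero),
+     *   S  = bit 31 of the result,
+     *   OV = signed overflow (operands have the same sign, result differs),
+     *   Z  = 1 when the result is zero, else 0. */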
TCGv_i32 tmp = tcg_temp_new_i32(tcg_ctx); + tcg_gen_movi_i32(tcg_ctx, tmp, 0); + // 'add2(rl, rh, al, ah, bl, bh) creates 64-bit values and adds them: + // [CYF : SF] = [tmp : t0] + [tmp : t1] + // While CYF is 0 or 1, SF bit 15 contains sign, so it + // must be shifted 31 bits to the right later. + tcg_gen_add2_i32(tcg_ctx, cpu_SF, cpu_CYF, t0, tmp, t1, tmp); + tcg_gen_mov_i32(tcg_ctx, cpu_ZF, cpu_SF); + + tcg_gen_xor_i32(tcg_ctx, cpu_OVF, cpu_SF, t0); + tcg_gen_xor_i32(tcg_ctx, tmp, t0, t1); + tcg_gen_andc_i32(tcg_ctx, cpu_OVF, cpu_OVF, tmp); + + tcg_gen_shri_i32(tcg_ctx, cpu_SF, cpu_SF, 0x1f); + tcg_gen_shri_i32(tcg_ctx, cpu_OVF, cpu_OVF, 0x1f); + + tcg_temp_free_i32(tcg_ctx, tmp); + + cont = gen_new_label(tcg_ctx); + end = gen_new_label(tcg_ctx); + + tcg_gen_brcondi_i32(tcg_ctx, TCG_COND_NE, cpu_ZF, 0x0, cont); + tcg_gen_movi_i32(tcg_ctx, cpu_ZF, 0x1); + tcg_gen_br(tcg_ctx, end); + + gen_set_label(tcg_ctx, cont); + tcg_gen_movi_i32(tcg_ctx, cpu_ZF, 0x0); + + gen_set_label(tcg_ctx, end); +} + + +static void gen_satadd_CC(TCGContext *tcg_ctx, TCGv_i32 t0, TCGv_i32 t1, TCGv_i32 result) +{ + TCGLabel *cont; + TCGLabel *end; + + TCGv_i32 tmp = tcg_temp_new_i32(tcg_ctx); + tcg_gen_movi_i32(tcg_ctx, tmp, 0); + tcg_gen_add2_i32(tcg_ctx, cpu_SF, cpu_CYF, t0, tmp, t1, tmp); + tcg_gen_mov_i32(tcg_ctx, cpu_ZF, cpu_SF); + tcg_gen_xor_i32(tcg_ctx, cpu_OVF, cpu_SF, t0); + tcg_gen_xor_i32(tcg_ctx, tmp, t0, t1); + tcg_gen_andc_i32(tcg_ctx, cpu_OVF, cpu_OVF, tmp); + + tcg_gen_shri_i32(tcg_ctx, cpu_SF, result, 0x1f); + tcg_gen_shri_i32(tcg_ctx, cpu_OVF, cpu_OVF, 0x1f); + tcg_temp_free_i32(tcg_ctx, tmp); + + cont = gen_new_label(tcg_ctx); + end = gen_new_label(tcg_ctx); + + tcg_gen_brcondi_i32(tcg_ctx, TCG_COND_NE, cpu_ZF, 0x0, cont); + tcg_gen_movi_i32(tcg_ctx, cpu_ZF, 0x1); + tcg_gen_br(tcg_ctx, end); + + gen_set_label(tcg_ctx, cont); + tcg_gen_movi_i32(tcg_ctx, cpu_ZF, 0x0); + + gen_set_label(tcg_ctx, end); +} + +static void gen_flags_on_sub(TCGContext *tcg_ctx, TCGv_i32 t0, TCGv_i32 t1) +{ + tcg_gen_sub_tl(tcg_ctx, cpu_SF, t0, t1); + tcg_gen_setcond_i32(tcg_ctx, TCG_COND_GTU, cpu_CYF, t1, t0); + tcg_gen_setcond_i32(tcg_ctx, TCG_COND_EQ, cpu_ZF, t0, t1); + tcg_gen_xor_i32(tcg_ctx, cpu_OVF, cpu_SF, t0); + TCGv_i32 tmp = tcg_temp_new_i32(tcg_ctx); + tcg_gen_xor_i32(tcg_ctx, tmp, t0, t1); + tcg_gen_and_i32(tcg_ctx, cpu_OVF, cpu_OVF, tmp); + + tcg_gen_shri_i32(tcg_ctx, cpu_SF, cpu_SF, 0x1f); + tcg_gen_shri_i32(tcg_ctx, cpu_OVF, cpu_OVF, 0x1f); + tcg_temp_free_i32(tcg_ctx, tmp); +} + +static void gen_satsub_CC(TCGContext *tcg_ctx, TCGv_i32 t0, TCGv_i32 t1, TCGv_i32 result) +{ + TCGLabel *cont; + TCGLabel *end; + + TCGv_i32 tmp; + tcg_gen_sub_tl(tcg_ctx, cpu_SF, t0, t1); + + tcg_gen_mov_i32(tcg_ctx, cpu_ZF, cpu_SF); + tcg_gen_setcond_i32(tcg_ctx, TCG_COND_GTU, cpu_CYF, t1, t0); + tcg_gen_xor_i32(tcg_ctx, cpu_OVF, cpu_SF, t0); + tmp = tcg_temp_new_i32(tcg_ctx); + tcg_gen_xor_i32(tcg_ctx, tmp, t0, t1); + tcg_gen_and_i32(tcg_ctx, cpu_OVF, cpu_OVF, tmp); + + tcg_gen_shri_i32(tcg_ctx, cpu_SF, result, 0x1f); + tcg_gen_shri_i32(tcg_ctx, cpu_OVF, cpu_OVF, 0x1f); + tcg_temp_free_i32(tcg_ctx, tmp); + + cont = gen_new_label(tcg_ctx); + end = gen_new_label(tcg_ctx); + + tcg_gen_brcondi_i32(tcg_ctx, TCG_COND_NE, cpu_ZF, 0x0, cont); + tcg_gen_movi_i32(tcg_ctx, cpu_ZF, 0x1); + tcg_gen_br(tcg_ctx, end); + + gen_set_label(tcg_ctx, cont); + tcg_gen_movi_i32(tcg_ctx, cpu_ZF, 0x0); + + gen_set_label(tcg_ctx, end); +} + +static void gen_logic_CC(TCGContext *tcg_ctx, TCGv_i32 result){ + + TCGLabel *cont; + TCGLabel *end; + + 
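+    /* Flags for logical/bit-wise results: OV is cleared, S is bit 31 of
+     * the result, and Z is set when the result is zero; CY and SAT are
+     * left untouched. */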
tcg_gen_movi_i32(tcg_ctx, cpu_OVF, 0x0); + tcg_gen_shri_i32(tcg_ctx, cpu_SF, result, 0x1f); + + cont = gen_new_label(tcg_ctx); + end = gen_new_label(tcg_ctx); + + tcg_gen_brcondi_i32(tcg_ctx, TCG_COND_NE, result, 0x0, cont); + tcg_gen_movi_i32(tcg_ctx, cpu_ZF, 0x1); + tcg_gen_br(tcg_ctx, end); + + gen_set_label(tcg_ctx, cont); + tcg_gen_movi_i32(tcg_ctx, cpu_ZF, 0x0); + + gen_set_label(tcg_ctx, end); +} + + +static void gen_load(DisasContext *ctx, int memop, int rd, int rs1, + target_long imm, unsigned is_disp23) +{ + TCGContext *tcg_ctx = ctx->uc->tcg_ctx; + TCGv t0 = tcg_temp_new(tcg_ctx); + TCGv t1 = tcg_temp_new(tcg_ctx); + TCGv tcg_imm = tcg_temp_new(tcg_ctx); + TCGv_i64 t1_64 = tcg_temp_new_i64(tcg_ctx); + TCGv t1_high = tcg_temp_new(tcg_ctx); + + gen_get_gpr(tcg_ctx, t0, rs1); + tcg_gen_movi_i32(tcg_ctx, tcg_imm, imm); + + if (!is_disp23) + tcg_gen_ext16s_i32(tcg_ctx, tcg_imm, tcg_imm); + else { + tcg_gen_shli_i32(tcg_ctx, tcg_imm, tcg_imm, 9); + tcg_gen_sari_i32(tcg_ctx, tcg_imm, tcg_imm, 9); + } + + tcg_gen_add_tl(tcg_ctx, t0, t0, tcg_imm); + + if (memop == MO_TEQ) { + tcg_gen_qemu_ld_i64(tcg_ctx, t1_64, t0, MEM_IDX, memop); + tcg_gen_extrl_i64_i32(tcg_ctx, t1, t1_64); + tcg_gen_extrh_i64_i32(tcg_ctx, t1_high, t1_64); + gen_set_gpr(tcg_ctx, rd, t1); + gen_set_gpr(tcg_ctx, rd+1, t1_high); + } + else { + tcg_gen_qemu_ld_tl(tcg_ctx, t1, t0, MEM_IDX, memop); + gen_set_gpr(tcg_ctx, rd, t1); + } + + tcg_temp_free(tcg_ctx, t0); + tcg_temp_free(tcg_ctx, t1); + tcg_temp_free(tcg_ctx, tcg_imm); + tcg_temp_free_i64(tcg_ctx, t1_64); + tcg_temp_free(tcg_ctx, t1_high); +} + +static void gen_store(DisasContext *ctx, int memop, int rs1, int rs2, + target_long imm, unsigned is_disp23) +{ + TCGContext *tcg_ctx = ctx->uc->tcg_ctx; + TCGv t0 = tcg_temp_new(tcg_ctx); + TCGv dat = tcg_temp_new(tcg_ctx); + TCGv tcg_imm = tcg_temp_new(tcg_ctx); + TCGv dat_high = tcg_temp_new(tcg_ctx); + TCGv_i64 dat64 = tcg_temp_new_i64(tcg_ctx); + + gen_get_gpr(tcg_ctx, t0, rs1); // loading rs1 to t0 + tcg_gen_movi_i32(tcg_ctx, tcg_imm, imm); + + if (!is_disp23) + tcg_gen_ext16s_i32(tcg_ctx, tcg_imm, tcg_imm); + else { + tcg_gen_shli_i32(tcg_ctx, tcg_imm, tcg_imm, 9); + tcg_gen_sari_i32(tcg_ctx, tcg_imm, tcg_imm, 9); + } + + tcg_gen_add_tl(tcg_ctx, t0, t0, tcg_imm); // adding displacement to t0 + + gen_get_gpr(tcg_ctx, dat, rs2); // getting data from rs2 + + if (memop == MO_TEQ) { + gen_get_gpr(tcg_ctx, dat_high, rs2+1); + tcg_gen_concat_i32_i64(tcg_ctx, dat64, dat, dat_high); + tcg_gen_qemu_st_i64(tcg_ctx, dat64, t0, MEM_IDX, memop); + } + else { + tcg_gen_qemu_st_tl(tcg_ctx, dat, t0, MEM_IDX, memop); + } + + // clear possible mutex + TCGLabel *l = gen_new_label(tcg_ctx); + tcg_gen_brcond_i32(tcg_ctx, TCG_COND_NE, t0, cpu_LLAddress, l); + tcg_gen_brcondi_i32(tcg_ctx, TCG_COND_NE, cpu_LLbit, 0x1, l); + tcg_gen_movi_i32(tcg_ctx, cpu_LLbit, 0); + gen_set_label(tcg_ctx, l); + + tcg_temp_free(tcg_ctx, t0); + tcg_temp_free(tcg_ctx, dat); + tcg_temp_free(tcg_ctx, tcg_imm); + tcg_temp_free_i64(tcg_ctx, dat64); + tcg_temp_free(tcg_ctx, dat_high); +} + +static void gen_mutual_exclusion(DisasContext *ctx, int rs3, int rs1, int operation) +{ + /* LDL.W, STC.W, CLL: Implement as described. + Add two additional global CPU registers called LLBit and LLAddress. + Set them with LDL.W, and reset them with STC.W. + If LLBit is not set or LLAddress does not match STC.W address, make STC.W fail. + CLL clears LLBit. + Since we do not implement multicore CPU emulation, this implementation should be OK. 
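+       A sketch of the resulting behaviour (plain C, for documentation only;
+       LLbit and LLAddress are the globals described above):
+           LDL.W : rs3 = *adr;  LLbit = 1;  LLAddress = adr;
+           STC.W : if (LLbit == 1 && adr == LLAddress) { *adr = rs3; rs3 = 1; }
+                   else { rs3 = 0; }
+                   LLbit = 0;
+           CLL   : LLbit = 0;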
*/ + TCGContext *tcg_ctx = ctx->uc->tcg_ctx; + + if (operation == operation_LDL_W) + { + TCGv adr = tcg_temp_new(tcg_ctx); + TCGv dat = tcg_temp_new(tcg_ctx); + + gen_get_gpr(tcg_ctx, adr, rs1); + tcg_gen_qemu_ld_tl(tcg_ctx, dat, adr, MEM_IDX, MO_TESL); + gen_set_gpr(tcg_ctx, rs3, dat); + + tcg_temp_free(tcg_ctx, adr); + tcg_temp_free(tcg_ctx, dat); + + tcg_gen_movi_i32(tcg_ctx, cpu_LLbit, 1); + tcg_gen_mov_i32(tcg_ctx, cpu_LLAddress, adr); + } + else if (operation == operation_STC_W) + { + TCGv adr = tcg_temp_local_new(tcg_ctx); + TCGv dat = tcg_temp_local_new(tcg_ctx); + TCGv token = tcg_temp_local_new(tcg_ctx); + TCGLabel *l_fail = gen_new_label(tcg_ctx); + TCGLabel *l_ok = gen_new_label(tcg_ctx); + + tcg_gen_mov_i32(tcg_ctx, token, cpu_LLbit); + tcg_gen_brcondi_i32(tcg_ctx, TCG_COND_NE, token, 0x1, l_fail); + gen_get_gpr(tcg_ctx, adr, rs1); + gen_get_gpr(tcg_ctx, dat, rs3); + tcg_gen_brcond_i32(tcg_ctx, TCG_COND_NE, adr, cpu_LLAddress, l_fail); + tcg_gen_qemu_st_tl(tcg_ctx, dat, adr, MEM_IDX, MO_TESL); + tcg_gen_movi_i32(tcg_ctx, dat, 1); + tcg_gen_br(tcg_ctx, l_ok); + + gen_set_label(tcg_ctx, l_fail); + tcg_gen_movi_i32(tcg_ctx, dat, 0); + gen_set_label(tcg_ctx, l_ok); + gen_set_gpr(tcg_ctx, rs3, dat); + + tcg_gen_movi_tl(tcg_ctx, cpu_LLbit, 0); + + tcg_temp_free(tcg_ctx, adr); + tcg_temp_free(tcg_ctx, dat); + tcg_temp_free(tcg_ctx, token); + } + else if (operation == operation_CLL) + { + tcg_gen_movi_i32(tcg_ctx, cpu_LLbit, 0); + } + else + printf("ERROR gen_mutual_exclusion \n"); +} + + +static void gen_multiply(DisasContext *ctx, int rs1, int rs2, int operation) +{ + TCGContext *tcg_ctx = ctx->uc->tcg_ctx; + + TCGv r1 = tcg_temp_new(tcg_ctx); //temp + TCGv r2 = tcg_temp_new(tcg_ctx); //temp + + gen_get_gpr(tcg_ctx, r1, rs1); //loading rs1 to t0 + gen_get_gpr(tcg_ctx, r2, rs2); //loading rs2 to t1 + int imm = rs1; + int imm_32; + int int_rs3; + + TCGv tcg_imm = tcg_temp_new(tcg_ctx); + TCGv tcg_imm32 = tcg_temp_new(tcg_ctx); + TCGv tcg_r3 = tcg_temp_new(tcg_ctx); + TCGv tcg_temp = tcg_temp_new(tcg_ctx); + + switch(operation){ + case OPC_RH850_MUL_reg1_reg2_reg3: + int_rs3 = extract32(ctx->opcode, 27, 5); + gen_get_gpr(tcg_ctx, tcg_r3,int_rs3); + + tcg_gen_muls2_i32(tcg_ctx, r2, tcg_r3, r1, r2); + if(rs2!=int_rs3){ + gen_set_gpr(tcg_ctx, rs2, r2); + } + gen_set_gpr(tcg_ctx, int_rs3,tcg_r3); + break; + + case OPC_RH850_MUL_imm9_reg2_reg3: + int_rs3 = extract32(ctx->opcode, 27, 5); + gen_get_gpr(tcg_ctx, tcg_r3,int_rs3); + + imm_32 = extract32(ctx->opcode, 18, 4); + imm_32 = imm | (imm_32 << 5); + + // sign extension + if((imm_32 & 0x100) == 0x100){ + imm_32 = imm_32 | (0x7f << 9); + } + tcg_gen_movi_tl(tcg_ctx, tcg_imm32, imm_32); + tcg_gen_ext16s_tl(tcg_ctx, tcg_imm32, tcg_imm32); + + tcg_gen_muls2_i32(tcg_ctx, r2, tcg_r3, tcg_imm32, r2); + + if(rs2!=int_rs3){ + gen_set_gpr(tcg_ctx, rs2, r2); + } + gen_set_gpr(tcg_ctx, int_rs3, tcg_r3); + break; + + case OPC_RH850_MULH_reg1_reg2: + + tcg_gen_andi_tl(tcg_ctx, r1, r1,0x0000FFFF); + tcg_gen_andi_tl(tcg_ctx, r2, r2,0x0000FFFF); + tcg_gen_ext16s_i32(tcg_ctx, r1, r1); + tcg_gen_ext16s_i32(tcg_ctx, r2, r2); + + tcg_gen_mul_tl(tcg_ctx, r2, r2, r1); + gen_set_gpr(tcg_ctx, rs2, r2); + break; + + case OPC_RH850_MULH_imm5_reg2: + + if ((imm & 0x10) == 0x10){ + imm = imm | (0x7 << 5); + } + tcg_gen_andi_tl(tcg_ctx, r2, r2,0x0000FFFF); + tcg_gen_ext16s_i32(tcg_ctx, r2, r2); + + tcg_gen_movi_tl(tcg_ctx, tcg_imm, imm); + tcg_gen_ext8s_i32(tcg_ctx, tcg_imm, tcg_imm); + tcg_gen_mul_tl(tcg_ctx, r2, r2, tcg_imm); + gen_set_gpr(tcg_ctx, rs2, r2); + 
break; + + case OPC_RH850_MULHI_imm16_reg1_reg2: + + imm_32 = extract32(ctx->opcode, 16, 16); + tcg_gen_movi_tl(tcg_ctx, tcg_imm32, imm_32); + tcg_gen_ext16s_i32(tcg_ctx, tcg_imm32, tcg_imm32); + + tcg_gen_andi_tl(tcg_ctx, r1, r1, 0x0000FFFF); + tcg_gen_ext16s_i32(tcg_ctx, r1, r1); + + tcg_gen_mul_tl(tcg_ctx, r2, r1, tcg_imm32); + + gen_set_gpr(tcg_ctx, rs2, r2); + break; + + case OPC_RH850_MULU_reg1_reg2_reg3: + + int_rs3 = extract32(ctx->opcode, 27, 5); + gen_get_gpr(tcg_ctx, tcg_r3,int_rs3); + + tcg_gen_mulu2_i32(tcg_ctx, r2, tcg_r3, r2, r1); + + if(rs2!=int_rs3){ + gen_set_gpr(tcg_ctx, rs2, r2); + } + gen_set_gpr(tcg_ctx, int_rs3,tcg_r3); + break; + + case OPC_RH850_MULU_imm9_reg2_reg3: + + int_rs3 = extract32(ctx->opcode, 27, 5); + gen_get_gpr(tcg_ctx, tcg_r3,int_rs3); + + imm_32 = extract32(ctx->opcode, 18, 4); + imm_32 = imm | (imm_32 << 5); + tcg_gen_movi_tl(tcg_ctx, tcg_imm32, imm_32); + + tcg_gen_ext16u_tl(tcg_ctx, tcg_imm32, tcg_imm32); + + tcg_gen_mulu2_i32(tcg_ctx, r2, tcg_r3, tcg_imm32, r2); + + if(rs2!=int_rs3){ + gen_set_gpr(tcg_ctx, rs2, r2); + } + gen_set_gpr(tcg_ctx, int_rs3,tcg_r3); + break; + } + + tcg_temp_free(tcg_ctx, r1); + tcg_temp_free(tcg_ctx, r2); + tcg_temp_free(tcg_ctx, tcg_r3); + tcg_temp_free(tcg_ctx, tcg_temp); + tcg_temp_free(tcg_ctx, tcg_imm); + tcg_temp_free(tcg_ctx, tcg_imm32); +} + +static void gen_mul_accumulate(DisasContext *ctx, int rs1, int rs2, int operation) +{ + TCGContext *tcg_ctx = ctx->uc->tcg_ctx; + + TCGv r1 = tcg_temp_new(tcg_ctx); + TCGv r2 = tcg_temp_new(tcg_ctx); + TCGv addLo = tcg_temp_new(tcg_ctx); + TCGv addHi = tcg_temp_new(tcg_ctx); + TCGv resLo = tcg_temp_new(tcg_ctx); + TCGv resHi = tcg_temp_new(tcg_ctx); + TCGv destLo = tcg_temp_new(tcg_ctx); + TCGv destHi = tcg_temp_new(tcg_ctx); + + gen_get_gpr(tcg_ctx, r1, rs1); + gen_get_gpr(tcg_ctx, r2, rs2); + + int rs3; + int rs4; + + rs3 = extract32(ctx->opcode, 28, 4) << 1; + rs4 = extract32(ctx->opcode, 17, 4) << 1; + + gen_get_gpr(tcg_ctx, addLo, rs3); + gen_get_gpr(tcg_ctx, addHi, rs3+1); + + switch(operation){ + case OPC_RH850_MAC_reg1_reg2_reg3_reg4: + + tcg_gen_muls2_i32(tcg_ctx, resLo, resHi, r1, r2); + tcg_gen_add2_i32(tcg_ctx, destLo, destHi, resLo, resHi, addLo, addHi); + + gen_set_gpr(tcg_ctx, rs4, destLo); + gen_set_gpr(tcg_ctx, rs4+1, destHi); + break; + + case OPC_RH850_MACU_reg1_reg2_reg3_reg4: + tcg_gen_mulu2_i32(tcg_ctx, resLo, resHi, r1, r2); + tcg_gen_add2_i32(tcg_ctx, destLo, destHi, resLo, resHi, addLo, addHi); + + gen_set_gpr(tcg_ctx, rs4, destLo); + gen_set_gpr(tcg_ctx, (rs4+1), destHi); + break; + } + + tcg_temp_free(tcg_ctx, r1); + tcg_temp_free(tcg_ctx, r2); + tcg_temp_free(tcg_ctx, addLo); + tcg_temp_free(tcg_ctx, addHi); + tcg_temp_free(tcg_ctx, resLo); + tcg_temp_free(tcg_ctx, resHi); + tcg_temp_free(tcg_ctx, destLo); + tcg_temp_free(tcg_ctx, destHi); + +} + +static void gen_arithmetic(DisasContext *ctx, int rs1, int rs2, int operation) +{ + TCGContext *tcg_ctx = ctx->uc->tcg_ctx; + + TCGv r1 = tcg_temp_new(tcg_ctx); + TCGv r2 = tcg_temp_new(tcg_ctx); + gen_get_gpr(tcg_ctx, r1, rs1); + gen_get_gpr(tcg_ctx, r2, rs2); + + int imm = rs1; + int imm_32; + uint64_t opcode48; + + TCGv tcg_imm = tcg_temp_new(tcg_ctx); + TCGv tcg_r3 = tcg_temp_new(tcg_ctx); + TCGv tcg_result = tcg_temp_new(tcg_ctx); + + switch(operation) { + + case OPC_RH850_ADD_reg1_reg2: { + + tcg_gen_add_tl(tcg_ctx, tcg_result, r2, r1); + gen_set_gpr(tcg_ctx, rs2, tcg_result); + + gen_flags_on_add(tcg_ctx, r1, r2); + + } break; + + case OPC_RH850_ADD_imm5_reg2: + if((imm & 0x10) == 0x10){ + imm = 
imm | (0x7 << 5); + } + tcg_gen_movi_i32(tcg_ctx, tcg_imm, imm); + tcg_gen_ext8s_i32(tcg_ctx, tcg_imm, tcg_imm); + tcg_gen_add_tl(tcg_ctx, tcg_result, r2, tcg_imm); + gen_set_gpr(tcg_ctx, rs2, tcg_result); + + gen_flags_on_add(tcg_ctx, r2, tcg_imm); + + break; + + case OPC_RH850_ADDI_imm16_reg1_reg2: + imm_32 = extract32(ctx->opcode, 16, 16); + tcg_gen_movi_tl(tcg_ctx, tcg_imm, imm_32); + tcg_gen_ext16s_tl(tcg_ctx, tcg_imm, tcg_imm); + tcg_gen_add_tl(tcg_ctx, r2,r1, tcg_imm); + gen_set_gpr(tcg_ctx, rs2, r2); + + gen_flags_on_add(tcg_ctx, r1, tcg_imm); + + break; + + case OPC_RH850_CMP_reg1_reg2: { + gen_flags_on_sub(tcg_ctx, r2, r1); + } break; + + case OPC_RH850_CMP_imm5_reg2: { + + if ((imm & 0x10) == 0x10){ + imm = imm | (0x7 << 5); + } + tcg_gen_movi_tl(tcg_ctx, tcg_imm, imm); + tcg_gen_ext8s_i32(tcg_ctx, tcg_imm, tcg_imm); + + gen_flags_on_sub(tcg_ctx, r2, tcg_imm); + + } break; + + case OPC_RH850_MOV_reg1_reg2: + tcg_gen_mov_tl(tcg_ctx, r2, r1); + gen_set_gpr(tcg_ctx, rs2, r2); + break; + + case OPC_RH850_MOV_imm5_reg2: + if ((imm & 0x10) == 0x10){ + imm = imm | (0x7 << 5); + } + tcg_gen_movi_tl(tcg_ctx, r2, imm); + tcg_gen_ext8s_i32(tcg_ctx, r2, r2); + + gen_set_gpr(tcg_ctx, rs2, r2); + break; + + case OPC_RH850_MOV_imm32_reg1: // 48bit instruction + opcode48 = (ctx->opcode1); + opcode48 = (ctx->opcode) | (opcode48 << 0x20); + imm_32 = extract64(opcode48, 16, 32) & 0xffffffff; + tcg_gen_movi_i32(tcg_ctx, r2, imm_32); + gen_set_gpr(tcg_ctx, rs2, r2); + break; + + case OPC_RH850_MOVEA_imm16_reg1_reg2: + imm_32 = extract32(ctx->opcode, 16, 16); + tcg_gen_movi_i32(tcg_ctx, tcg_imm, imm_32); + tcg_gen_ext16s_i32(tcg_ctx, tcg_imm, tcg_imm); + + tcg_gen_add_i32(tcg_ctx, r2, tcg_imm, r1); + gen_set_gpr(tcg_ctx, rs2, r2); + break; + + case OPC_RH850_MOVHI_imm16_reg1_reg2: + imm_32 = extract32(ctx->opcode, 16, 16); + tcg_gen_movi_i32(tcg_ctx, tcg_imm, imm_32); + tcg_gen_shli_i32(tcg_ctx, tcg_imm, tcg_imm, 0x10); + + tcg_gen_add_i32(tcg_ctx, r2, tcg_imm, r1); + gen_set_gpr(tcg_ctx, rs2, r2); + break; + + case OPC_RH850_SUB_reg1_reg2: + + tcg_gen_sub_tl(tcg_ctx, tcg_result, r2, r1); + gen_set_gpr(tcg_ctx, rs2, tcg_result); + gen_flags_on_sub(tcg_ctx, r2, r1); + break; + + case OPC_RH850_SUBR_reg1_reg2: + tcg_gen_sub_tl(tcg_ctx, tcg_result, r1, r2); + gen_set_gpr(tcg_ctx, rs2, tcg_result); + gen_flags_on_sub(tcg_ctx, r1, r2); + break; + } + + tcg_temp_free(tcg_ctx, r1); + tcg_temp_free(tcg_ctx, r2); + tcg_temp_free(tcg_ctx, tcg_imm); + tcg_temp_free(tcg_ctx, tcg_r3); + tcg_temp_free(tcg_ctx, tcg_result); +} + +static void gen_cond_arith(DisasContext *ctx, int rs1, int rs2, int operation) +{ + TCGContext *tcg_ctx = ctx->uc->tcg_ctx; + + TCGv r1 = tcg_temp_local_new(tcg_ctx); + TCGv r2 = tcg_temp_local_new(tcg_ctx); + + TCGLabel *cont; + + gen_get_gpr(tcg_ctx, r1, rs1); + gen_get_gpr(tcg_ctx, r2, rs2); + + int int_rs3; + int int_cond; + + switch(operation){ + + case OPC_RH850_ADF_cccc_reg1_reg2_reg3:{ + + TCGv r1_local = tcg_temp_local_new_i32(tcg_ctx); + TCGv r2_local = tcg_temp_local_new_i32(tcg_ctx); + TCGv r3_local = tcg_temp_local_new_i32(tcg_ctx); + TCGv addIfCond = tcg_temp_local_new_i32(tcg_ctx); + TCGv carry = tcg_temp_local_new_i32(tcg_ctx); + TCGv overflow = tcg_temp_local_new_i32(tcg_ctx); + + tcg_gen_movi_tl(tcg_ctx, carry, 0); + tcg_gen_movi_tl(tcg_ctx, overflow, 0); + + int_rs3 = extract32(ctx->opcode, 27, 5); + int_cond = extract32(ctx->opcode, 17, 4); + if(int_cond == 0xd){ + //throw exception/warning for inappropriate condition (SA) + break; + } + + tcg_gen_mov_i32(tcg_ctx, 
r1_local, r1); + tcg_gen_mov_i32(tcg_ctx, r2_local, r2); + gen_get_gpr(tcg_ctx, r3_local,int_rs3); + tcg_gen_movi_i32(tcg_ctx, addIfCond, 0x1); + + TCGv condResult = condition_satisfied(tcg_ctx, int_cond); + cont = gen_new_label(tcg_ctx); + + tcg_gen_brcondi_i32(tcg_ctx, TCG_COND_NE, condResult, 0x1, cont); + // calc and store CY and OV flags to be used to obtain final values + gen_flags_on_add(tcg_ctx, r2_local, addIfCond); + tcg_gen_mov_tl(tcg_ctx, carry, cpu_CYF); + tcg_gen_mov_tl(tcg_ctx, overflow, cpu_OVF); + // on cond true, add 1 + tcg_gen_add_tl(tcg_ctx, r2_local, r2_local, addIfCond); + + gen_set_label(tcg_ctx, cont); + tcg_gen_add_tl(tcg_ctx, r3_local, r1_local, r2_local); + gen_set_gpr(tcg_ctx, int_rs3, r3_local); + + gen_flags_on_add(tcg_ctx, r1_local, r2_local); + tcg_gen_or_tl(tcg_ctx, cpu_CYF, cpu_CYF, carry); + tcg_gen_or_tl(tcg_ctx, cpu_OVF, cpu_OVF, overflow); + + tcg_temp_free(tcg_ctx, condResult); + tcg_temp_free_i32(tcg_ctx, r1_local); + tcg_temp_free_i32(tcg_ctx, r2_local); + tcg_temp_free_i32(tcg_ctx, r3_local); + tcg_temp_free_i32(tcg_ctx, addIfCond); + } + break; + + case OPC_RH850_SBF_cccc_reg1_reg2_reg3:{ + + int_rs3 = extract32(ctx->opcode, 27, 5); + int_cond = extract32(ctx->opcode, 17, 4); + if(int_cond == 0xd){ + //throw exception/warning for inappropriate condition (SA) + break; + } + + TCGv r3_local = tcg_temp_local_new(tcg_ctx); + TCGv tmpReg = tcg_temp_local_new(tcg_ctx); + TCGv carry = tcg_temp_local_new(tcg_ctx); + TCGv overflow = tcg_temp_local_new(tcg_ctx); + cont = gen_new_label(tcg_ctx); + + tcg_gen_movi_tl(tcg_ctx, carry, 0); + tcg_gen_movi_tl(tcg_ctx, overflow, 0); + + tcg_gen_mov_i32(tcg_ctx, r3_local, r2); + + TCGv condResult = condition_satisfied(tcg_ctx, int_cond); + // store to local temp, because condResult is valid only until branch in gen_flags_on_sub + tcg_gen_mov_tl(tcg_ctx, tmpReg, condResult); + + gen_flags_on_sub(tcg_ctx, r3_local, r1); + tcg_gen_mov_tl(tcg_ctx, carry, cpu_CYF); + tcg_gen_mov_tl(tcg_ctx, overflow, cpu_OVF); + tcg_gen_sub_tl(tcg_ctx, r3_local, r3_local, r1); + + tcg_gen_brcondi_i32(tcg_ctx, TCG_COND_NE, tmpReg, 0x1, cont); + tcg_gen_movi_i32(tcg_ctx, tmpReg, 0x1); + gen_flags_on_sub(tcg_ctx, r3_local, tmpReg); + tcg_gen_subi_tl(tcg_ctx, r3_local, r3_local, 1); + tcg_gen_or_tl(tcg_ctx, cpu_CYF, cpu_CYF, carry); + // overflow twice means no overflow + tcg_gen_xor_tl(tcg_ctx, cpu_OVF, cpu_OVF, overflow); + + gen_set_label(tcg_ctx, cont); + + gen_set_gpr(tcg_ctx, int_rs3, r3_local); + + tcg_temp_free(tcg_ctx, condResult); + tcg_temp_free_i32(tcg_ctx, r3_local); + tcg_temp_free_i32(tcg_ctx, tmpReg); + tcg_temp_free_i32(tcg_ctx, overflow); + tcg_temp_free_i32(tcg_ctx, carry); + } + break; + } + + tcg_temp_free_i32(tcg_ctx, r1); + tcg_temp_free_i32(tcg_ctx, r2); +} + +static void gen_sat_op(DisasContext *ctx, int rs1, int rs2, int operation) +{ + TCGContext *tcg_ctx = ctx->uc->tcg_ctx; + + TCGv r1 = tcg_temp_new(tcg_ctx); + TCGv r2 = tcg_temp_new(tcg_ctx); + gen_get_gpr(tcg_ctx, r1, rs1); + gen_get_gpr(tcg_ctx, r2, rs2); + + int imm = rs1; + int int_rs3; + + TCGLabel *end; + TCGLabel *cont; + TCGLabel *cont2; + TCGLabel *setMax; + TCGLabel *dontChange; + + switch(operation){ + + case OPC_RH850_SATADD_reg1_reg2: { + + TCGv r1_local = tcg_temp_local_new(tcg_ctx); + TCGv r2_local = tcg_temp_local_new(tcg_ctx); + TCGv result = tcg_temp_local_new(tcg_ctx); + TCGv check = tcg_temp_local_new(tcg_ctx); + TCGv min = tcg_temp_local_new(tcg_ctx); + TCGv max = tcg_temp_local_new(tcg_ctx); + TCGv zero = tcg_temp_local_new(tcg_ctx); + 
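+        /*
+         * Sketch of the saturation rule implemented below (plain C, for
+         * documentation only; 'a' and 'b' stand for the two signed operands):
+         *
+         *     res = a + b;
+         *     if (a >= 0 && b > INT32_MAX - a) { res = INT32_MAX; SAT = 1; }
+         *     if (a <  0 && b < INT32_MIN - a) { res = INT32_MIN; SAT = 1; }
+         */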
tcg_gen_movi_i32(tcg_ctx, min, 0x80000000); + tcg_gen_movi_i32(tcg_ctx, max, 0x7fffffff); + tcg_gen_mov_i32(tcg_ctx, r1_local, r1); + tcg_gen_mov_i32(tcg_ctx, r2_local, r2); + tcg_gen_movi_i32(tcg_ctx, zero, 0x0); + end = gen_new_label(tcg_ctx); + cont = gen_new_label(tcg_ctx); + cont2 = gen_new_label(tcg_ctx); + + + tcg_gen_add_i32(tcg_ctx, result, r1_local, r2_local); + + tcg_gen_brcond_tl(tcg_ctx, TCG_COND_LT, r1_local, zero, cont); + + tcg_gen_sub_i32(tcg_ctx, check, max, r1_local); + tcg_gen_brcond_tl(tcg_ctx, TCG_COND_LE, r2_local, check, end); + tcg_gen_mov_i32(tcg_ctx, result, max); + tcg_gen_movi_i32(tcg_ctx, cpu_SATF, 0x1); + tcg_gen_br(tcg_ctx, end); + + //--------------------------------------------------------------------------------- + gen_set_label(tcg_ctx, cont); + tcg_gen_sub_i32(tcg_ctx, check, min, r1_local); + tcg_gen_brcond_tl(tcg_ctx, TCG_COND_GE, r2_local, check, cont2); + tcg_gen_mov_i32(tcg_ctx, result, min); + tcg_gen_movi_i32(tcg_ctx, cpu_SATF, 0x1); + + gen_set_label(tcg_ctx, cont2); + gen_set_label(tcg_ctx, end); + gen_set_gpr(tcg_ctx, rs2, result); + + gen_satadd_CC(tcg_ctx, r1_local, r2_local, result); // moves also SET flag to psw + + tcg_temp_free(tcg_ctx, result); + tcg_temp_free(tcg_ctx, check); + tcg_temp_free(tcg_ctx, min); + tcg_temp_free(tcg_ctx, max); + tcg_temp_free(tcg_ctx, r1_local); + tcg_temp_free(tcg_ctx, r2_local); + tcg_temp_free(tcg_ctx, zero); + + } break; + + case OPC_RH850_SATADD_imm5_reg2: { + + TCGv imm_local = tcg_temp_local_new(tcg_ctx); + TCGv r2_local = tcg_temp_local_new(tcg_ctx); + TCGv result = tcg_temp_local_new(tcg_ctx); + TCGv check = tcg_temp_local_new(tcg_ctx); + TCGv min = tcg_temp_local_new(tcg_ctx); + TCGv max = tcg_temp_local_new(tcg_ctx); + TCGv zero = tcg_temp_local_new(tcg_ctx); + tcg_gen_movi_i32(tcg_ctx, min, 0x80000000); + tcg_gen_movi_i32(tcg_ctx, max, 0x7fffffff); + tcg_gen_mov_i32(tcg_ctx, r2_local, r2); + tcg_gen_movi_i32(tcg_ctx, zero, 0x0); + end = gen_new_label(tcg_ctx); + cont = gen_new_label(tcg_ctx); + cont2 = gen_new_label(tcg_ctx); + + if ((imm & 0x10) == 0x10){ + imm = imm | (0x7 << 5); + } + + tcg_gen_movi_tl(tcg_ctx, imm_local, imm); + tcg_gen_ext8s_tl(tcg_ctx, imm_local, imm_local); + + tcg_gen_add_i32(tcg_ctx, result, imm_local, r2_local); + + tcg_gen_brcond_tl(tcg_ctx, TCG_COND_LT, imm_local, zero, cont); + + tcg_gen_sub_i32(tcg_ctx, check, max, imm_local); + tcg_gen_brcond_tl(tcg_ctx, TCG_COND_LE, r2_local, check, end); + tcg_gen_mov_i32(tcg_ctx, result, max); + tcg_gen_movi_i32(tcg_ctx, cpu_SATF, 0x1); + tcg_gen_br(tcg_ctx, end); + + //--------------------------------------------------------------------------------- + gen_set_label(tcg_ctx, cont); + tcg_gen_sub_i32(tcg_ctx, check, min, imm_local); + tcg_gen_brcond_tl(tcg_ctx, TCG_COND_GE, r2_local, check, cont2); + tcg_gen_mov_i32(tcg_ctx, result, min); + tcg_gen_movi_i32(tcg_ctx, cpu_SATF, 0x1); + + gen_set_label(tcg_ctx, cont2); + gen_set_label(tcg_ctx, end); + gen_set_gpr(tcg_ctx, rs2, result); + + gen_satadd_CC(tcg_ctx, r2_local, imm_local, result); + + tcg_temp_free(tcg_ctx, result); + tcg_temp_free(tcg_ctx, check); + tcg_temp_free(tcg_ctx, min); + tcg_temp_free(tcg_ctx, max); + tcg_temp_free(tcg_ctx, imm_local); + tcg_temp_free(tcg_ctx, r2_local); + tcg_temp_free(tcg_ctx, zero); + + } break; + + case OPC_RH850_SATADD_reg1_reg2_reg3: { + + TCGv r1_local = tcg_temp_local_new(tcg_ctx); + TCGv r2_local = tcg_temp_local_new(tcg_ctx); + TCGv result = tcg_temp_local_new(tcg_ctx); + TCGv check = tcg_temp_local_new(tcg_ctx); + TCGv min = 
tcg_temp_local_new(tcg_ctx); + TCGv max = tcg_temp_local_new(tcg_ctx); + TCGv zero = tcg_temp_local_new(tcg_ctx); + tcg_gen_movi_i32(tcg_ctx, min, 0x80000000); + tcg_gen_movi_i32(tcg_ctx, max, 0x7fffffff); + tcg_gen_mov_i32(tcg_ctx, r1_local, r1); + tcg_gen_mov_i32(tcg_ctx, r2_local, r2); + tcg_gen_movi_i32(tcg_ctx, zero, 0x0); + end = gen_new_label(tcg_ctx); + cont = gen_new_label(tcg_ctx); + cont2 = gen_new_label(tcg_ctx); + + int_rs3 = extract32(ctx->opcode, 27, 5); + tcg_gen_add_i32(tcg_ctx, result, r1_local, r2_local); + + tcg_gen_brcond_tl(tcg_ctx, TCG_COND_LT, r1_local, zero, cont); //if (r1 > 0) + + tcg_gen_sub_i32(tcg_ctx, check, max, r1_local); + tcg_gen_brcond_tl(tcg_ctx, TCG_COND_LE, r2_local, check, end); //if (r2 > MAX-r1) + tcg_gen_mov_i32(tcg_ctx, result, max); //return MAX; + tcg_gen_movi_i32(tcg_ctx, cpu_SATF, 0x1); + tcg_gen_br(tcg_ctx, end); + + //--------------------------------------------------------------------------------- + gen_set_label(tcg_ctx, cont); //else + tcg_gen_sub_i32(tcg_ctx, check, min, r1_local); + tcg_gen_brcond_tl(tcg_ctx, TCG_COND_GE, r2_local, check, cont2); //if (r2 < MIN-r1) + tcg_gen_mov_i32(tcg_ctx, result, min); //return MIN; + tcg_gen_movi_i32(tcg_ctx, cpu_SATF, 0x1); + + gen_set_label(tcg_ctx, cont2); + gen_set_label(tcg_ctx, end); + gen_set_gpr(tcg_ctx, int_rs3, result); + + gen_satadd_CC(tcg_ctx, r1_local, r2_local, result); + + tcg_temp_free(tcg_ctx, result); + tcg_temp_free(tcg_ctx, check); + tcg_temp_free(tcg_ctx, min); + tcg_temp_free(tcg_ctx, max); + tcg_temp_free(tcg_ctx, r1_local); + tcg_temp_free(tcg_ctx, r2_local); + tcg_temp_free(tcg_ctx, zero); + + } break; + + case OPC_RH850_SATSUB_reg1_reg2: { + + TCGv r1_local = tcg_temp_local_new(tcg_ctx); + TCGv r2_local = tcg_temp_local_new(tcg_ctx); + TCGv result = tcg_temp_local_new(tcg_ctx); + TCGv check = tcg_temp_local_new(tcg_ctx); + TCGv min = tcg_temp_local_new(tcg_ctx); + TCGv max = tcg_temp_local_new(tcg_ctx); + TCGv zero = tcg_temp_local_new(tcg_ctx); + tcg_gen_movi_i32(tcg_ctx, min, 0x80000000); + tcg_gen_movi_i32(tcg_ctx, max, 0x7fffffff); + tcg_gen_mov_i32(tcg_ctx, r1_local, r1); + tcg_gen_mov_i32(tcg_ctx, r2_local, r2); + tcg_gen_movi_i32(tcg_ctx, zero, 0x0); + end = gen_new_label(tcg_ctx); + cont = gen_new_label(tcg_ctx); + cont2 = gen_new_label(tcg_ctx); + setMax = gen_new_label(tcg_ctx); + dontChange = gen_new_label(tcg_ctx); + + /* + * Negating second operand and using satadd code. When negating an operand + * with value 0x80000000, the result overflows positive numbers and is not + * negated. If this happens, the operand is first incremented, and then negated. + * The second operand is as well incremented, if it's value is less than 0x7fffffff. + * Otherwise, the result is set to MAX and SATF is set. + * This was done in all following saturated subtraction functions. 
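+	 * Worked example: for r1 == INT32_MIN, -r1 cannot be represented, so
+	 * both operands are incremented first (r2 - r1 == (r2 + 1) - (r1 + 1));
+	 * if r2 is already INT32_MAX, the result is saturated to INT32_MAX
+	 * directly.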
+ */ + + tcg_gen_brcondi_i32(tcg_ctx, TCG_COND_NE, r1_local, 0x80000000, dontChange); + tcg_gen_brcondi_i32(tcg_ctx, TCG_COND_EQ, r2_local, 0x7fffffff, setMax); + + tcg_gen_addi_i32(tcg_ctx, r1_local, r1_local, 0x1); + tcg_gen_addi_i32(tcg_ctx, r2_local, r2_local, 0x1); + gen_set_label(tcg_ctx, dontChange); + + tcg_gen_neg_i32(tcg_ctx, r1_local, r1_local); + tcg_gen_add_i32(tcg_ctx, result, r1_local, r2_local); + + tcg_gen_brcond_tl(tcg_ctx, TCG_COND_LT, r1_local, zero, cont); + + tcg_gen_sub_i32(tcg_ctx, check, max, r1_local); + tcg_gen_brcond_tl(tcg_ctx, TCG_COND_LE, r2_local, check, end); + gen_set_label(tcg_ctx, setMax); + tcg_gen_mov_i32(tcg_ctx, result, max); + tcg_gen_movi_i32(tcg_ctx, cpu_SATF, 0x1); + tcg_gen_br(tcg_ctx, end); + + //--------------------------------------------------------------------------------- + gen_set_label(tcg_ctx, cont); + tcg_gen_sub_i32(tcg_ctx, check, min, r1_local); + tcg_gen_brcond_tl(tcg_ctx, TCG_COND_GE, r2_local, check, cont2); + tcg_gen_mov_i32(tcg_ctx, result, min); + tcg_gen_movi_i32(tcg_ctx, cpu_SATF, 0x1); + + gen_set_label(tcg_ctx, cont2); + gen_set_label(tcg_ctx, end); + gen_set_gpr(tcg_ctx, rs2, result); + + // second negation is needed for appropriate flag calculation + tcg_gen_neg_i32(tcg_ctx, r1_local, r1_local); + gen_satsub_CC(tcg_ctx, r2_local, r1_local, result); + + tcg_temp_free(tcg_ctx, result); + tcg_temp_free(tcg_ctx, check); + tcg_temp_free(tcg_ctx, min); + tcg_temp_free(tcg_ctx, max); + tcg_temp_free(tcg_ctx, r1_local); + tcg_temp_free(tcg_ctx, r2_local); + tcg_temp_free(tcg_ctx, zero); + + } break; + + case OPC_RH850_SATSUB_reg1_reg2_reg3: { + TCGv r1_local = tcg_temp_local_new(tcg_ctx); + TCGv r2_local = tcg_temp_local_new(tcg_ctx); + TCGv result = tcg_temp_local_new(tcg_ctx); + TCGv check = tcg_temp_local_new(tcg_ctx); + TCGv min = tcg_temp_local_new(tcg_ctx); + TCGv max = tcg_temp_local_new(tcg_ctx); + TCGv zero = tcg_temp_local_new(tcg_ctx); + tcg_gen_movi_i32(tcg_ctx, min, 0x80000000); + tcg_gen_movi_i32(tcg_ctx, max, 0x7fffffff); + tcg_gen_mov_i32(tcg_ctx, r1_local, r1); + tcg_gen_mov_i32(tcg_ctx, r2_local, r2); + tcg_gen_movi_i32(tcg_ctx, zero, 0x0); + end = gen_new_label(tcg_ctx); + cont = gen_new_label(tcg_ctx); + cont2 = gen_new_label(tcg_ctx); + setMax = gen_new_label(tcg_ctx); + dontChange = gen_new_label(tcg_ctx); + int_rs3 = extract32(ctx->opcode, 27, 5); + + tcg_gen_brcondi_i32(tcg_ctx, TCG_COND_NE, r1_local, 0x80000000, dontChange); + tcg_gen_brcondi_i32(tcg_ctx, TCG_COND_EQ, r2_local, 0x7fffffff, setMax); + + tcg_gen_addi_i32(tcg_ctx, r1_local, r1_local, 0x1); + tcg_gen_addi_i32(tcg_ctx, r2_local, r2_local, 0x1); + gen_set_label(tcg_ctx, dontChange); + + tcg_gen_neg_i32(tcg_ctx, r1_local, r1_local); + tcg_gen_add_i32(tcg_ctx, result, r1_local, r2_local); + + tcg_gen_brcond_tl(tcg_ctx, TCG_COND_LT, r1_local, zero, cont); + + tcg_gen_sub_i32(tcg_ctx, check, max, r1_local); + tcg_gen_brcond_tl(tcg_ctx, TCG_COND_LE, r2_local, check, end); + gen_set_label(tcg_ctx, setMax); + tcg_gen_mov_i32(tcg_ctx, result, max); + tcg_gen_movi_i32(tcg_ctx, cpu_SATF, 0x1); + tcg_gen_br(tcg_ctx, end); + + //--------------------------------------------------------------------------------- + gen_set_label(tcg_ctx, cont); + tcg_gen_sub_i32(tcg_ctx, check, min, r1_local); + tcg_gen_brcond_tl(tcg_ctx, TCG_COND_GE, r2_local, check, cont2); + tcg_gen_mov_i32(tcg_ctx, result, min); + tcg_gen_movi_i32(tcg_ctx, cpu_SATF, 0x1); + + gen_set_label(tcg_ctx, cont2); + gen_set_label(tcg_ctx, end); + gen_set_gpr(tcg_ctx, int_rs3, result); + + 
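+            // undo the earlier negation so gen_satsub_CC sees the original operand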
tcg_gen_neg_i32(tcg_ctx, r1_local, r1_local); + gen_satsub_CC(tcg_ctx, r2_local, r1_local, result); + + tcg_temp_free(tcg_ctx, result); + tcg_temp_free(tcg_ctx, check); + tcg_temp_free(tcg_ctx, min); + tcg_temp_free(tcg_ctx, max); + tcg_temp_free(tcg_ctx, r1_local); + tcg_temp_free(tcg_ctx, r2_local); + tcg_temp_free(tcg_ctx, zero); + + } break; + + case OPC_RH850_SATSUBI_imm16_reg1_reg2: { + TCGv r1_local = tcg_temp_local_new(tcg_ctx); + TCGv imm_local = tcg_temp_local_new(tcg_ctx); + TCGv result = tcg_temp_local_new(tcg_ctx); + TCGv check = tcg_temp_local_new(tcg_ctx); + TCGv min = tcg_temp_local_new(tcg_ctx); + TCGv max = tcg_temp_local_new(tcg_ctx); + TCGv zero = tcg_temp_local_new(tcg_ctx); + tcg_gen_movi_i32(tcg_ctx, min, 0x80000000); + tcg_gen_movi_i32(tcg_ctx, max, 0x7fffffff); + tcg_gen_mov_i32(tcg_ctx, r1_local, r1); + imm = extract32(ctx->opcode, 16, 16); + tcg_gen_movi_i32(tcg_ctx, imm_local, imm); + tcg_gen_ext16s_i32(tcg_ctx, imm_local, imm_local); + tcg_gen_movi_i32(tcg_ctx, zero, 0x0); + end = gen_new_label(tcg_ctx); + cont = gen_new_label(tcg_ctx); + cont2 = gen_new_label(tcg_ctx); + setMax = gen_new_label(tcg_ctx); + dontChange = gen_new_label(tcg_ctx); + + tcg_gen_brcondi_i32(tcg_ctx, TCG_COND_NE, r1_local, 0x80000000, dontChange); + tcg_gen_brcondi_i32(tcg_ctx, TCG_COND_EQ, imm_local, 0x7fffffff, setMax); + + tcg_gen_addi_i32(tcg_ctx, r1_local, r1_local, 0x1); + tcg_gen_addi_i32(tcg_ctx, imm_local, imm_local, 0x1); + gen_set_label(tcg_ctx, dontChange); + + + tcg_gen_neg_i32(tcg_ctx, imm_local, imm_local); + + tcg_gen_add_i32(tcg_ctx, result, r1_local, imm_local); + + tcg_gen_brcond_tl(tcg_ctx, TCG_COND_LT, r1_local, zero, cont); + + tcg_gen_sub_i32(tcg_ctx, check, max, r1_local); + tcg_gen_brcond_tl(tcg_ctx, TCG_COND_LE, imm_local, check, end); + gen_set_label(tcg_ctx, setMax); + tcg_gen_mov_i32(tcg_ctx, result, max); + tcg_gen_movi_i32(tcg_ctx, cpu_SATF, 0x1); + tcg_gen_br(tcg_ctx, end); + + //--------------------------------------------------------------------------------- + gen_set_label(tcg_ctx, cont); + tcg_gen_sub_i32(tcg_ctx, check, min, r1_local); + tcg_gen_brcond_tl(tcg_ctx, TCG_COND_GE, imm_local, check, cont2); + tcg_gen_mov_i32(tcg_ctx, result, min); + tcg_gen_movi_i32(tcg_ctx, cpu_SATF, 0x1); + + gen_set_label(tcg_ctx, cont2); + gen_set_label(tcg_ctx, end); + gen_set_gpr(tcg_ctx, rs2, result); + + tcg_gen_neg_i32(tcg_ctx, imm_local, imm_local); + gen_satsub_CC(tcg_ctx, r1_local, imm_local, result); + + tcg_temp_free(tcg_ctx, result); + tcg_temp_free(tcg_ctx, check); + tcg_temp_free(tcg_ctx, min); + tcg_temp_free(tcg_ctx, max); + tcg_temp_free(tcg_ctx, r1_local); + tcg_temp_free(tcg_ctx, imm_local); + tcg_temp_free(tcg_ctx, zero); + + } break; + + case OPC_RH850_SATSUBR_reg1_reg2: { + + TCGv r1_local = tcg_temp_local_new(tcg_ctx); + TCGv r2_local = tcg_temp_local_new(tcg_ctx); + TCGv result = tcg_temp_local_new(tcg_ctx); + TCGv check = tcg_temp_local_new(tcg_ctx); + TCGv min = tcg_temp_local_new(tcg_ctx); + TCGv max = tcg_temp_local_new(tcg_ctx); + TCGv zero = tcg_temp_local_new(tcg_ctx); + tcg_gen_movi_i32(tcg_ctx, min, 0x80000000); + tcg_gen_movi_i32(tcg_ctx, max, 0x7fffffff); + tcg_gen_mov_i32(tcg_ctx, r1_local, r2); + tcg_gen_mov_i32(tcg_ctx, r2_local, r1); + tcg_gen_movi_i32(tcg_ctx, zero, 0x0); + end = gen_new_label(tcg_ctx); + cont = gen_new_label(tcg_ctx); + cont2 = gen_new_label(tcg_ctx); + setMax = gen_new_label(tcg_ctx); + dontChange = gen_new_label(tcg_ctx); + + /* + * Negating second operand and using satadd code. 
When negating an operand + * with value 0x80000000, the result overflows positive numbers and is not + * negated. If this happens, the operand is first incremented, and then negated. + * The second operand is as well incremented, if it's value is less than 0x7fffffff. + * Otherwise, the result is set to MAX and SATF is set. + * This was done in all following saturated subtraction functions. + */ + + tcg_gen_brcondi_i32(tcg_ctx, TCG_COND_NE, r1_local, 0x80000000, dontChange); + tcg_gen_brcondi_i32(tcg_ctx, TCG_COND_EQ, r2_local, 0x7fffffff, setMax); + + tcg_gen_addi_i32(tcg_ctx, r1_local, r1_local, 0x1); + tcg_gen_addi_i32(tcg_ctx, r2_local, r2_local, 0x1); + gen_set_label(tcg_ctx, dontChange); + + tcg_gen_neg_i32(tcg_ctx, r1_local, r1_local); + tcg_gen_add_i32(tcg_ctx, result, r1_local, r2_local); + + tcg_gen_brcond_tl(tcg_ctx, TCG_COND_LT, r1_local, zero, cont); + + tcg_gen_sub_i32(tcg_ctx, check, max, r1_local); + tcg_gen_brcond_tl(tcg_ctx, TCG_COND_LE, r2_local, check, end); + gen_set_label(tcg_ctx, setMax); + tcg_gen_mov_i32(tcg_ctx, result, max); + tcg_gen_movi_i32(tcg_ctx, cpu_SATF, 0x1); + tcg_gen_br(tcg_ctx, end); + + //--------------------------------------------------------------------------------- + gen_set_label(tcg_ctx, cont); + tcg_gen_sub_i32(tcg_ctx, check, min, r1_local); + tcg_gen_brcond_tl(tcg_ctx, TCG_COND_GE, r2_local, check, cont2); + tcg_gen_mov_i32(tcg_ctx, result, min); + tcg_gen_movi_i32(tcg_ctx, cpu_SATF, 0x1); + + gen_set_label(tcg_ctx, cont2); + gen_set_label(tcg_ctx, end); + gen_set_gpr(tcg_ctx, rs2, result); + + tcg_gen_neg_i32(tcg_ctx, r1_local, r1_local); + gen_satsub_CC(tcg_ctx, r2_local, r1_local, result); + + tcg_temp_free(tcg_ctx, result); + tcg_temp_free(tcg_ctx, check); + tcg_temp_free(tcg_ctx, min); + tcg_temp_free(tcg_ctx, max); + tcg_temp_free(tcg_ctx, r1_local); + tcg_temp_free(tcg_ctx, r2_local); + tcg_temp_free(tcg_ctx, zero); + + } break; + } + + tcg_temp_free(tcg_ctx, r1); + tcg_temp_free(tcg_ctx, r2); +} + +static void gen_logical(DisasContext *ctx, int rs1, int rs2, int operation) +{ + TCGContext *tcg_ctx = ctx->uc->tcg_ctx; + + TCGv r1 = tcg_temp_new(tcg_ctx); + TCGv r2 = tcg_temp_new(tcg_ctx); + TCGv result = tcg_temp_new(tcg_ctx); + gen_get_gpr(tcg_ctx, r1, rs1); + gen_get_gpr(tcg_ctx, r2, rs2); + + int imm_32; + TCGv tcg_imm = tcg_temp_new(tcg_ctx); + + switch(operation){ + + case OPC_RH850_AND_reg1_reg2: + tcg_gen_and_tl(tcg_ctx, r2, r2, r1); + gen_set_gpr(tcg_ctx, rs2, r2); + gen_logic_CC(tcg_ctx, r2); + break; + + case OPC_RH850_ANDI_imm16_reg1_reg2: + imm_32 = extract32(ctx->opcode, 16, 16); + tcg_gen_movi_tl(tcg_ctx, tcg_imm, imm_32); + tcg_gen_ext16u_i32(tcg_ctx, tcg_imm, tcg_imm); + tcg_gen_and_i32(tcg_ctx, r2, r1, tcg_imm); + gen_set_gpr(tcg_ctx, rs2, r2); + gen_logic_CC(tcg_ctx, r2); + break; + + case OPC_RH850_NOT_reg1_reg2: + tcg_gen_not_i32(tcg_ctx, r2, r1); + gen_set_gpr(tcg_ctx, rs2, r2); + gen_logic_CC(tcg_ctx, r2); + break; + + case OPC_RH850_OR_reg1_reg2: + tcg_gen_or_tl(tcg_ctx, r2, r2, r1); + gen_set_gpr(tcg_ctx, rs2, r2); + gen_logic_CC(tcg_ctx, r2); + break; + + case OPC_RH850_ORI_imm16_reg1_reg2: + imm_32 = extract32(ctx->opcode, 16, 16); + tcg_gen_movi_i32(tcg_ctx, tcg_imm, imm_32); + tcg_gen_ext16u_i32(tcg_ctx, tcg_imm,tcg_imm); + + tcg_gen_or_i32(tcg_ctx, r2, r1, tcg_imm); + gen_set_gpr(tcg_ctx, rs2, r2); + gen_logic_CC(tcg_ctx, r2); + break; + + case OPC_RH850_TST_reg1_reg2: + tcg_gen_and_i32(tcg_ctx, result, r1, r2); + gen_logic_CC(tcg_ctx, result); + break; + + case OPC_RH850_XOR_reg1_reg2: + 
tcg_gen_xor_i32(tcg_ctx, result, r2, r1); + gen_set_gpr(tcg_ctx, rs2, result); + gen_logic_CC(tcg_ctx, result); + break; + + case OPC_RH850_XORI_imm16_reg1_reg2: + imm_32 = extract32(ctx->opcode, 16, 16); + tcg_gen_movi_i32(tcg_ctx, tcg_imm, imm_32); + tcg_gen_ext16u_i32(tcg_ctx, tcg_imm,tcg_imm); + + tcg_gen_xor_i32(tcg_ctx, result, r1, tcg_imm); + gen_set_gpr(tcg_ctx, rs2, result); + gen_logic_CC(tcg_ctx, result); + break; + } + + tcg_temp_free(tcg_ctx, r1); + tcg_temp_free(tcg_ctx, r2); + tcg_temp_free(tcg_ctx, tcg_imm); + tcg_temp_free(tcg_ctx, result); +} + +static void gen_data_manipulation(DisasContext *ctx, int rs1, int rs2, int operation) +{ + TCGContext *tcg_ctx = ctx->uc->tcg_ctx; + + TCGv tcg_r1 = tcg_temp_new(tcg_ctx); + TCGv tcg_r2 = tcg_temp_new(tcg_ctx); + TCGv tcg_r3 = tcg_temp_new(tcg_ctx); + TCGv tcg_imm = tcg_temp_new(tcg_ctx); + TCGv tcg_temp = tcg_temp_new(tcg_ctx); + TCGv tcg_temp2 = tcg_temp_new(tcg_ctx); + TCGv insert = tcg_temp_new(tcg_ctx); + + TCGLabel *cont; + TCGLabel *end; + TCGLabel *set; + + int int_imm = rs1; + int int_rs3; + int int_cond; + int pos; + int lsb; + int msb; + int width; + int mask; + int group; + + gen_get_gpr(tcg_ctx, tcg_r1, rs1); + gen_get_gpr(tcg_ctx, tcg_r2, rs2); + + switch(operation) { + + case OPC_RH850_BINS: + + group = extract32(ctx->opcode, 21, 2); + + mask = 0; + pos = extract32(ctx->opcode, 17, 3) | (extract32(ctx->opcode, 27, 1) << 3); + lsb = pos; + + msb = extract32(ctx->opcode, 28, 4); + width = extract32(ctx->opcode, 28, 4) - pos + 1; + + switch(group){ + case 0: //bins0 + pos += 16; + break; + case 1: //bins1 + width += 16; + msb+=16; + break; + case 2: //bins2 + break; + } + + if(msbopcode, 27, 5); + tcg_gen_mov_tl(tcg_ctx, tcg_temp2, tcg_r2); + tcg_gen_movi_i32(tcg_ctx, tcg_r3, 0x0); + + tcg_gen_andi_tl(tcg_ctx, tcg_temp, tcg_temp2, 0xff000000); + tcg_gen_shri_tl(tcg_ctx, tcg_temp, tcg_temp, 0x8); + tcg_gen_or_tl(tcg_ctx, tcg_r3, tcg_r3, tcg_temp); + + tcg_gen_andi_tl(tcg_ctx, tcg_temp, tcg_temp2, 0x00ff0000); + tcg_gen_shli_tl(tcg_ctx, tcg_temp, tcg_temp, 0x8); + tcg_gen_or_tl(tcg_ctx, tcg_r3, tcg_r3, tcg_temp); + + tcg_gen_andi_tl(tcg_ctx, tcg_temp, tcg_temp2, 0x0000ff00); + tcg_gen_shri_tl(tcg_ctx, tcg_temp, tcg_temp, 0x8); + tcg_gen_or_tl(tcg_ctx, tcg_r3, tcg_r3, tcg_temp); + + tcg_gen_andi_tl(tcg_ctx, tcg_temp, tcg_temp2, 0x000000ff); + tcg_gen_shli_tl(tcg_ctx, tcg_temp, tcg_temp, 0x8); + tcg_gen_or_tl(tcg_ctx, tcg_r3, tcg_r3, tcg_temp); + + gen_set_gpr(tcg_ctx, int_rs3, tcg_r3); + + tcg_gen_mov_i32(tcg_ctx, r2_local, tcg_r2); + tcg_gen_mov_i32(tcg_ctx, r3_local, tcg_r3); + + cont = gen_new_label(tcg_ctx); + end = gen_new_label(tcg_ctx); + set = gen_new_label(tcg_ctx); + tcg_gen_andi_i32(tcg_ctx, temp_local, r3_local, 0x0000ffff); + tcg_gen_setcondi_i32(tcg_ctx, TCG_COND_EQ, cpu_ZF, temp_local, 0x0); + tcg_gen_shri_i32(tcg_ctx, cpu_SF, r3_local, 0x1f); + + tcg_gen_movi_i32(tcg_ctx, count_local, 0x0); + + tcg_gen_andi_i32(tcg_ctx, temp_local, r3_local, 0x000000ff); + tcg_gen_brcondi_i32(tcg_ctx, TCG_COND_EQ, temp_local, 0x0, set); + tcg_gen_andi_i32(tcg_ctx, temp_local, r3_local, 0x0000ff00); + tcg_gen_brcondi_i32(tcg_ctx, TCG_COND_EQ, temp_local, 0x0, set); + + tcg_gen_movi_i32(tcg_ctx, cpu_CYF, 0x0); + tcg_gen_br(tcg_ctx, end); + + gen_set_label(tcg_ctx, set);//// + tcg_gen_movi_i32(tcg_ctx, cpu_CYF, 0x1); + + gen_set_label(tcg_ctx, end);//// + tcg_gen_movi_i32(tcg_ctx, cpu_OVF, 0x0); + + tcg_temp_free(tcg_ctx, r2_local); + tcg_temp_free(tcg_ctx, r3_local); + tcg_temp_free(tcg_ctx, count_local); + 
tcg_temp_free(tcg_ctx, temp_local); + } break; + + case OPC_RH850_BSW_reg2_reg3: { + + TCGv r2_local = tcg_temp_local_new_i32(tcg_ctx); + TCGv r3_local = tcg_temp_local_new_i32(tcg_ctx); + TCGv count_local = tcg_temp_local_new_i32(tcg_ctx); + TCGv temp_local = tcg_temp_local_new_i32(tcg_ctx); + + cont = gen_new_label(tcg_ctx); + end = gen_new_label(tcg_ctx); + set = gen_new_label(tcg_ctx); + + int_rs3 = extract32(ctx->opcode, 27, 5); + gen_get_gpr(tcg_ctx, tcg_r3,int_rs3); + tcg_gen_bswap32_i32(tcg_ctx, tcg_r3, tcg_r2); + gen_set_gpr(tcg_ctx, int_rs3, tcg_r3); + + tcg_gen_mov_i32(tcg_ctx, r2_local, tcg_r2); + tcg_gen_mov_i32(tcg_ctx, r3_local, tcg_r3); + + tcg_gen_setcondi_i32(tcg_ctx, TCG_COND_EQ, cpu_ZF, r3_local, 0x0); + tcg_gen_shri_i32(tcg_ctx, cpu_SF, r3_local, 0x1f); + + tcg_gen_movi_i32(tcg_ctx, count_local, 0x0); + + gen_set_label(tcg_ctx, cont);//// + + tcg_gen_andi_i32(tcg_ctx, temp_local, r3_local, 0x000000ff); + tcg_gen_brcondi_i32(tcg_ctx, TCG_COND_EQ, temp_local, 0x0, set);//// + tcg_gen_addi_i32(tcg_ctx, count_local, count_local, 0x1); + tcg_gen_shri_i32(tcg_ctx, r3_local, r3_local, 0x8); + tcg_gen_brcondi_i32(tcg_ctx, TCG_COND_NE, count_local, 0x4, cont);//// + tcg_gen_movi_i32(tcg_ctx, cpu_CYF, 0x0); + tcg_gen_br(tcg_ctx, end); + + gen_set_label(tcg_ctx, set); + tcg_gen_movi_i32(tcg_ctx, cpu_CYF, 0x1); + + gen_set_label(tcg_ctx, end); + tcg_gen_movi_i32(tcg_ctx, cpu_OVF, 0x0); + + tcg_temp_free(tcg_ctx, r2_local); + tcg_temp_free(tcg_ctx, r3_local); + tcg_temp_free(tcg_ctx, count_local); + tcg_temp_free(tcg_ctx, temp_local); + } + break; + + case OPC_RH850_CMOV_cccc_reg1_reg2_reg3: { + + TCGv r1_local = tcg_temp_local_new_i32(tcg_ctx); + TCGv r2_local = tcg_temp_local_new_i32(tcg_ctx); + TCGv r3_local = tcg_temp_local_new_i32(tcg_ctx); + + int_rs3 = extract32(ctx->opcode, 27, 5); + + tcg_gen_mov_i32(tcg_ctx, r1_local, tcg_r1); + tcg_gen_mov_i32(tcg_ctx, r2_local, tcg_r2); + + int_cond = extract32(ctx->opcode, 17, 4); + TCGv condResult = condition_satisfied(tcg_ctx, int_cond); + cont = gen_new_label(tcg_ctx); + + tcg_gen_mov_tl(tcg_ctx, r3_local, r2_local); + tcg_gen_brcondi_i32(tcg_ctx, TCG_COND_NE, condResult, 0x1, cont); + tcg_gen_mov_tl(tcg_ctx, r3_local, r1_local); + gen_set_label(tcg_ctx, cont); + + gen_set_gpr(tcg_ctx, int_rs3, r3_local); + + tcg_temp_free(tcg_ctx, condResult); + tcg_temp_free_i32(tcg_ctx, r1_local); + tcg_temp_free_i32(tcg_ctx, r2_local); + tcg_temp_free_i32(tcg_ctx, r3_local); + } + break; + + case OPC_RH850_CMOV_cccc_imm5_reg2_reg3: { + + TCGv r3_local = tcg_temp_local_new_i32(tcg_ctx); + + if (int_imm & 0x10) { // if is sign bit in imm5 set + int_imm = int_imm | 0xffffffe0; + } + + int_cond = extract32(ctx->opcode, 17, 4); + TCGv condResult = condition_satisfied(tcg_ctx, int_cond); + cont = gen_new_label(tcg_ctx); + + tcg_gen_mov_tl(tcg_ctx, r3_local, tcg_r2); + tcg_gen_brcondi_i32(tcg_ctx, TCG_COND_NE, condResult, 0x1, cont); + tcg_gen_movi_tl(tcg_ctx, r3_local, int_imm); + + gen_set_label(tcg_ctx, cont); + + int_rs3 = extract32(ctx->opcode, 27, 5); + gen_set_gpr(tcg_ctx, int_rs3, r3_local); + + tcg_temp_free(tcg_ctx, condResult); + tcg_temp_free_i32(tcg_ctx, r3_local); + } + break; + + case OPC_RH850_HSH_reg2_reg3: + + int_rs3 = extract32(ctx->opcode, 27, 5); + gen_set_gpr(tcg_ctx, int_rs3, tcg_r2); + + tcg_gen_shri_i32(tcg_ctx, cpu_SF, tcg_r2, 0x1f); + tcg_gen_andi_i32(tcg_ctx, tcg_temp, tcg_r2, 0x0000ffff); + tcg_gen_setcondi_i32(tcg_ctx, TCG_COND_EQ, cpu_ZF, tcg_temp, 0x0); + tcg_gen_setcondi_i32(tcg_ctx, TCG_COND_EQ, cpu_CYF, tcg_temp, 
0x0); + tcg_gen_movi_i32(tcg_ctx, cpu_OVF, 0x0); + break; + + case OPC_RH850_HSW_reg2_reg3: { + TCGv r2_local = tcg_temp_local_new_i32(tcg_ctx); + TCGv r3_local = tcg_temp_local_new_i32(tcg_ctx); + TCGv temp_local = tcg_temp_local_new_i32(tcg_ctx); + TCGv temp2_local = tcg_temp_local_new_i32(tcg_ctx); + TCGv temp3_local = tcg_temp_local_new_i32(tcg_ctx); + TCGv count_local = tcg_temp_local_new_i32(tcg_ctx); + + cont = gen_new_label(tcg_ctx); + end = gen_new_label(tcg_ctx); + set = gen_new_label(tcg_ctx); + + tcg_gen_mov_i32(tcg_ctx, r2_local, tcg_r2); + + int_rs3 = extract32(ctx->opcode, 27, 5); + gen_get_gpr(tcg_ctx, r3_local,int_rs3); + + tcg_gen_andi_tl(tcg_ctx, temp_local, r2_local, 0xffff); + tcg_gen_shli_tl(tcg_ctx, temp_local, temp_local, 0x10); + tcg_gen_andi_tl(tcg_ctx, temp2_local, r2_local, 0xffff0000); + tcg_gen_shri_tl(tcg_ctx, temp2_local, temp2_local, 0x10); + + tcg_gen_or_tl(tcg_ctx, r3_local, temp2_local, temp_local); + gen_set_gpr(tcg_ctx, int_rs3, r3_local); + + tcg_gen_setcondi_i32(tcg_ctx, TCG_COND_EQ, cpu_ZF, r3_local, 0x0); + tcg_gen_shri_i32(tcg_ctx, cpu_SF, r3_local, 0x1f); + + tcg_gen_movi_i32(tcg_ctx, count_local, 0x0); + + gen_set_label(tcg_ctx, cont); + + tcg_gen_andi_i32(tcg_ctx, temp3_local, r3_local, 0x0000ffff); + tcg_gen_brcondi_i32(tcg_ctx, TCG_COND_EQ, temp3_local, 0x0, set); + tcg_gen_andi_i32(tcg_ctx, temp3_local, r3_local, 0xffff0000); + tcg_gen_brcondi_i32(tcg_ctx, TCG_COND_EQ, temp3_local, 0x0, set); + tcg_gen_movi_i32(tcg_ctx, cpu_CYF, 0x0); + tcg_gen_br(tcg_ctx, end); + + gen_set_label(tcg_ctx, set);//// + tcg_gen_movi_i32(tcg_ctx, cpu_CYF, 0x1); + + gen_set_label(tcg_ctx, end);//// + tcg_gen_movi_i32(tcg_ctx, cpu_OVF, 0x0); + + tcg_temp_free(tcg_ctx, r2_local); + tcg_temp_free(tcg_ctx, r3_local); + tcg_temp_free(tcg_ctx, count_local); + tcg_temp_free(tcg_ctx, temp_local); + tcg_temp_free(tcg_ctx, temp2_local); + tcg_temp_free(tcg_ctx, temp3_local); + } + break; + + case OPC_RH850_ROTL_imm5_reg2_reg3: + { + TCGv r3_local = tcg_temp_local_new_i32(tcg_ctx); + TCGv imm_local = tcg_temp_local_new_i32(tcg_ctx); + cont = gen_new_label(tcg_ctx); + + tcg_gen_movi_tl(tcg_ctx, tcg_imm, int_imm); + tcg_gen_ext8u_tl(tcg_ctx, tcg_imm, tcg_imm); + int_rs3 = extract32(ctx->opcode, 27, 5); + gen_get_gpr(tcg_ctx, tcg_r3,int_rs3); + tcg_gen_rotl_tl(tcg_ctx, tcg_r3, tcg_r2, tcg_imm); + gen_set_gpr(tcg_ctx, int_rs3, tcg_r3); + + tcg_gen_andi_i32(tcg_ctx, cpu_CYF, tcg_r3, 0x1); + tcg_gen_setcondi_i32(tcg_ctx, TCG_COND_EQ, cpu_ZF, tcg_r3, 0x0); + tcg_gen_shri_i32(tcg_ctx, cpu_SF, tcg_r3, 0x1f); + tcg_gen_movi_i32(tcg_ctx, cpu_OVF, 0x0); + + tcg_gen_mov_i32(tcg_ctx, r3_local, tcg_r3); + tcg_gen_mov_i32(tcg_ctx, imm_local, tcg_imm); + + tcg_gen_brcondi_i32(tcg_ctx, TCG_COND_NE, tcg_imm, 0x0, cont); + tcg_gen_movi_i32(tcg_ctx, cpu_CYF, 0x0); + gen_set_label(tcg_ctx, cont); + + tcg_temp_free(tcg_ctx, r3_local); + tcg_temp_free(tcg_ctx, imm_local); + } break; + + case OPC_RH850_ROTL_reg1_reg2_reg3: + { + TCGv r3_local = tcg_temp_local_new_i32(tcg_ctx); + TCGv r1_local = tcg_temp_local_new_i32(tcg_ctx); + cont = gen_new_label(tcg_ctx); + + int_rs3 = extract32(ctx->opcode, 27, 5); + gen_get_gpr(tcg_ctx, tcg_r3,int_rs3); + tcg_gen_rotl_tl(tcg_ctx, tcg_r3, tcg_r2, tcg_r1); + gen_set_gpr(tcg_ctx, int_rs3, tcg_r3); + + tcg_gen_andi_i32(tcg_ctx, cpu_CYF, tcg_r3, 0x1); + tcg_gen_setcondi_i32(tcg_ctx, TCG_COND_EQ, cpu_ZF, tcg_r3, 0x0); + tcg_gen_shri_i32(tcg_ctx, cpu_SF, tcg_r3, 0x1f); + tcg_gen_movi_i32(tcg_ctx, cpu_OVF, 0x0); + + tcg_gen_mov_i32(tcg_ctx, r3_local, tcg_r3); + 
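+            // CY was taken from bit 0 of the rotated result above; it is
+            // forced back to 0 below when the rotate amount in reg1 is 0.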
tcg_gen_mov_i32(tcg_ctx, r1_local, tcg_r1); + + tcg_gen_brcondi_i32(tcg_ctx, TCG_COND_NE, tcg_r1, 0x0, cont); + tcg_gen_movi_i32(tcg_ctx, cpu_CYF, 0x0); + gen_set_label(tcg_ctx, cont); + + tcg_temp_free(tcg_ctx, r3_local); + tcg_temp_free(tcg_ctx, r1_local); + } break; + + case OPC_RH850_SAR_reg1_reg2: { + + TCGv r1_local = tcg_temp_local_new_i32(tcg_ctx); + TCGv r2_local = tcg_temp_local_new_i32(tcg_ctx); + cont = gen_new_label(tcg_ctx); + end = gen_new_label(tcg_ctx); + + tcg_gen_mov_i32(tcg_ctx, r1_local, tcg_r1); + tcg_gen_andi_i32(tcg_ctx, r1_local, r1_local, 0x1f); //shift by value of lower 5 bits of reg1 + tcg_gen_mov_i32(tcg_ctx, r2_local, tcg_r2); + + tcg_gen_brcondi_i32(tcg_ctx, TCG_COND_NE, r1_local, 0x0, cont); + tcg_gen_movi_i32(tcg_ctx, cpu_CYF, 0x0); + tcg_gen_br(tcg_ctx, end); + + gen_set_label(tcg_ctx, cont); + + tcg_gen_subi_i32(tcg_ctx, r1_local, r1_local, 0x1); //shift by r1-1 + + tcg_gen_sar_i32(tcg_ctx, r2_local, r2_local, r1_local); + tcg_gen_andi_i32(tcg_ctx, cpu_CYF, r2_local, 0x1); //LSB here is the last bit to be shifted + tcg_gen_sari_i32(tcg_ctx, r2_local, r2_local, 0x1); + + gen_set_label(tcg_ctx, end); + + gen_set_gpr(tcg_ctx, rs2, r2_local); + tcg_gen_setcondi_i32(tcg_ctx, TCG_COND_EQ, cpu_ZF, r2_local, 0x0); + tcg_gen_shri_i32(tcg_ctx, cpu_SF, r2_local, 0x1f); + tcg_gen_movi_i32(tcg_ctx, cpu_OVF, 0x0); + + tcg_temp_free(tcg_ctx, r2_local); + tcg_temp_free(tcg_ctx, r1_local); + } + break; + + case OPC_RH850_SAR_imm5_reg2: { + + TCGv r1_local = tcg_temp_local_new_i32(tcg_ctx); + TCGv r2_local = tcg_temp_local_new_i32(tcg_ctx); + cont = gen_new_label(tcg_ctx); + end = gen_new_label(tcg_ctx); + + tcg_gen_movi_tl(tcg_ctx, r1_local, int_imm); + tcg_gen_ext8u_i32(tcg_ctx, r1_local, r1_local); + tcg_gen_mov_i32(tcg_ctx, r2_local, tcg_r2); + + tcg_gen_brcondi_i32(tcg_ctx, TCG_COND_NE, r1_local, 0x0, cont); + tcg_gen_movi_i32(tcg_ctx, cpu_CYF, 0x0); + tcg_gen_br(tcg_ctx, end); + + gen_set_label(tcg_ctx, cont); + + tcg_gen_subi_i32(tcg_ctx, r1_local, r1_local, 0x1); //shift by one less + tcg_gen_sar_i32(tcg_ctx, r2_local, r2_local, r1_local); + tcg_gen_andi_i32(tcg_ctx, cpu_CYF, r2_local, 0x1); //LSB here is the last bit to be shifted + tcg_gen_sari_i32(tcg_ctx, r2_local, r2_local, 0x1); + + gen_set_label(tcg_ctx, end); + + gen_set_gpr(tcg_ctx, rs2, r2_local); + tcg_gen_setcondi_i32(tcg_ctx, TCG_COND_EQ, cpu_ZF, r2_local, 0x0); + tcg_gen_shri_i32(tcg_ctx, cpu_SF, r2_local, 0x1f); + tcg_gen_movi_i32(tcg_ctx, cpu_OVF, 0x0); + + tcg_temp_free(tcg_ctx, r2_local); + tcg_temp_free(tcg_ctx, r1_local); + } + break; + + case OPC_RH850_SAR_reg1_reg2_reg3: { + + TCGv r1_local = tcg_temp_local_new_i32(tcg_ctx); + TCGv r2_local = tcg_temp_local_new_i32(tcg_ctx); + TCGv r3_local = tcg_temp_local_new_i32(tcg_ctx); + cont = gen_new_label(tcg_ctx); + end = gen_new_label(tcg_ctx); + + tcg_gen_mov_i32(tcg_ctx, r1_local, tcg_r1); + tcg_gen_andi_i32(tcg_ctx, r1_local, r1_local, 0x1f); //shift by only lower 5 bits of reg1 + tcg_gen_mov_i32(tcg_ctx, r2_local, tcg_r2); + int_rs3 = extract32(ctx->opcode, 27, 5); + gen_get_gpr(tcg_ctx, r3_local, int_rs3); + + tcg_gen_brcondi_i32(tcg_ctx, TCG_COND_NE, r1_local, 0x0, cont); //is non-shift? 
+ tcg_gen_movi_i32(tcg_ctx, cpu_CYF, 0x0); + tcg_gen_mov_i32(tcg_ctx, r3_local, r2_local); + tcg_gen_br(tcg_ctx, end); + + gen_set_label(tcg_ctx, cont); + + + tcg_gen_subi_i32(tcg_ctx, r1_local, r1_local, 0x1); //shift by one less + tcg_gen_sar_i32(tcg_ctx, r3_local, r2_local, r1_local); + tcg_gen_andi_i32(tcg_ctx, cpu_CYF, r3_local, 0x1); //LSB here is the last bit to be shifted + tcg_gen_sari_i32(tcg_ctx, r3_local, r3_local, 0x1); + + gen_set_label(tcg_ctx, end); + + gen_set_gpr(tcg_ctx, int_rs3, r3_local); + tcg_gen_setcondi_i32(tcg_ctx, TCG_COND_EQ, cpu_ZF, r3_local, 0x0); + tcg_gen_shri_i32(tcg_ctx, cpu_SF, r3_local, 0x1f); + tcg_gen_movi_i32(tcg_ctx, cpu_OVF, 0x0); + + tcg_temp_free(tcg_ctx, r3_local); + tcg_temp_free(tcg_ctx, r2_local); + tcg_temp_free(tcg_ctx, r1_local); + } + break; + + case OPC_RH850_SASF_cccc_reg2: { + TCGv r2_local = tcg_temp_local_new_i32(tcg_ctx); + TCGv operand_local = tcg_temp_local_new_i32(tcg_ctx); + + int_cond = extract32(ctx->opcode,0,4); + TCGv condResult = condition_satisfied(tcg_ctx, int_cond); + cont = gen_new_label(tcg_ctx); + + tcg_gen_shli_tl(tcg_ctx, r2_local, tcg_r2, 0x1); + + tcg_gen_movi_i32(tcg_ctx, operand_local, 0x00000000); + tcg_gen_brcondi_i32(tcg_ctx, TCG_COND_NE, condResult, 0x1, cont); + tcg_gen_movi_i32(tcg_ctx, operand_local, 0x00000001); + + gen_set_label(tcg_ctx, cont); + tcg_gen_or_tl(tcg_ctx, r2_local, r2_local, operand_local); + + gen_set_gpr(tcg_ctx, rs2, r2_local); + + tcg_temp_free(tcg_ctx, r2_local); + tcg_temp_free(tcg_ctx, operand_local); + tcg_temp_free(tcg_ctx, condResult); + } + break; + + case OPC_RH850_SETF_cccc_reg2:{ + + TCGv operand_local = tcg_temp_local_new_i32(tcg_ctx); + int_cond = extract32(ctx->opcode,0,4); + TCGv condResult = condition_satisfied(tcg_ctx, int_cond); + cont = gen_new_label(tcg_ctx); + end = gen_new_label(tcg_ctx); + + tcg_gen_brcondi_i32(tcg_ctx, TCG_COND_NE, condResult, 0x1, cont); + tcg_gen_movi_i32(tcg_ctx, operand_local, 0x00000001); + tcg_gen_br(tcg_ctx, end); + + gen_set_label(tcg_ctx, cont); + tcg_gen_movi_i32(tcg_ctx, operand_local, 0x00000000); + + gen_set_label(tcg_ctx, end); + gen_set_gpr(tcg_ctx, rs2, operand_local); + + tcg_temp_free(tcg_ctx, condResult); + tcg_temp_free(tcg_ctx, operand_local); + } + break; + + case OPC_RH850_SHL_reg1_reg2: { + + TCGv r1_local = tcg_temp_local_new_i32(tcg_ctx); + TCGv r2_local = tcg_temp_local_new_i32(tcg_ctx); + TCGv temp_local = tcg_temp_local_new_i32(tcg_ctx); + + tcg_gen_mov_i32(tcg_ctx, r1_local, tcg_r1); + tcg_gen_mov_i32(tcg_ctx, r2_local, tcg_r2); + + cont = gen_new_label(tcg_ctx); + end = gen_new_label(tcg_ctx); + + tcg_gen_andi_i32(tcg_ctx, r1_local, r1_local, 0x1f); //get only lower 5 bits + + tcg_gen_brcondi_i32(tcg_ctx, TCG_COND_EQ, r1_local, 0x0, cont); + + tcg_gen_subi_i32(tcg_ctx, temp_local, r1_local, 0x1); // shifting for [r1]-1 + tcg_gen_shl_tl(tcg_ctx, r2_local, r2_local, temp_local); + + tcg_gen_shri_i32(tcg_ctx, cpu_CYF, r2_local, 0x1f); // checking the last bit to shift + tcg_gen_shli_i32(tcg_ctx, r2_local, r2_local, 0x1); // shifting for that remaining 1 + + gen_set_gpr(tcg_ctx, rs2, r2_local); + tcg_gen_br(tcg_ctx, end); + + gen_set_label(tcg_ctx, cont); + tcg_gen_movi_i32(tcg_ctx, cpu_CYF, 0x0); + + gen_set_label(tcg_ctx, end); + tcg_gen_setcondi_i32(tcg_ctx, TCG_COND_EQ, cpu_ZF, r2_local, 0x0); + tcg_gen_shri_i32(tcg_ctx, cpu_SF, r2_local, 0x1f); + tcg_gen_movi_i32(tcg_ctx, cpu_OVF, 0x0); + + tcg_temp_free(tcg_ctx, r1_local); + tcg_temp_free(tcg_ctx, r2_local); + tcg_temp_free(tcg_ctx, temp_local); + } + break; + + 
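+        /*
+         * The shift cases below use the same two-step trick as above: shift by
+         * (amount - 1), copy the bit that would be shifted out next into CY,
+         * then shift by the remaining 1. A shift amount of 0 leaves the value
+         * unchanged and clears CY.
+         */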
case OPC_RH850_SHL_imm5_reg2: { + + TCGv r1_local = tcg_temp_local_new_i32(tcg_ctx); + TCGv r2_local = tcg_temp_local_new_i32(tcg_ctx); + TCGv temp_local = tcg_temp_local_new_i32(tcg_ctx); + + tcg_gen_mov_i32(tcg_ctx, r2_local, tcg_r2); + + tcg_gen_movi_tl(tcg_ctx, r1_local, int_imm); + tcg_gen_ext8u_tl(tcg_ctx, r1_local, r1_local); + + cont = gen_new_label(tcg_ctx); + end = gen_new_label(tcg_ctx); + + tcg_gen_brcondi_i32(tcg_ctx, TCG_COND_EQ, r1_local, 0x0, cont); + + tcg_gen_subi_i32(tcg_ctx, temp_local, r1_local, 0x1); + tcg_gen_shl_tl(tcg_ctx, r2_local, r2_local, temp_local); + tcg_gen_shri_i32(tcg_ctx, cpu_CYF, r2_local, 0x1f); + tcg_gen_shli_tl(tcg_ctx, r2_local, r2_local, 0x1); + tcg_gen_br(tcg_ctx, end); + + gen_set_label(tcg_ctx, cont); + tcg_gen_movi_i32(tcg_ctx, cpu_CYF, 0x0); + + gen_set_label(tcg_ctx, end); + gen_set_gpr(tcg_ctx, rs2, r2_local); + tcg_gen_setcondi_i32(tcg_ctx, TCG_COND_EQ, cpu_ZF, r2_local, 0x0); + tcg_gen_shri_i32(tcg_ctx, cpu_SF, r2_local, 0x1f); + tcg_gen_movi_i32(tcg_ctx, cpu_OVF, 0x0); + + tcg_temp_free(tcg_ctx, r1_local); + tcg_temp_free(tcg_ctx, r2_local); + tcg_temp_free(tcg_ctx, temp_local); + } + break; + + case OPC_RH850_SHL_reg1_reg2_reg3: { + + TCGv r1_local = tcg_temp_local_new_i32(tcg_ctx); + TCGv r2_local = tcg_temp_local_new_i32(tcg_ctx); + TCGv r3_local = tcg_temp_local_new_i32(tcg_ctx); + TCGv temp_local = tcg_temp_local_new_i32(tcg_ctx); + + tcg_gen_mov_i32(tcg_ctx, r1_local, tcg_r1); + tcg_gen_andi_i32(tcg_ctx, r1_local, r1_local, 0x1f); + tcg_gen_mov_i32(tcg_ctx, r2_local, tcg_r2); + + int_rs3 = extract32(ctx->opcode, 27, 5); + gen_get_gpr(tcg_ctx, r3_local,int_rs3); + + cont = gen_new_label(tcg_ctx); + end = gen_new_label(tcg_ctx); + + tcg_gen_brcondi_i32(tcg_ctx, TCG_COND_EQ, r1_local, 0x0, cont); // when reg1 = 0, do not shift + + tcg_gen_subi_i32(tcg_ctx, temp_local, r1_local, 0x1); + tcg_gen_shl_tl(tcg_ctx, r3_local, r2_local, temp_local); + + tcg_gen_shri_i32(tcg_ctx, cpu_CYF, r3_local, 0x1f); + tcg_gen_shli_tl(tcg_ctx, r3_local, r3_local, 0x1); + tcg_gen_br(tcg_ctx, end); + + gen_set_label(tcg_ctx, cont); + tcg_gen_mov_i32(tcg_ctx, r3_local, r2_local); + tcg_gen_movi_i32(tcg_ctx, cpu_CYF, 0x0); + + gen_set_label(tcg_ctx, end); + gen_set_gpr(tcg_ctx, int_rs3, r3_local); + tcg_gen_setcondi_i32(tcg_ctx, TCG_COND_EQ, cpu_ZF, r3_local, 0x0); + tcg_gen_shri_i32(tcg_ctx, cpu_SF, r3_local, 0x1f); + tcg_gen_movi_i32(tcg_ctx, cpu_OVF, 0x0); + + tcg_temp_free(tcg_ctx, r1_local); + tcg_temp_free(tcg_ctx, r2_local); + tcg_temp_free(tcg_ctx, r3_local); + tcg_temp_free(tcg_ctx, temp_local); + } + break; + + case OPC_RH850_SHR_reg1_reg2: { + + TCGv r1_local = tcg_temp_local_new_i32(tcg_ctx); + TCGv r2_local = tcg_temp_local_new_i32(tcg_ctx); + TCGv temp_local = tcg_temp_local_new_i32(tcg_ctx); + cont = gen_new_label(tcg_ctx); + end = gen_new_label(tcg_ctx); + + tcg_gen_mov_i32(tcg_ctx, r1_local, tcg_r1); + tcg_gen_andi_i32(tcg_ctx, r1_local, r1_local, 0x1f); // + tcg_gen_mov_i32(tcg_ctx, r2_local, tcg_r2); + + tcg_gen_brcondi_i32(tcg_ctx, TCG_COND_EQ, r1_local, 0x0, cont); //checking for non-shift + + tcg_gen_subi_i32(tcg_ctx, temp_local, r1_local, 0x1); // shifting for [r1]-1 + tcg_gen_shr_tl(tcg_ctx, r2_local, r2_local, temp_local); + + + tcg_gen_andi_i32(tcg_ctx, cpu_CYF, r2_local, 0x1); // checking the last bit to shift (LSB) + tcg_gen_shri_i32(tcg_ctx, r2_local, r2_local, 0x1); + + tcg_gen_br(tcg_ctx, end); + + gen_set_label(tcg_ctx, cont); + tcg_gen_movi_i32(tcg_ctx, cpu_CYF, 0x0); + + gen_set_label(tcg_ctx, end); + gen_set_gpr(tcg_ctx, rs2, 
r2_local); + tcg_gen_setcondi_i32(tcg_ctx, TCG_COND_EQ, cpu_ZF, r2_local, 0x0); + tcg_gen_shri_i32(tcg_ctx, cpu_SF, r2_local, 0x1f); + tcg_gen_movi_i32(tcg_ctx, cpu_OVF, 0x0); + + tcg_temp_free(tcg_ctx, r1_local); + tcg_temp_free(tcg_ctx, r2_local); + tcg_temp_free(tcg_ctx, temp_local); + } + break; + + case OPC_RH850_SHR_imm5_reg2: { + + TCGv r1_local = tcg_temp_local_new_i32(tcg_ctx); + TCGv r2_local = tcg_temp_local_new_i32(tcg_ctx); + TCGv temp_local = tcg_temp_local_new_i32(tcg_ctx); + + tcg_gen_mov_i32(tcg_ctx, r2_local, tcg_r2); + + tcg_gen_movi_tl(tcg_ctx, r1_local, int_imm); + tcg_gen_ext8u_tl(tcg_ctx, r1_local, r1_local); + + cont = gen_new_label(tcg_ctx); + end = gen_new_label(tcg_ctx); + + tcg_gen_brcondi_i32(tcg_ctx, TCG_COND_EQ, r1_local, 0x0, cont); //checking for non-shift + + tcg_gen_subi_i32(tcg_ctx, temp_local, r1_local, 0x1); // shifting for [r1]-1 + tcg_gen_shr_tl(tcg_ctx, r2_local, r2_local, temp_local); + + tcg_gen_andi_i32(tcg_ctx, cpu_CYF, r2_local, 0x1); // checking the last bit to shift (LSB) + tcg_gen_shri_i32(tcg_ctx, r2_local, r2_local, 0x1); + + tcg_gen_br(tcg_ctx, end); + + gen_set_label(tcg_ctx, cont); + tcg_gen_movi_i32(tcg_ctx, cpu_CYF, 0x0); + + gen_set_label(tcg_ctx, end); + gen_set_gpr(tcg_ctx, rs2, r2_local); + tcg_gen_setcondi_i32(tcg_ctx, TCG_COND_EQ, cpu_ZF, r2_local, 0x0); + tcg_gen_shri_i32(tcg_ctx, cpu_SF, r2_local, 0x1f); + tcg_gen_movi_i32(tcg_ctx, cpu_OVF, 0x0); + + tcg_temp_free(tcg_ctx, r1_local); + tcg_temp_free(tcg_ctx, r2_local); + tcg_temp_free(tcg_ctx, temp_local); + } + break; + + case OPC_RH850_SHR_reg1_reg2_reg3: { + + TCGv r1_local = tcg_temp_local_new_i32(tcg_ctx); + TCGv r2_local = tcg_temp_local_new_i32(tcg_ctx); + TCGv r3_local = tcg_temp_local_new_i32(tcg_ctx); + TCGv temp_local = tcg_temp_local_new_i32(tcg_ctx); + cont = gen_new_label(tcg_ctx); + end = gen_new_label(tcg_ctx); + + tcg_gen_mov_i32(tcg_ctx, r1_local, tcg_r1); + tcg_gen_andi_i32(tcg_ctx, r1_local, r1_local, 0x1f); + tcg_gen_mov_i32(tcg_ctx, r2_local, tcg_r2); + int_rs3 = extract32(ctx->opcode, 27, 5); + gen_get_gpr(tcg_ctx, r3_local, int_rs3); + + + + tcg_gen_brcondi_i32(tcg_ctx, TCG_COND_EQ, r1_local, 0x0, cont); //checking for non-shift + + tcg_gen_subi_i32(tcg_ctx, temp_local, r1_local, 0x1); // shifting for [r1]-1 + tcg_gen_shr_tl(tcg_ctx, r3_local, r2_local, temp_local); + + tcg_gen_andi_i32(tcg_ctx, cpu_CYF, r3_local, 0x1); // checking the last bit to shift (LSB) + tcg_gen_shri_i32(tcg_ctx, r3_local, r3_local, 0x1); + + tcg_gen_br(tcg_ctx, end); + + gen_set_label(tcg_ctx, cont); + tcg_gen_movi_i32(tcg_ctx, cpu_CYF, 0x0); + tcg_gen_mov_i32(tcg_ctx, r3_local, r2_local); + + gen_set_label(tcg_ctx, end); + gen_set_gpr(tcg_ctx, int_rs3, r3_local); + tcg_gen_setcondi_i32(tcg_ctx, TCG_COND_EQ, cpu_ZF, r3_local, 0x0); + tcg_gen_shri_i32(tcg_ctx, cpu_SF, r3_local, 0x1f); + tcg_gen_movi_i32(tcg_ctx, cpu_OVF, 0x0); + + tcg_temp_free(tcg_ctx, r1_local); + tcg_temp_free(tcg_ctx, r2_local); + tcg_temp_free(tcg_ctx, r3_local); + tcg_temp_free(tcg_ctx, temp_local); + } + break; + + case OPC_RH850_SXB_reg1: + tcg_gen_andi_tl(tcg_ctx, tcg_r1, tcg_r1,0xFF); + tcg_gen_ext8s_tl(tcg_ctx, tcg_r1, tcg_r1); + gen_set_gpr(tcg_ctx, rs1, tcg_r1); + break; + + case OPC_RH850_SXH_reg1: + tcg_gen_andi_tl(tcg_ctx, tcg_r1, tcg_r1,0xFFFF); + tcg_gen_ext16s_tl(tcg_ctx, tcg_r1, tcg_r1); + gen_set_gpr(tcg_ctx, rs1, tcg_r1); + break; + + case OPC_RH850_ZXH_reg1: + tcg_gen_andi_tl(tcg_ctx, tcg_r1, tcg_r1,0xFFFF); + tcg_gen_ext16u_tl(tcg_ctx, tcg_r1, tcg_r1); + gen_set_gpr(tcg_ctx, rs1, tcg_r1); 
+ break; + + case OPC_RH850_ZXB_reg1: + tcg_gen_andi_tl(tcg_ctx, tcg_r1, tcg_r1,0xFF); + tcg_gen_ext8u_tl(tcg_ctx, tcg_r1, tcg_r1); + gen_set_gpr(tcg_ctx, rs1, tcg_r1); + break; + } + + tcg_temp_free(tcg_ctx, tcg_r1); + tcg_temp_free(tcg_ctx, tcg_r2); + tcg_temp_free(tcg_ctx, tcg_r3); + tcg_temp_free(tcg_ctx, tcg_imm); + tcg_temp_free(tcg_ctx, tcg_temp); + tcg_temp_free(tcg_ctx, tcg_temp2); + tcg_temp_free(tcg_ctx, insert); +} + +static void gen_bit_search(DisasContext *ctx, int rs2, int operation) +{ + TCGContext *tcg_ctx = ctx->uc->tcg_ctx; + + TCGv tcg_r2 = tcg_temp_new(tcg_ctx); + TCGv tcg_r3 = tcg_temp_new(tcg_ctx); + int int_rs3; + int_rs3 = extract32(ctx->opcode, 27, 5); + + gen_get_gpr(tcg_ctx, tcg_r2, rs2); + gen_get_gpr(tcg_ctx, tcg_r3, int_rs3); + + TCGLabel *end; + TCGLabel *found; + TCGLabel *loop; + + switch(operation){ + case OPC_RH850_SCH0L_reg2_reg3: { + + TCGv foundFlag = tcg_temp_local_new(tcg_ctx); + TCGv r2_local = tcg_temp_local_new(tcg_ctx); + TCGv r3_local = tcg_temp_local_new(tcg_ctx); + TCGv result = tcg_temp_local_new(tcg_ctx); + TCGv check = tcg_temp_local_new(tcg_ctx); + TCGv count = tcg_temp_local_new(tcg_ctx); + tcg_gen_mov_i32(tcg_ctx, r2_local, tcg_r2); + tcg_gen_mov_i32(tcg_ctx, r3_local, tcg_r3); + tcg_gen_movi_i32(tcg_ctx, count, 0x0); + + end = gen_new_label(tcg_ctx); + found = gen_new_label(tcg_ctx); + loop = gen_new_label(tcg_ctx); + + gen_set_label(tcg_ctx, loop);//--------------------------------------------------- + + tcg_gen_shl_i32(tcg_ctx, check, r2_local, count); + tcg_gen_ori_i32(tcg_ctx, check, check, 0x7fffffff); // check MSB bit + tcg_gen_brcondi_tl(tcg_ctx, TCG_COND_EQ, check, 0x7fffffff, found); + + tcg_gen_addi_i32(tcg_ctx, count, count, 0x1); + tcg_gen_brcondi_tl(tcg_ctx, TCG_COND_NE, count, 0x20, loop);//-------------------- + + tcg_gen_movi_i32(tcg_ctx, result, 0x0); + tcg_gen_movi_i32(tcg_ctx, foundFlag, 0x0); + tcg_gen_br(tcg_ctx, end); + + gen_set_label(tcg_ctx, found); + tcg_gen_movi_i32(tcg_ctx, foundFlag, 0x1); + tcg_gen_addi_i32(tcg_ctx, result, count, 0x1); + + gen_set_label(tcg_ctx, end); + + gen_set_gpr(tcg_ctx, int_rs3, result); + + tcg_gen_setcondi_i32(tcg_ctx, TCG_COND_NE, cpu_ZF, foundFlag, 0x1); //setting Z if not found + tcg_gen_movi_i32(tcg_ctx, cpu_OVF, 0x0); + tcg_gen_movi_i32(tcg_ctx, cpu_SF, 0x0); + tcg_gen_setcondi_i32(tcg_ctx, TCG_COND_EQ, cpu_CYF, r2_local, 0xfffffffe); //setting CY if found at the end + + tcg_temp_free(tcg_ctx, foundFlag); + tcg_temp_free(tcg_ctx, r2_local); + tcg_temp_free(tcg_ctx, r3_local); + tcg_temp_free(tcg_ctx, check); + tcg_temp_free(tcg_ctx, count); + tcg_temp_free(tcg_ctx, result); + } break; + + case OPC_RH850_SCH0R_reg2_reg3: { + + TCGv foundFlag = tcg_temp_local_new(tcg_ctx); + TCGv r2_local = tcg_temp_local_new(tcg_ctx); + TCGv r3_local = tcg_temp_local_new(tcg_ctx); + TCGv result = tcg_temp_local_new(tcg_ctx); + TCGv check = tcg_temp_local_new(tcg_ctx); + TCGv count = tcg_temp_local_new(tcg_ctx); + tcg_gen_mov_i32(tcg_ctx, r2_local, tcg_r2); + tcg_gen_mov_i32(tcg_ctx, r3_local, tcg_r3); + tcg_gen_movi_i32(tcg_ctx, count, 0x0); + + end = gen_new_label(tcg_ctx); + found = gen_new_label(tcg_ctx); + loop = gen_new_label(tcg_ctx); + + gen_set_label(tcg_ctx, loop);//--------------------------------------------------- + + tcg_gen_shr_i32(tcg_ctx, check, r2_local, count); + tcg_gen_ori_i32(tcg_ctx, check, check, 0xfffffffe); // check MSB bit + tcg_gen_brcondi_tl(tcg_ctx, TCG_COND_EQ, check, 0xfffffffe, found); + + tcg_gen_addi_i32(tcg_ctx, count, count, 0x1); + 
tcg_gen_brcondi_tl(tcg_ctx, TCG_COND_NE, count, 0x20, loop);//-------------------- + + tcg_gen_movi_i32(tcg_ctx, result, 0x0); + tcg_gen_movi_i32(tcg_ctx, foundFlag, 0x0); + tcg_gen_br(tcg_ctx, end); + + gen_set_label(tcg_ctx, found); + tcg_gen_movi_i32(tcg_ctx, foundFlag, 0x1); + tcg_gen_addi_i32(tcg_ctx, result, count, 0x1); + + gen_set_label(tcg_ctx, end); + + gen_set_gpr(tcg_ctx, int_rs3, result); + + tcg_gen_setcondi_i32(tcg_ctx, TCG_COND_NE, cpu_ZF, foundFlag, 0x1); //setting Z if not found + tcg_gen_movi_i32(tcg_ctx, cpu_OVF, 0x0); + tcg_gen_movi_i32(tcg_ctx, cpu_SF, 0x0); + tcg_gen_setcondi_i32(tcg_ctx, TCG_COND_EQ, cpu_CYF, r2_local, 0x7fffffff); + + tcg_temp_free(tcg_ctx, foundFlag); + tcg_temp_free(tcg_ctx, r2_local); + tcg_temp_free(tcg_ctx, r3_local); + tcg_temp_free(tcg_ctx, check); + tcg_temp_free(tcg_ctx, count); + tcg_temp_free(tcg_ctx, result); + } break; + + case OPC_RH850_SCH1L_reg2_reg3: { + + TCGv foundFlag = tcg_temp_local_new(tcg_ctx); + TCGv r2_local = tcg_temp_local_new(tcg_ctx); + TCGv r3_local = tcg_temp_local_new(tcg_ctx); + TCGv result = tcg_temp_local_new(tcg_ctx); + TCGv check = tcg_temp_local_new(tcg_ctx); + TCGv count = tcg_temp_local_new(tcg_ctx); + tcg_gen_mov_i32(tcg_ctx, r2_local, tcg_r2); + tcg_gen_mov_i32(tcg_ctx, r3_local, tcg_r3); + tcg_gen_movi_i32(tcg_ctx, count, 0x0); + + end = gen_new_label(tcg_ctx); + found = gen_new_label(tcg_ctx); + loop = gen_new_label(tcg_ctx); + + gen_set_label(tcg_ctx, loop);//--------------------------------------------------- + + tcg_gen_shl_i32(tcg_ctx, check, r2_local, count); + tcg_gen_andi_i32(tcg_ctx, check, check, 0x80000000); // check MSB bit + tcg_gen_brcondi_tl(tcg_ctx, TCG_COND_EQ, check, 0x80000000, found); + + tcg_gen_addi_i32(tcg_ctx, count, count, 0x1); + tcg_gen_brcondi_tl(tcg_ctx, TCG_COND_NE, count, 0x20, loop);//-------------------- + + tcg_gen_movi_i32(tcg_ctx, result, 0x0); + tcg_gen_movi_i32(tcg_ctx, foundFlag, 0x0); + tcg_gen_br(tcg_ctx, end); + + gen_set_label(tcg_ctx, found); + tcg_gen_movi_i32(tcg_ctx, foundFlag, 0x1); + tcg_gen_addi_i32(tcg_ctx, result, count, 0x1); + + gen_set_label(tcg_ctx, end); + + gen_set_gpr(tcg_ctx, int_rs3, result); + + tcg_gen_setcondi_i32(tcg_ctx, TCG_COND_NE, cpu_ZF, foundFlag, 0x1); //setting Z if not found + tcg_gen_movi_i32(tcg_ctx, cpu_OVF, 0x0); + tcg_gen_movi_i32(tcg_ctx, cpu_SF, 0x0); + tcg_gen_setcondi_i32(tcg_ctx, TCG_COND_EQ, cpu_CYF, r2_local, 0x1); + + tcg_temp_free(tcg_ctx, foundFlag); + tcg_temp_free(tcg_ctx, r2_local); + tcg_temp_free(tcg_ctx, r3_local); + tcg_temp_free(tcg_ctx, check); + tcg_temp_free(tcg_ctx, count); + tcg_temp_free(tcg_ctx, result); + } break; + + case OPC_RH850_SCH1R_reg2_reg3: { + + TCGv foundFlag = tcg_temp_local_new(tcg_ctx); + TCGv r2_local = tcg_temp_local_new(tcg_ctx); + TCGv r3_local = tcg_temp_local_new(tcg_ctx); + TCGv result = tcg_temp_local_new(tcg_ctx); + TCGv check = tcg_temp_local_new(tcg_ctx); + TCGv count = tcg_temp_local_new(tcg_ctx); + + tcg_gen_mov_i32(tcg_ctx, r2_local, tcg_r2); + tcg_gen_mov_i32(tcg_ctx, r3_local, tcg_r3); + tcg_gen_movi_i32(tcg_ctx, count, 0x0); + + end = gen_new_label(tcg_ctx); + found = gen_new_label(tcg_ctx); + loop = gen_new_label(tcg_ctx); + + gen_set_label(tcg_ctx, loop);//--------------------------------------------------- + + tcg_gen_shr_i32(tcg_ctx, check, r2_local, count); + tcg_gen_andi_i32(tcg_ctx, check, check, 0x1); // check MSB bit + tcg_gen_brcondi_tl(tcg_ctx, TCG_COND_EQ, check, 0x1, found); + + tcg_gen_addi_i32(tcg_ctx, count, count, 0x1); + tcg_gen_brcondi_tl(tcg_ctx, 
TCG_COND_NE, count, 0x20, loop);//-------------------- + + tcg_gen_movi_i32(tcg_ctx, result, 0x0); + tcg_gen_movi_i32(tcg_ctx, foundFlag, 0x0); + tcg_gen_br(tcg_ctx, end); + + gen_set_label(tcg_ctx, found); + tcg_gen_movi_i32(tcg_ctx, foundFlag, 0x1); + tcg_gen_addi_i32(tcg_ctx, result, count, 0x1); + + gen_set_label(tcg_ctx, end); + + gen_set_gpr(tcg_ctx, int_rs3, result); + + tcg_gen_setcondi_i32(tcg_ctx, TCG_COND_NE, cpu_ZF, foundFlag, 0x1); //setting Z if not found + tcg_gen_movi_i32(tcg_ctx, cpu_OVF, 0x0); + tcg_gen_movi_i32(tcg_ctx, cpu_SF, 0x0); + tcg_gen_setcondi_i32(tcg_ctx, TCG_COND_EQ, cpu_CYF, r2_local, 0x80000000); + + tcg_temp_free(tcg_ctx, foundFlag); + tcg_temp_free(tcg_ctx, r2_local); + tcg_temp_free(tcg_ctx, r3_local); + tcg_temp_free(tcg_ctx, check); + tcg_temp_free(tcg_ctx, count); + tcg_temp_free(tcg_ctx, result); + } break; + } + + tcg_temp_free(tcg_ctx, tcg_r2); + tcg_temp_free(tcg_ctx, tcg_r3); +} + +static void gen_divide(DisasContext *ctx, int rs1, int rs2, int operation) +{ + TCGContext *tcg_ctx = ctx->uc->tcg_ctx; + + TCGv tcg_r1 = tcg_temp_new(tcg_ctx); + TCGv tcg_r2 = tcg_temp_new(tcg_ctx); + + gen_get_gpr(tcg_ctx, tcg_r1, rs1); + gen_get_gpr(tcg_ctx, tcg_r2, rs2); + + int int_rs3; + + TCGv tcg_r3 = tcg_temp_new(tcg_ctx); + + switch(operation){ + + case OPC_RH850_DIV_reg1_reg2_reg3:{ + + TCGLabel *cont; + TCGLabel *end; + TCGLabel *fin; + + TCGv r1_local = tcg_temp_local_new(tcg_ctx); + TCGv r2_local = tcg_temp_local_new(tcg_ctx); + TCGv r3_local = tcg_temp_local_new(tcg_ctx); + + tcg_gen_mov_i32(tcg_ctx, r1_local, tcg_r1); + tcg_gen_mov_i32(tcg_ctx, r2_local, tcg_r2); + + int_rs3 = extract32(ctx->opcode, 27, 5); + gen_get_gpr(tcg_ctx, tcg_r3, int_rs3); + tcg_gen_mov_i32(tcg_ctx, r3_local, tcg_r3); + TCGv overflowed = tcg_temp_local_new(tcg_ctx); + TCGv overflowed2 = tcg_temp_local_new(tcg_ctx); + + cont = gen_new_label(tcg_ctx); + end = gen_new_label(tcg_ctx); + fin = gen_new_label(tcg_ctx); + + tcg_gen_setcondi_i32(tcg_ctx, TCG_COND_EQ, cpu_OVF, r1_local, 0x0); + tcg_gen_brcondi_i32(tcg_ctx, TCG_COND_NE, cpu_OVF, 0x1, cont); //if r1=0 jump to end + + tcg_gen_movi_i32(tcg_ctx, r2_local, 0x80000000); + tcg_gen_br(tcg_ctx, fin); + + gen_set_label(tcg_ctx, cont); + + tcg_gen_setcondi_i32(tcg_ctx, TCG_COND_EQ, overflowed, r2_local, 0x80000000); + tcg_gen_setcondi_i32(tcg_ctx, TCG_COND_EQ, overflowed2, r1_local, 0xffffffff); + tcg_gen_and_i32(tcg_ctx, overflowed, overflowed, overflowed2); //if both + + tcg_gen_setcondi_i32(tcg_ctx, TCG_COND_EQ, cpu_OVF, overflowed, 0x1); //are 1 + tcg_gen_brcondi_i32(tcg_ctx, TCG_COND_NE, cpu_OVF, 0x1, end); + tcg_gen_movi_i32(tcg_ctx, r2_local, 0x80000000); //DO THIS + tcg_gen_movi_i32(tcg_ctx, r3_local, 0x0000); + gen_set_gpr(tcg_ctx, rs2, r2_local); //write zeros if undefined + gen_set_gpr(tcg_ctx, int_rs3, r3_local); + tcg_gen_setcondi_i32(tcg_ctx, TCG_COND_EQ, cpu_ZF, r2_local, 0x0); + tcg_gen_setcondi_i32(tcg_ctx, TCG_COND_LT, cpu_SF, r2_local, 0x0); + tcg_gen_br(tcg_ctx, fin); + + gen_set_label(tcg_ctx, end); + + tcg_gen_rem_i32(tcg_ctx, r3_local, r2_local, r1_local); + tcg_gen_div_i32(tcg_ctx, r2_local, r2_local, r1_local); + + if(rs2==int_rs3){ + gen_set_gpr(tcg_ctx, rs2, r3_local); + } else { + gen_set_gpr(tcg_ctx, rs2, r2_local); + gen_set_gpr(tcg_ctx, int_rs3, r3_local); + } + + tcg_gen_setcondi_i32(tcg_ctx, TCG_COND_LT, cpu_SF, r2_local, 0x0); + tcg_gen_setcondi_i32(tcg_ctx, TCG_COND_EQ, cpu_ZF, r2_local, 0x0); + + gen_set_label(tcg_ctx, fin); + + tcg_temp_free(tcg_ctx, overflowed); + tcg_temp_free(tcg_ctx, 
overflowed2); + tcg_temp_free(tcg_ctx, r1_local); + tcg_temp_free(tcg_ctx, r2_local); + tcg_temp_free(tcg_ctx, r3_local); + } break; + + case OPC_RH850_DIVH_reg1_reg2:{ + + TCGLabel *cont; + TCGLabel *end; + TCGLabel *fin; + + tcg_gen_andi_i32(tcg_ctx, tcg_r1, tcg_r1, 0x0000FFFF); + tcg_gen_ext16s_i32(tcg_ctx, tcg_r1, tcg_r1); + + TCGv r1_local = tcg_temp_local_new(tcg_ctx); + TCGv r2_local = tcg_temp_local_new(tcg_ctx); + TCGv overflowed = tcg_temp_local_new(tcg_ctx); + TCGv overflowed2 = tcg_temp_local_new(tcg_ctx); + + tcg_gen_mov_i32(tcg_ctx, r1_local, tcg_r1); + tcg_gen_mov_i32(tcg_ctx, r2_local, tcg_r2); + + cont = gen_new_label(tcg_ctx); + end = gen_new_label(tcg_ctx); + fin = gen_new_label(tcg_ctx); + + tcg_gen_setcondi_i32(tcg_ctx, TCG_COND_EQ, cpu_OVF, r1_local, 0x0); + tcg_gen_brcondi_i32(tcg_ctx, TCG_COND_NE, cpu_OVF, 0x1, cont); //if r1=0 jump to cont + tcg_gen_br(tcg_ctx, fin); + + gen_set_label(tcg_ctx, cont); + + tcg_gen_setcondi_i32(tcg_ctx, TCG_COND_EQ, overflowed, r2_local, 0x80000000); + tcg_gen_setcondi_i32(tcg_ctx, TCG_COND_EQ, overflowed2, r1_local, 0xffffffff); + tcg_gen_and_i32(tcg_ctx, overflowed, overflowed, overflowed2); //if both + + tcg_gen_setcondi_i32(tcg_ctx, TCG_COND_EQ, cpu_OVF, overflowed, 0x1); //are 1 + tcg_gen_brcondi_i32(tcg_ctx, TCG_COND_NE, cpu_OVF, 0x1, end); + tcg_gen_movi_i32(tcg_ctx, r2_local, 0x80000000); //DO THIS + tcg_gen_movi_i32(tcg_ctx, cpu_OVF, 0x1); + gen_set_gpr(tcg_ctx, rs2, r2_local); + tcg_gen_setcondi_i32(tcg_ctx, TCG_COND_LT, cpu_SF, r2_local, 0x0); + tcg_gen_setcondi_i32(tcg_ctx, TCG_COND_EQ, cpu_ZF, r2_local, 0x0); + tcg_gen_br(tcg_ctx, fin); + + gen_set_label(tcg_ctx, end); + + tcg_gen_div_i32(tcg_ctx, r2_local, r2_local, r1_local); + gen_set_gpr(tcg_ctx, rs2, r2_local); + + tcg_gen_setcondi_i32(tcg_ctx, TCG_COND_LT, cpu_SF, r2_local, 0x0); + tcg_gen_setcondi_i32(tcg_ctx, TCG_COND_EQ, cpu_ZF, r2_local, 0x0); + + gen_set_label(tcg_ctx, fin); + + tcg_temp_free(tcg_ctx, overflowed); + tcg_temp_free(tcg_ctx, overflowed2); + tcg_temp_free(tcg_ctx, r1_local); + tcg_temp_free(tcg_ctx, r2_local); + } break; + + case OPC_RH850_DIVH_reg1_reg2_reg3: { + // 0x80000000/0xffffffff=0x80000000; cpu_OVF=1, cpu_Z=1? 
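+    // (This is the one signed-overflow case handled below: the true quotient
+    // +2^31 does not fit in 32 bits, so the code forces the quotient to
+    // 0x80000000, clears reg3 and sets OV.)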
+ // reg2/0x0000=undefined; cpu_OVF=1 + // if reg2==reg3; reg2=remainder + + TCGLabel *cont; + TCGLabel *end; + TCGLabel *fin; + + TCGv r1_local = tcg_temp_local_new(tcg_ctx); + TCGv r2_local = tcg_temp_local_new(tcg_ctx); + TCGv r3_local = tcg_temp_local_new(tcg_ctx); + + tcg_gen_andi_i32(tcg_ctx, tcg_r1, tcg_r1, 0x0000FFFF); + tcg_gen_ext16s_i32(tcg_ctx, tcg_r1, tcg_r1); + tcg_gen_mov_i32(tcg_ctx, r1_local, tcg_r1); + tcg_gen_mov_i32(tcg_ctx, r2_local, tcg_r2); + + int_rs3 = extract32(ctx->opcode, 27, 5); + gen_get_gpr(tcg_ctx, tcg_r3, int_rs3); + tcg_gen_mov_i32(tcg_ctx, r3_local, tcg_r3); + TCGv overflowed = tcg_temp_local_new(tcg_ctx); + TCGv overflowed2 = tcg_temp_local_new(tcg_ctx); + + cont = gen_new_label(tcg_ctx); + end = gen_new_label(tcg_ctx); + fin = gen_new_label(tcg_ctx); + + tcg_gen_setcondi_i32(tcg_ctx, TCG_COND_EQ, cpu_OVF, r1_local, 0x0); + tcg_gen_brcondi_i32(tcg_ctx, TCG_COND_NE, cpu_OVF, 0x1, cont); + tcg_gen_br(tcg_ctx, fin); + + gen_set_label(tcg_ctx, cont); ///// + + tcg_gen_setcondi_i32(tcg_ctx, TCG_COND_EQ, overflowed, r2_local, 0x80000000); + tcg_gen_setcondi_i32(tcg_ctx, TCG_COND_EQ, overflowed2, r1_local, 0xffffffff); + tcg_gen_and_i32(tcg_ctx, overflowed, overflowed, overflowed2); // if result is 1, cpu_OVF = 1 + + tcg_gen_setcondi_i32(tcg_ctx, TCG_COND_EQ, cpu_OVF, overflowed, 0x1); + tcg_gen_brcondi_i32(tcg_ctx, TCG_COND_NE, cpu_OVF, 0x1, end); + tcg_gen_movi_i32(tcg_ctx, r2_local, 0x80000000); + tcg_gen_movi_i32(tcg_ctx, r3_local, 0x0000); + tcg_gen_movi_i32(tcg_ctx, cpu_OVF, 0x1); + gen_set_gpr(tcg_ctx, rs2, r2_local); + gen_set_gpr(tcg_ctx, int_rs3, r3_local); + tcg_gen_setcondi_i32(tcg_ctx, TCG_COND_LT, cpu_SF, r2_local, 0x0); + tcg_gen_setcondi_i32(tcg_ctx, TCG_COND_EQ, cpu_ZF, r2_local, 0x0); + tcg_gen_br(tcg_ctx, fin); + + gen_set_label(tcg_ctx, end); ///// + + tcg_gen_rem_i32(tcg_ctx, r3_local, r2_local, r1_local); + tcg_gen_div_i32(tcg_ctx, r2_local, r2_local, r1_local); + + if(rs2==int_rs3){ + gen_set_gpr(tcg_ctx, rs2, r3_local); + } else { + gen_set_gpr(tcg_ctx, rs2, r2_local); + gen_set_gpr(tcg_ctx, int_rs3, r3_local); + } + + tcg_gen_setcondi_i32(tcg_ctx, TCG_COND_LT, cpu_SF, r2_local, 0x0); + tcg_gen_setcondi_i32(tcg_ctx, TCG_COND_EQ, cpu_ZF, r2_local, 0x0); + + gen_set_label(tcg_ctx, fin); ///// + + tcg_temp_free(tcg_ctx, overflowed); + tcg_temp_free(tcg_ctx, overflowed2); + tcg_temp_free(tcg_ctx, r1_local); + tcg_temp_free(tcg_ctx, r2_local); + tcg_temp_free(tcg_ctx, r3_local); + } break; + + case OPC_RH850_DIVHU_reg1_reg2_reg3:{ + + TCGLabel *cont; + TCGLabel *fin; + + TCGv r1_local = tcg_temp_local_new(tcg_ctx); + TCGv r2_local = tcg_temp_local_new(tcg_ctx); + TCGv r3_local = tcg_temp_local_new(tcg_ctx); + + tcg_gen_andi_i32(tcg_ctx, tcg_r1, tcg_r1, 0x0000FFFF); + tcg_gen_ext16u_i32(tcg_ctx, tcg_r1, tcg_r1); + tcg_gen_mov_i32(tcg_ctx, r1_local, tcg_r1); + + tcg_gen_mov_i32(tcg_ctx, r2_local, tcg_r2); + + int_rs3 = extract32(ctx->opcode, 27, 5); + gen_get_gpr(tcg_ctx, tcg_r3, int_rs3); + tcg_gen_mov_i32(tcg_ctx, r3_local, tcg_r3); + + cont = gen_new_label(tcg_ctx); + fin = gen_new_label(tcg_ctx); + + tcg_gen_setcondi_i32(tcg_ctx, TCG_COND_EQ, cpu_OVF, r1_local, 0x0); + tcg_gen_brcondi_i32(tcg_ctx, TCG_COND_NE, cpu_OVF, 0x1, cont); + tcg_gen_br(tcg_ctx, fin); + + gen_set_label(tcg_ctx, cont); ///// + tcg_gen_remu_i32(tcg_ctx, r3_local, r2_local, r1_local); + tcg_gen_divu_i32(tcg_ctx, r2_local, r2_local, r1_local); + + if(rs2==int_rs3){ + gen_set_gpr(tcg_ctx, rs2, r3_local); + } else { + gen_set_gpr(tcg_ctx, rs2, r2_local); + 
gen_set_gpr(tcg_ctx, int_rs3, r3_local); + } + + tcg_gen_setcondi_i32(tcg_ctx, TCG_COND_LT, cpu_SF, r2_local, 0x0); + tcg_gen_setcondi_i32(tcg_ctx, TCG_COND_EQ, cpu_ZF, r2_local, 0x0); + + gen_set_label(tcg_ctx, fin); ///// + + tcg_temp_free(tcg_ctx, r1_local); + tcg_temp_free(tcg_ctx, r2_local); + tcg_temp_free(tcg_ctx, r3_local); + } + break; + + case OPC_RH850_DIVU_reg1_reg2_reg3:{ + + // reg2/0x0000=undefined; cpu_OVF=1 + // if reg2==reg3; reg2=remainder + + TCGLabel *cont; + TCGLabel *fin; + + TCGv r1_local = tcg_temp_local_new(tcg_ctx); + TCGv r2_local = tcg_temp_local_new(tcg_ctx); + TCGv r3_local = tcg_temp_local_new(tcg_ctx); + TCGv check = tcg_temp_local_new(tcg_ctx); + + tcg_gen_mov_i32(tcg_ctx, r1_local, tcg_r1); + tcg_gen_mov_i32(tcg_ctx, r2_local, tcg_r2); + + int_rs3 = extract32(ctx->opcode, 27, 5); + gen_get_gpr(tcg_ctx, tcg_r3, int_rs3); + tcg_gen_mov_i32(tcg_ctx, r3_local, tcg_r3); + + cont = gen_new_label(tcg_ctx); + fin = gen_new_label(tcg_ctx); + + tcg_gen_setcondi_i32(tcg_ctx, TCG_COND_EQ, cpu_OVF, r1_local, 0x0); + tcg_gen_brcondi_i32(tcg_ctx, TCG_COND_NE, cpu_OVF, 0x1, cont); + tcg_gen_br(tcg_ctx, fin); + + gen_set_label(tcg_ctx, cont); ///// + + tcg_gen_remu_i32(tcg_ctx, r3_local, r2_local, r1_local); + tcg_gen_divu_i32(tcg_ctx, r2_local, r2_local, r1_local); + + if(rs2==int_rs3){ + gen_set_gpr(tcg_ctx, rs2, r3_local); + } else { + gen_set_gpr(tcg_ctx, rs2, r2_local); + gen_set_gpr(tcg_ctx, int_rs3, r3_local); + } + + tcg_gen_setcondi_i32(tcg_ctx, TCG_COND_EQ, cpu_ZF, r2_local, 0x0); + tcg_gen_andi_i32(tcg_ctx, check, r2_local, 0x80000000); + tcg_gen_setcondi_i32(tcg_ctx, TCG_COND_EQ, cpu_SF, check, 0x80000000); + + gen_set_label(tcg_ctx, fin); ///// + + tcg_temp_free(tcg_ctx, r1_local); + tcg_temp_free(tcg_ctx, r2_local); + tcg_temp_free(tcg_ctx, r3_local); + tcg_temp_free(tcg_ctx, check); + } + break; + } + + tcg_temp_free_i32(tcg_ctx, tcg_r1); + tcg_temp_free_i32(tcg_ctx, tcg_r2); + tcg_temp_free_i32(tcg_ctx, tcg_r3); +} + +static void gen_branch(CPURH850State *env, DisasContext *ctx, uint32_t cond, + int rs1, int rs2, target_long bimm) +{ + TCGContext *tcg_ctx = ctx->uc->tcg_ctx; + + TCGLabel *l = gen_new_label(tcg_ctx); + TCGv condOK = tcg_temp_new(tcg_ctx); + TCGv condResult = condition_satisfied(tcg_ctx, cond); + tcg_gen_movi_i32(tcg_ctx, condOK, 0x1); + + tcg_gen_brcond_tl(tcg_ctx, TCG_COND_EQ, condResult, condOK, l); + + tcg_temp_free(tcg_ctx, condResult); + tcg_temp_free(tcg_ctx, condOK); + + gen_goto_tb_imm(ctx, 1, ctx->base.pc_next); // no jump, continue with next instr. + gen_set_label(tcg_ctx, l); /* branch taken */ + gen_goto_tb_imm(ctx, 0, ctx->pc + bimm); // jump + ctx->base.is_jmp = DISAS_TB_EXIT_ALREADY_GENERATED; +} + +static void gen_jmp(DisasContext *ctx, int rs1, uint32_t disp32, int operation) +{ + TCGContext *tcg_ctx = ctx->uc->tcg_ctx; + + // disp32 is already generated when entering this function + int rs2, rs3; + TCGv link_addr = tcg_temp_new(tcg_ctx); + TCGv dest_addr = tcg_temp_new(tcg_ctx); + + switch (operation) + { + /** + * Jump with immediate displacement. + * PC and disp32 are fixed and won't change at + * execution time, we can call gen_goto_tb_imm() with + * the computed destination address. + */ + case OPC_RH850_JR_imm22: + case OPC_RH850_JR_imm32: + { + gen_goto_tb_imm(ctx, 0, ctx->pc + disp32); + ctx->base.is_jmp = DISAS_TB_EXIT_ALREADY_GENERATED; + } + break; + + /** + * Jump with immediate displacement but store + * PC+4 first in reg2. We first call tcg_goto_tb(), + * update PC and reg2 and then issue an exit TB. 
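+     * (reg2 receives the return address PC+4, and the branch target is PC
+     * plus the sign-extended displacement.)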
+ **/ + case OPC_RH850_JARL_disp22_reg2: + { + rs2 = extract32(ctx->opcode, 11, 5); + tcg_gen_movi_i32(tcg_ctx, link_addr, ctx->pc); + tcg_gen_addi_i32(tcg_ctx, link_addr, link_addr, 0x4); + gen_set_gpr(tcg_ctx, rs2, link_addr); + + /* Update pc */ + tcg_gen_movi_i32(tcg_ctx, cpu_pc, ctx->pc + disp32); + + /* Goto corresponding TB (indirect jump). */ + ctx->base.is_jmp = DISAS_INDIRECT_JUMP; + } + break; + + /** + * Jump with immediate displacement but store PC+6 first in reg1. + * We first call tcg_gen_goto_tb(), update PC and reg1 and then + * issue an exit TB. + **/ + + case OPC_RH850_JARL_disp32_reg1: + { + gen_goto_tb_rl(ctx, 0, rs1, 6, ctx->pc + disp32); + ctx->base.is_jmp = DISAS_TB_EXIT_ALREADY_GENERATED; + } + break; + + /** + * This is a pure indirect call that will move GR[reg1] into PC, + * so we need to process in a different way. + **/ + + case OPC_RH850_JARL_reg1_reg3: + { + /* Get reg1 content into dest_addr. */ + gen_get_gpr(tcg_ctx, dest_addr, rs1); + + /* Get reg3 index, and store PC+4 in it. */ + rs3 = extract32(ctx->opcode, 27, 5); + tcg_gen_movi_i32(tcg_ctx, link_addr, ctx->pc); + tcg_gen_addi_i32(tcg_ctx, link_addr, link_addr, 0x4); + gen_set_gpr(tcg_ctx, rs3, link_addr); + + /* Update pc */ + tcg_gen_andi_i32(tcg_ctx, dest_addr, dest_addr, 0xfffffffe); + tcg_gen_mov_i32(tcg_ctx, cpu_pc, dest_addr); + + /* Goto corresponding TB (indirect jump). */ + ctx->base.is_jmp = DISAS_INDIRECT_JUMP; + } + break; + + default: // JMP instruction + { + /* Get reg1 into dest_addr. */ + gen_get_gpr(tcg_ctx, dest_addr, rs1); + + /* Apply displacement if provided. */ + if (disp32 != 0) + { + tcg_gen_addi_i32(tcg_ctx, dest_addr, dest_addr, disp32); + } + + /* Align and update PC. */ + tcg_gen_andi_i32(tcg_ctx, dest_addr, dest_addr, 0xfffffffe); + tcg_gen_mov_i32(tcg_ctx, cpu_pc, dest_addr); + + /* Indirect jump. */ + ctx->base.is_jmp = DISAS_INDIRECT_JUMP; + } + break; + } + + /* Free temporary values. */ + tcg_temp_free_i32(tcg_ctx, link_addr); + tcg_temp_free_i32(tcg_ctx, dest_addr); +} + +static void gen_loop(DisasContext *ctx, int rs1, int32_t disp16) +{ + TCGContext *tcg_ctx = ctx->uc->tcg_ctx; + + TCGLabel *l = gen_new_label(tcg_ctx); + TCGv zero_local = tcg_temp_local_new(tcg_ctx); + TCGv r1_local = tcg_temp_local_new(tcg_ctx); + TCGv minusone_local = tcg_temp_local_new(tcg_ctx); + + tcg_gen_movi_i32(tcg_ctx, zero_local, 0); + tcg_gen_movi_i32(tcg_ctx, minusone_local, 0xffffffff); + gen_get_gpr(tcg_ctx, r1_local, rs1); + gen_flags_on_add(tcg_ctx, r1_local, minusone_local); //set flags + tcg_gen_add_i32(tcg_ctx, r1_local, r1_local, minusone_local); + gen_set_gpr(tcg_ctx, rs1, r1_local); + + tcg_gen_brcond_tl(tcg_ctx, TCG_COND_NE, r1_local, zero_local, l); + + tcg_temp_free(tcg_ctx, r1_local); + tcg_temp_free(tcg_ctx, zero_local); + tcg_temp_free(tcg_ctx, minusone_local); + + gen_goto_tb_imm(ctx, 0, ctx->base.pc_next); // no jump, continue with next instr. 
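+    // Label l is reached when the decremented reg1 is still non-zero: the
+    // LOOP branch is taken and control goes back by disp16 (ctx->pc - disp16).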
+ gen_set_label(tcg_ctx, l); // branch taken + gen_goto_tb_imm(ctx, 1, ctx->pc - disp16); + + ctx->base.is_jmp = DISAS_TB_EXIT_ALREADY_GENERATED; +} + +static void gen_bit_manipulation(DisasContext *ctx, int rs1, int rs2, int operation) +{ + TCGContext *tcg_ctx = ctx->uc->tcg_ctx; + + TCGv r1 = tcg_temp_new_i32(tcg_ctx); + TCGv r2 = tcg_temp_new_i32(tcg_ctx); + TCGv tcg_disp = tcg_temp_new_i32(tcg_ctx); + TCGv one = tcg_temp_new_i32(tcg_ctx); + + TCGv temp = tcg_temp_new_i32(tcg_ctx); + TCGv test = tcg_temp_new_i32(tcg_ctx); + TCGv adr = tcg_temp_new_i32(tcg_ctx); + uint32_t disp16 = extract32(ctx->opcode, 16, 16); + + int bit; + + switch(operation){ + case OPC_RH850_SET1_reg2_reg1: + + gen_get_gpr(tcg_ctx, adr, rs1); + gen_get_gpr(tcg_ctx, r2, rs2); + tcg_gen_movi_i32(tcg_ctx, one, 0x1); + + tcg_gen_qemu_ld_i32(tcg_ctx, temp, adr, MEM_IDX, MO_UB); + + tcg_gen_shl_i32(tcg_ctx, r2, one, r2); + + tcg_gen_and_i32(tcg_ctx, test, temp, r2); + tcg_gen_setcond_i32(tcg_ctx, TCG_COND_NE, cpu_ZF, test, r2); + + tcg_gen_or_i32(tcg_ctx, temp, temp, r2); + + tcg_gen_qemu_st_i32(tcg_ctx, temp, adr, MEM_IDX, MO_UB); + + break; + case OPC_RH850_SET1_bit3_disp16_reg1: + + gen_get_gpr(tcg_ctx, r1, rs1); + tcg_gen_movi_i32(tcg_ctx, tcg_disp, disp16); + tcg_gen_ext16s_i32(tcg_ctx, tcg_disp, tcg_disp); + tcg_gen_add_i32(tcg_ctx, adr, r1, tcg_disp); + + bit = extract32(ctx->opcode, 11, 3); + + tcg_gen_qemu_ld_i32(tcg_ctx, temp, adr, MEM_IDX, MO_UB); + + tcg_gen_andi_i32(tcg_ctx, test, temp, (0x1 << bit)); + tcg_gen_setcondi_i32(tcg_ctx, TCG_COND_NE, cpu_ZF, test, (0x1 << bit)); + + tcg_gen_ori_i32(tcg_ctx, temp, temp, (0x1 << bit)); + + tcg_gen_qemu_st_i32(tcg_ctx, temp, adr, MEM_IDX, MO_UB); + break; + + case OPC_RH850_NOT1_reg2_reg1: + + gen_get_gpr(tcg_ctx, adr, rs1); + gen_get_gpr(tcg_ctx, r2, rs2); + tcg_gen_movi_i32(tcg_ctx, one, 0x1); + + tcg_gen_qemu_ld_i32(tcg_ctx, temp, adr, MEM_IDX, MO_UB); + + tcg_gen_shl_i32(tcg_ctx, r2, one, r2); // r2 = mask + + tcg_gen_and_i32(tcg_ctx, test, temp, r2); + tcg_gen_setcond_i32(tcg_ctx, TCG_COND_NE, cpu_ZF, test, r2); + + //test = temp & mask + tcg_gen_and_i32(tcg_ctx, test, temp, r2); + //test = not (test) & mask + tcg_gen_not_i32(tcg_ctx, test, test); + tcg_gen_and_i32(tcg_ctx, test, test, r2); + //temp = temp & not(mask) + tcg_gen_not_i32(tcg_ctx, r2, r2); + tcg_gen_and_i32(tcg_ctx, temp, temp, r2); + //temp = temp or test + tcg_gen_or_i32(tcg_ctx, temp, temp, test); + + tcg_gen_qemu_st_i32(tcg_ctx, temp, adr, MEM_IDX, MO_UB); + break; + + case OPC_RH850_NOT1_bit3_disp16_reg1: + + gen_get_gpr(tcg_ctx, r1, rs1); + tcg_gen_movi_i32(tcg_ctx, tcg_disp, disp16); + tcg_gen_ext16s_i32(tcg_ctx, tcg_disp, tcg_disp); + tcg_gen_add_i32(tcg_ctx, adr, r1, tcg_disp); + + bit = extract32(ctx->opcode, 11, 3); + + tcg_gen_qemu_ld_i32(tcg_ctx, temp, adr, MEM_IDX, MO_UB); + + tcg_gen_andi_i32(tcg_ctx, test, temp, (0x1 << bit)); + tcg_gen_setcondi_i32(tcg_ctx, TCG_COND_NE, cpu_ZF, test, (0x1 << bit)); + + tcg_gen_movi_i32(tcg_ctx, r2, (0x1 << bit)); // r2 = mask + + //test = temp & mask + tcg_gen_and_i32(tcg_ctx, test, temp, r2); + //test = not (test) & mask + tcg_gen_not_i32(tcg_ctx, test, test); + tcg_gen_and_i32(tcg_ctx, test, test, r2); + //temp = temp & not(mask) + tcg_gen_not_i32(tcg_ctx, r2, r2); + tcg_gen_and_i32(tcg_ctx, temp, temp, r2); + //temp = temp or test + tcg_gen_or_i32(tcg_ctx, temp, temp, test); + + tcg_gen_qemu_st_i32(tcg_ctx, temp, adr, MEM_IDX, MO_UB); + break; + + case OPC_RH850_CLR1_reg2_reg1: + + gen_get_gpr(tcg_ctx, adr, rs1); + gen_get_gpr(tcg_ctx, r2, 
rs2); + tcg_gen_movi_i32(tcg_ctx, one, 0x1); + + tcg_gen_qemu_ld_i32(tcg_ctx, temp, adr, MEM_IDX, MO_UB); + tcg_gen_andi_i32(tcg_ctx, r2, r2, 0x7); + tcg_gen_shl_i32(tcg_ctx, r2, one, r2); + + tcg_gen_and_i32(tcg_ctx, test, temp, r2); + tcg_gen_setcond_i32(tcg_ctx, TCG_COND_NE, cpu_ZF, test, r2); + + tcg_gen_not_i32(tcg_ctx, r2, r2); + tcg_gen_and_i32(tcg_ctx, temp, temp, r2); + + tcg_gen_qemu_st_i32(tcg_ctx, temp, adr, MEM_IDX, MO_UB); + break; + + case OPC_RH850_CLR1_bit3_disp16_reg1: + + gen_get_gpr(tcg_ctx, r1, rs1); + tcg_gen_movi_i32(tcg_ctx, tcg_disp, disp16); + tcg_gen_ext16s_i32(tcg_ctx, tcg_disp, tcg_disp); + tcg_gen_add_i32(tcg_ctx, adr, r1, tcg_disp); + + bit = extract32(ctx->opcode, 11, 3); + + tcg_gen_qemu_ld_i32(tcg_ctx, temp, adr, MEM_IDX, MO_UB); + + tcg_gen_movi_i32(tcg_ctx, test, (0x1 << bit)); + tcg_gen_andi_i32(tcg_ctx, test, temp, (0x1 << bit)); + tcg_gen_setcondi_i32(tcg_ctx, TCG_COND_NE, cpu_ZF, test, (0x1 << bit)); + + tcg_gen_movi_i32(tcg_ctx, test, (0x1 << bit)); + tcg_gen_not_i32(tcg_ctx, test, test); + tcg_gen_and_i32(tcg_ctx, temp, temp, test); + + tcg_gen_qemu_st_i32(tcg_ctx, temp, adr, MEM_IDX, MO_UB); + break; + + case OPC_RH850_TST1_reg2_reg1: + + gen_get_gpr(tcg_ctx, adr, rs1); + gen_get_gpr(tcg_ctx, r2, rs2); + tcg_gen_movi_i32(tcg_ctx, one, 0x1); + + tcg_gen_qemu_ld_i32(tcg_ctx, temp, adr, MEM_IDX, MO_UB); + + tcg_gen_shl_i32(tcg_ctx, r2, one, r2); + + tcg_gen_and_i32(tcg_ctx, test, temp, r2); + tcg_gen_setcond_i32(tcg_ctx, TCG_COND_NE, cpu_ZF, test, r2); + break; + + case OPC_RH850_TST1_bit3_disp16_reg1: + + gen_get_gpr(tcg_ctx, r1, rs1); + tcg_gen_movi_i32(tcg_ctx, tcg_disp, disp16); + tcg_gen_ext16s_i32(tcg_ctx, tcg_disp, tcg_disp); + tcg_gen_add_i32(tcg_ctx, adr, r1, tcg_disp); + + bit = extract32(ctx->opcode, 11, 3); + + tcg_gen_qemu_ld_i32(tcg_ctx, temp, adr, MEM_IDX, MO_UB); + + tcg_gen_movi_i32(tcg_ctx, test, (0x1 << bit)); + tcg_gen_andi_i32(tcg_ctx, test, temp, (0x1 << bit)); + tcg_gen_setcondi_i32(tcg_ctx, TCG_COND_NE, cpu_ZF, test, (0x1 << bit)); + break; + } + + tcg_temp_free_i32(tcg_ctx, r1); + tcg_temp_free_i32(tcg_ctx, r2); + tcg_temp_free_i32(tcg_ctx, tcg_disp); + tcg_temp_free_i32(tcg_ctx, one); + tcg_temp_free_i32(tcg_ctx, temp); + tcg_temp_free_i32(tcg_ctx, test); + tcg_temp_free_i32(tcg_ctx, adr); + +} + +static void gen_update_ispr(DisasContext *ctx, CPURH850State *env) +{ + TCGContext *tcg_ctx = ctx->uc->tcg_ctx; + + TCGLabel *do_not_update = gen_new_label(tcg_ctx); + TCGLabel *clear_bit = gen_new_label(tcg_ctx); + TCGLabel *loop = gen_new_label(tcg_ctx); + TCGv temp = tcg_temp_local_new_i32(tcg_ctx); + TCGv idx = tcg_temp_local_new_i32(tcg_ctx); + TCGv ispr = tcg_temp_local_new_i32(tcg_ctx); + + /* Move ISPR value into intcfg. */ + tcg_gen_mov_i32(tcg_ctx, temp, cpu_sysRegs[BANK_ID_BASIC_2][INTCFG_IDX2]); + + /* And intcfg with 1. */ + tcg_gen_andi_i32(tcg_ctx, temp, temp, 1); + + /* Compare intcfg, jump to do_not_process if 1. */ + tcg_gen_brcondi_i32(tcg_ctx, TCG_COND_NE, temp, 0, do_not_update); + + /* INTCFG.ICSP = 0, now check EP (EP == 1 -> do not update ISRP) */ + tcg_gen_brcondi_i32(tcg_ctx, TCG_COND_NE, cpu_EP, 0, do_not_update); + + /** + * Okay, now update ISPR (clear the highest priority bit). + * We need to loop from bit 0 to bit 15, clear bit and exit loop if bit is + * set. + **/ + + /* Set mask to 1. */ + tcg_gen_movi_i32(tcg_ctx, temp, 1); + tcg_gen_movi_i32(tcg_ctx, idx, 0); + + gen_set_label(tcg_ctx, loop); + + /* Load ISPR. 
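+       Then AND it with the walking one-bit mask held in temp to test the bit selected by idx.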
*/ + tcg_gen_mov_i32(tcg_ctx, ispr, cpu_sysRegs[BANK_ID_BASIC_2][ISPR_IDX2]); + tcg_gen_and_i32(tcg_ctx, ispr, ispr, temp); + tcg_gen_brcondi_i32(tcg_ctx, TCG_COND_EQ, ispr, 1, clear_bit); + + /* shift left our mask, exit if done. */ + tcg_gen_shli_i32(tcg_ctx, temp, temp, 1); + tcg_gen_addi_i32(tcg_ctx, idx, idx, 1); + + tcg_gen_brcondi_i32(tcg_ctx, TCG_COND_LT, idx, 2, loop); + + tcg_gen_br(tcg_ctx, do_not_update); + + /* Clear bit. */ + gen_set_label(tcg_ctx, clear_bit); + tcg_gen_xor_i32(tcg_ctx, cpu_sysRegs[BANK_ID_BASIC_2][ISPR_IDX2], cpu_sysRegs[BANK_ID_BASIC_2][ISPR_IDX2], temp); + + /* Set label do_not_update here. */ + gen_set_label(tcg_ctx, do_not_update); + + /* Free resources. */ + tcg_temp_free_i32(tcg_ctx, temp); + tcg_temp_free_i32(tcg_ctx, ispr); +} + +static void gen_special(DisasContext *ctx, CPURH850State *env, int rs1, int rs2, int operation) +{ + TCGContext *tcg_ctx = ctx->uc->tcg_ctx; + + TCGLabel *storeReg3; + TCGLabel *cont; + int regID; + int selID = 0; + int imm; + int vector; + + switch(operation){ + case OPC_RH850_CALLT_imm6: { + TCGv temp = tcg_temp_new_i32(tcg_ctx); + TCGv adr = tcg_temp_new_i32(tcg_ctx); + + //setting CTPC to PC+2 + tcg_gen_addi_i32(tcg_ctx, cpu_sysRegs[BANK_ID_BASIC_0][CTPC_IDX], cpu_pc, 0x2); + //setting CPTSW bits 0:4 + flags_to_tcgv_z_cy_ov_s_sat(tcg_ctx, cpu_sysRegs[BANK_ID_BASIC_0][CTPSW_IDX]); + + imm = extract32(ctx->opcode, 0, 6); + tcg_gen_movi_i32(tcg_ctx, adr, imm); + tcg_gen_shli_i32(tcg_ctx, adr, adr, 0x1); + tcg_gen_ext8s_i32(tcg_ctx, adr, adr); + tcg_gen_add_i32(tcg_ctx, adr, cpu_sysRegs[BANK_ID_BASIC_0][CTBP_IDX], adr); + + tcg_gen_qemu_ld16u(tcg_ctx, temp, adr, 0); + + tcg_gen_add_i32(tcg_ctx, cpu_pc, temp, cpu_sysRegs[BANK_ID_BASIC_0][CTBP_IDX]); + ctx->base.is_jmp = DISAS_EXIT_TB; + + tcg_temp_free(tcg_ctx, temp); + tcg_temp_free(tcg_ctx, adr); + } break; + + case OPC_RH850_CAXI_reg1_reg2_reg3: { + TCGv temp = tcg_temp_new_i32(tcg_ctx); + TCGv adr = tcg_temp_new_i32(tcg_ctx); + TCGv r2 = tcg_temp_new(tcg_ctx); + TCGv r3 = tcg_temp_new(tcg_ctx); + + storeReg3 = gen_new_label(tcg_ctx); + gen_get_gpr(tcg_ctx, adr, rs1); + gen_get_gpr(tcg_ctx, r2, rs2); + int rs3 = extract32(ctx->opcode, 27, 5); + gen_get_gpr(tcg_ctx, r3, rs3); + tcg_gen_qemu_ld32u(tcg_ctx, temp, adr, 0); + storeReg3 = gen_new_label(tcg_ctx); + cont = gen_new_label(tcg_ctx); + + TCGv local_adr = tcg_temp_local_new_i32(tcg_ctx); + TCGv local_r2 = tcg_temp_local_new_i32(tcg_ctx); + TCGv local_r3 = tcg_temp_local_new_i32(tcg_ctx); + TCGv local_temp = tcg_temp_local_new_i32(tcg_ctx); + + tcg_gen_mov_i32(tcg_ctx, local_adr, adr); + tcg_gen_mov_i32(tcg_ctx, local_r2, r2); + tcg_gen_mov_i32(tcg_ctx, local_r3, r3); + tcg_gen_mov_i32(tcg_ctx, local_temp, temp); + + gen_flags_on_sub(tcg_ctx, local_r2, local_temp); + + tcg_gen_brcondi_i32(tcg_ctx, TCG_COND_EQ, cpu_ZF, 0x1, storeReg3); + tcg_gen_qemu_st_tl(tcg_ctx, local_temp, local_adr, MEM_IDX, MO_TESL); + tcg_gen_br(tcg_ctx, cont); + + gen_set_label(tcg_ctx, storeReg3); + tcg_gen_qemu_st_tl(tcg_ctx, local_r3, local_adr, MEM_IDX, MO_TESL); + + gen_set_label(tcg_ctx, cont); + gen_set_gpr(tcg_ctx, rs3, local_temp); + + tcg_temp_free(tcg_ctx, temp); + tcg_temp_free(tcg_ctx, adr); + tcg_temp_free(tcg_ctx, r2); + tcg_temp_free(tcg_ctx, r3); + break; + } + + case OPC_RH850_CTRET: { + TCGv temp = tcg_temp_new_i32(tcg_ctx); + + tcg_gen_mov_i32(tcg_ctx, cpu_pc, cpu_sysRegs[BANK_ID_BASIC_0][CTPC_IDX]); + tcgv_to_flags_z_cy_ov_s_sat(tcg_ctx, cpu_sysRegs[BANK_ID_BASIC_0][CTPSW_IDX]); + + ctx->base.is_jmp = DISAS_EXIT_TB; + + 
tcg_temp_free(tcg_ctx, temp); + } break; + + case OPC_RH850_DI: + tcg_gen_movi_i32(tcg_ctx, cpu_ID, 0x1); + break; + + case OPC_RH850_DISPOSE_imm5_list12: { + TCGv temp = tcg_temp_new_i32(tcg_ctx); + TCGv adr = tcg_temp_new_i32(tcg_ctx); + + int list [12] = {31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20}; + int numOfListItems = sizeof(list) / sizeof(list[0]); + int list12 = extract32(ctx->opcode, 0, 1) | ( (extract32(ctx->opcode, 21, 11)) << 1); + + // reorganising bits that indicate the registers to load + // doing this for easier looping in correct order + int dispList = ((list12 & 0x80) << 4) | + ((list12 & 0x40) << 4) | + ((list12 & 0x20) << 4) | + ((list12 & 0x10) << 4) | + ((list12 & 0x800) >> 4) | + ((list12 & 0x400) >> 4) | + ((list12 & 0x200) >> 4) | + ((list12 & 0x100) >> 4) | + ((list12 & 0x8) << 0) | + ((list12 & 0x4) << 0) | + ((list12 & 0x2) >> 1) | + ((list12 & 0x1) << 1) ; + + int test = 0x1; + gen_get_gpr(tcg_ctx, temp, 3); // stack pointer (sp) register is cpu_gpr[3] + tcg_gen_addi_i32(tcg_ctx, temp, temp, (extract32(ctx->opcode, 1, 5) << 2)); + + TCGv regToLoad = tcg_temp_new_i32(tcg_ctx); + + for(int i=0; iopcode, 0, 1) | ( (extract32(ctx->opcode, 21, 11)) << 1); + TCGv jmpAddr = tcg_temp_new_i32(tcg_ctx); + + // reorganising bits that indicate the registers to load + // doing this for easier looping in correct order + int dispList = ((list12 & 0x80) << 4) | + ((list12 & 0x40) << 4) | + ((list12 & 0x20) << 4) | + ((list12 & 0x10) << 4) | + ((list12 & 0x800) >> 4) | + ((list12 & 0x400) >> 4) | + ((list12 & 0x200) >> 4) | + ((list12 & 0x100) >> 4) | + ((list12 & 0x8) << 0) | + ((list12 & 0x4) << 0) | + ((list12 & 0x2) >> 1) | + ((list12 & 0x1) << 1) ; + + int test = 0x1; + gen_get_gpr(tcg_ctx, temp, 3); // stack pointer (sp) register is cpu_gpr[3] + tcg_gen_addi_i32(tcg_ctx, temp, temp, (extract32(ctx->opcode, 1, 5) << 2)); + + TCGv regToLoad = tcg_temp_new_i32(tcg_ctx); + + for(int i=0; iopcode, 16, 5))); + tcg_gen_mov_i32(tcg_ctx, cpu_pc, jmpAddr); + + gen_set_gpr(tcg_ctx, 3, temp); + ctx->base.is_jmp = DISAS_EXIT_TB; + + tcg_temp_free(tcg_ctx, temp); + tcg_temp_free(tcg_ctx, adr); + } + break; + + case OPC_RH850_EI: + tcg_gen_movi_i32(tcg_ctx, cpu_ID, 0x0); + break; + case OPC_RH850_EIRET: + /* Move EIPC to PC and EIPSW to PSW. */ + tcg_gen_mov_i32(tcg_ctx, cpu_pc, cpu_sysRegs[BANK_ID_BASIC_0][EIPC_IDX]); + tcgv_to_flags(tcg_ctx, cpu_sysRegs[BANK_ID_BASIC_0][EIPSW_IDX]); + + /* Update ISPR. */ + gen_update_ispr(ctx, env); + ctx->base.is_jmp = DISAS_EXIT_TB; + break; + case OPC_RH850_FERET: + tcg_gen_mov_i32(tcg_ctx, cpu_pc, cpu_sysRegs[BANK_ID_BASIC_0][FEPC_IDX]); + tcgv_to_flags(tcg_ctx, cpu_sysRegs[BANK_ID_BASIC_0][FEPSW_IDX]); + ctx->base.is_jmp = DISAS_EXIT_TB; + break; + + case OPC_RH850_FETRAP_vector4: { + + vector = extract32(ctx->opcode, 11, 4); + TCGv_i32 excp = tcg_const_i32(tcg_ctx, RH850_EXCP_FETRAP); + TCGv_i32 cause = tcg_const_i32(tcg_ctx, vector + 0x30); + gen_helper_raise_exception_with_cause(tcg_ctx, tcg_ctx->cpu_env, excp, cause); + tcg_temp_free_i32(tcg_ctx, excp); + tcg_temp_free_i32(tcg_ctx, cause); + ctx->base.is_jmp = DISAS_NORETURN; + } break; + + case OPC_RH850_HALT: + // nop, interupts are not implemented, so HALT would never continue + // tcg_abort(); + break; + + case OPC_RH850_LDSR_reg2_regID_selID: + selID = extract32(ctx->opcode, 27, 5); + regID = rs2; + + // Modify only sytem regs, which exist. Real device executes instruction, but + // value is not stored for system regs, which do not exist. No exception is + // thrown. 
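+            // PSW is held in separate flag variables rather than in cpu_sysRegs,
+            // so it is special-cased below; for other registers the read-only bits
+            // are masked off the written value so they keep their reset-time contents.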
+ if(cpu_sysRegs[selID][regID] != NULL || (selID == BANK_ID_BASIC_0 && regID == PSW_IDX)) { + + TCGv tmp = tcg_temp_new(tcg_ctx); + gen_get_gpr(tcg_ctx, tmp, rs1); + + if(selID == BANK_ID_BASIC_0 && regID == PSW_IDX){ + tcgv_to_flags(tcg_ctx, tmp); + } else { + // clear read-only bits in value, all other bits in sys reg. This way + // read-only bits preserve their value given at reset + tcg_gen_andi_i32(tcg_ctx, tmp, tmp, rh850_sys_reg_read_only_masks[selID][regID]); + tcg_gen_andi_i32(tcg_ctx, cpu_sysRegs[selID][regID], cpu_sysRegs[selID][regID], ~rh850_sys_reg_read_only_masks[selID][regID]); + tcg_gen_or_i32(tcg_ctx, cpu_sysRegs[selID][regID], cpu_sysRegs[selID][regID], tmp); + } + tcg_temp_free(tcg_ctx, tmp); + } + break; + + //case OPC_RH850_LDLW: + //break; + + //case OPC_RH850_NOP: + //break; + + case OPC_RH850_POPSP_rh_rt: { + TCGv temp = tcg_temp_new_i32(tcg_ctx); + TCGv adr = tcg_temp_new_i32(tcg_ctx); + + uint32_t rs3 = extract32(ctx->opcode, 27, 5); + + int numOfRegs = (rs3-rs1)+1; + + gen_get_gpr(tcg_ctx, temp, 3); // stack pointer register is cpu_gpr[3] + TCGv regToLoad = tcg_temp_new_i32(tcg_ctx); + + if(rs1<=rs3){ + + for(int i=0; iopcode, 21, 11) << 1) | (extract32(ctx->opcode, 0, 1) ) ) ; + int numOfListItems = sizeof(list) / sizeof(list[0]); + int prepList = ((list12 & 0x80) >> 7) | + ((list12 & 0x40) >> 5) | + ((list12 & 0x20) >> 3) | + ((list12 & 0x10) >> 1) | + ((list12 & 0x800) >> 7) | + ((list12 & 0x400) >> 5) | + ((list12 & 0x200) >> 3) | + ((list12 & 0x100) >> 1) | + ((list12 & 0x8) << 5) | + ((list12 & 0x4) << 7) | + ((list12 & 0x2) << 10) | + ((list12 & 0x1) << 10) ; + + int test = 0x1; + gen_get_gpr(tcg_ctx, temp, 3); // stack pointer register is cpu_gpr[3] + TCGv regToStore = tcg_temp_new_i32(tcg_ctx); + + for(int i=0; iopcode, 1, 5) << 2)); + gen_set_gpr(tcg_ctx, 3, temp); + + tcg_temp_free(tcg_ctx, temp); + tcg_temp_free(tcg_ctx, adr); + } break; + + case OPC_RH850_PREPARE_list12_imm5_sp:{ + TCGv temp = tcg_temp_new_i32(tcg_ctx); + TCGv adr = tcg_temp_new_i32(tcg_ctx); + + int list [12] = {20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31}; + uint32_t list12 = extract32(ctx->opcode, 0, 1) | ( (extract32(ctx->opcode, 21, 11)) << 1); + int numOfListItems = sizeof(list) / sizeof(list[0]); + int prepList = ((list12 & 0x80) >> 7) | + ((list12 & 0x40) >> 5) | + ((list12 & 0x20) >> 3) | + ((list12 & 0x10) >> 1) | + ((list12 & 0x800) >> 7) | + ((list12 & 0x400) >> 5) | + ((list12 & 0x200) >> 3) | + ((list12 & 0x100) >> 1) | + ((list12 & 0x8) << 5) | + ((list12 & 0x4) << 7) | + ((list12 & 0x2) << 10) | + ((list12 & 0x1) << 10) ; + + uint32_t imm = 0x0; + + int test = 0x1; + int ff = extract32(ctx->opcode, 19, 2); + gen_get_gpr(tcg_ctx, temp, 3); // stack pointer register is cpu_gpr[3] + TCGv regToStore = tcg_temp_new_i32(tcg_ctx); + + for(int i=0; iopcode, 1, 5) << 2)); + + gen_set_gpr(tcg_ctx, 3, temp); + + switch(ff){ + + case 0x0: + gen_set_gpr(tcg_ctx, 30, temp); //moving sp to ep (element pointer is at cpu_gpr[30]) + break; + + case 0x1: + imm = cpu_lduw_code(env, ctx->base.pc_next); // fetching additional 16bits from memory + tcg_gen_movi_i32(tcg_ctx, temp, imm); + tcg_gen_ext16s_i32(tcg_ctx, temp, temp); + gen_set_gpr(tcg_ctx, 30, temp); + ctx->base.pc_next+=2; // increasing PC due to additional fetch + break; + + case 0x2: + imm = cpu_lduw_code(env, ctx->base.pc_next); // fetching additional 16bits from memory + tcg_gen_movi_i32(tcg_ctx, temp, imm); + tcg_gen_shli_i32(tcg_ctx, temp, temp, 0x10); + gen_set_gpr(tcg_ctx, 30, temp); + ctx->base.pc_next+=2; + break; + 
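+        // ff == 3: ep (r30) is loaded with a full 32-bit immediate assembled
+        // from the two extra halfwords that follow the instruction.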
+ case 0x3: + imm = cpu_lduw_code(env, ctx->base.pc_next) | + (cpu_lduw_code(env, ctx->base.pc_next + 2) << 0x10); + // fetching additional 32bits from memory + + tcg_gen_movi_i32(tcg_ctx, temp, imm); + gen_set_gpr(tcg_ctx, 30, temp); + ctx->base.pc_next = ctx->base.pc_next + 4; + break; + } + + tcg_temp_free(tcg_ctx, temp); + tcg_temp_free(tcg_ctx, adr); + } break; + + case OPC_RH850_PUSHSP_rh_rt: { + TCGv temp = tcg_temp_new_i32(tcg_ctx); + TCGv adr = tcg_temp_new_i32(tcg_ctx); + + uint32_t rs3 = extract32(ctx->opcode, 27, 5); + + int numOfRegs = (rs3-rs1)+1; + + gen_get_gpr(tcg_ctx, temp, 3); // stack pointer register is cpu_gpr[3] + TCGv regToStore = tcg_temp_new_i32(tcg_ctx); + if(rs1<=rs3){ + + for(int i=0; icpu_env, excp, cause); + tcg_temp_free_i32(tcg_ctx, excp); + tcg_temp_free_i32(tcg_ctx, cause); + ctx->base.is_jmp = DISAS_NORETURN; + + } break; + + case OPC_RH850_SNOOZE: + break; + + //case OPC_RH850_STCW: + // break; + + case OPC_RH850_STSR_regID_reg2_selID: + regID=rs1; + selID = extract32(ctx->opcode, 27, 5); + if(selID == BANK_ID_BASIC_0 && regID == PSW_IDX){ + TCGv tmp = tcg_temp_new_i32(tcg_ctx); + tcg_gen_movi_tl(tcg_ctx, tmp, 0); + flags_to_tcgv(tcg_ctx, tmp); + gen_set_gpr(tcg_ctx, rs2, tmp); + tcg_temp_free(tcg_ctx, tmp); + } else { + if (cpu_sysRegs[selID][regID] != NULL) { + gen_set_gpr(tcg_ctx, rs2, cpu_sysRegs[selID][regID]); + } else { + TCGv dat = tcg_temp_local_new(tcg_ctx); + tcg_gen_movi_i32(tcg_ctx, dat, 0); + gen_set_gpr(tcg_ctx, rs2, 0); // if sys reg does not exist, write 0 + tcg_temp_free(tcg_ctx, dat); + } + } + break; + + case OPC_RH850_SWITCH_reg1: { + TCGv temp = tcg_temp_new_i32(tcg_ctx); + TCGv adr = tcg_temp_new_i32(tcg_ctx); + + gen_get_gpr(tcg_ctx, adr, rs1); + tcg_gen_shli_i32(tcg_ctx, adr, adr, 0x1); + tcg_gen_add_i32(tcg_ctx, adr, adr, cpu_pc); + tcg_gen_addi_i32(tcg_ctx, adr, adr, 0x2); + + tcg_gen_addi_i32(tcg_ctx, cpu_pc, cpu_pc, 0x2); + tcg_gen_qemu_ld16s(tcg_ctx, temp, adr, MEM_IDX); + tcg_gen_ext16s_i32(tcg_ctx, temp, temp); + tcg_gen_shli_i32(tcg_ctx, temp, temp, 0x1); + tcg_gen_add_i32(tcg_ctx, cpu_pc, cpu_pc, temp); + ctx->base.is_jmp = DISAS_EXIT_TB; + } break; + + // SYNC instructions will not be implemented + case OPC_RH850_SYNCE: + case OPC_RH850_SYNCI: + case OPC_RH850_SYNCM: + case OPC_RH850_SYNCP: + break; + + case OPC_RH850_TRAP: + { + int vector5 = rs1; + TCGv_i32 excp = tcg_const_i32(tcg_ctx, RH850_EXCP_TRAP); + TCGv_i32 cause = tcg_const_i32(tcg_ctx, vector5 + 0x40); + gen_helper_raise_exception_with_cause(tcg_ctx, tcg_ctx->cpu_env, excp, cause); + tcg_temp_free_i32(tcg_ctx, excp); + tcg_temp_free_i32(tcg_ctx, cause); + ctx->base.is_jmp = DISAS_NORETURN; + } + break; + + case OPC_RH850_SYSCALL: + { + int vector = extract32(ctx->opcode, 0, 5) | ((extract32(ctx->opcode, 27, 3)) << 5); + // int vector=5; + TCGv_i32 excp = tcg_const_i32(tcg_ctx, RH850_EXCP_SYSCALL); + TCGv_i32 cause = tcg_const_i32(tcg_ctx, vector + 0x8000); + gen_helper_raise_exception_with_cause(tcg_ctx, tcg_ctx->cpu_env, excp, cause); + tcg_temp_free_i32(tcg_ctx, excp); + tcg_temp_free_i32(tcg_ctx, cause); + ctx->base.is_jmp = DISAS_NORETURN; + } + break; + } +} + +/* Cache operations are not supported on single core emulation. 
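+   The cache-op sub-opcode is still decoded below, so these encodings are
+   accepted and simply behave as no-ops.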
*/ +static void gen_cache(DisasContext *ctx, int rs1, int rs2, int operation){ + int cache_op = (extract32(ctx->opcode,11, 2) << 5 ) | (extract32(ctx->opcode, 27, 5)); + switch(cache_op){ + case CHBII: + // printf("CHBII\n"); + break; + case CIBII: + // printf("CIBII\n"); + break; + case CFALI: + // printf("CFALI\n"); + break; + case CISTI: + // printf("CISTI\n"); + break; + case CILDI: + // printf("CILDI\n"); + break; + case CLL: + // printf("CLL\n"); + // this operation is not implemented on single core + break; + } +} + +/* 48-bit RH850 instruction decoding */ +static void decode_RH850_48(CPURH850State *env, DisasContext *ctx) +{ + int rs1, rs3; + uint64_t opcode48; + + rs1 = GET_RS1(ctx->opcode); + rs3 = extract32(ctx->opcode, 27, 5); + + opcode48 = (ctx->opcode1); + opcode48 = (ctx->opcode) | (opcode48 << 0x20); + uint32_t opcode20 = extract32(opcode48,0,20) & 0xfffe0; + + uint32_t disp23 = (ctx->opcode1 << 7) | (extract32(ctx->opcode, 21, 6) << 1); + uint32_t disp32 = (opcode48 >> 16); + + switch(opcode20) { + + + case OPC_RH850_LDB2: + gen_load(ctx, MO_SB, rs3, rs1, disp23, 1); + break; + case OPC_RH850_LDH2: + gen_load(ctx, MO_TESW, rs3, rs1, disp23, 1); + break; + case OPC_RH850_LDW2: + gen_load(ctx, MO_TESL, rs3, rs1, disp23, 1); + break; + case OPC_RH850_LDDW: + gen_load(ctx, MO_TEQ, rs3, rs1, disp23, 1); + break; + case OPC_RH850_LDBU2: + gen_load(ctx, MO_UB, rs3, rs1, disp23, 1); + break; + case OPC_RH850_LDHU2: + gen_load(ctx, MO_TEUW, rs3, rs1, disp23, 1); + break; + + case OPC_RH850_STB2: + gen_store(ctx, MO_SB, rs1, rs3, disp23, 1); + break; + case OPC_RH850_STH2: + gen_store(ctx, MO_TESW, rs1, rs3, disp23, 1); + break; + case OPC_RH850_STW2: + gen_store(ctx, MO_TESL, rs1, rs3, disp23, 1); + break; + case OPC_RH850_STDW: + gen_store(ctx, MO_TEQ, rs1, rs3, disp23, 1); + break; + } + + if (extract32(ctx->opcode, 5, 11) == 0x31) { + gen_arithmetic(ctx, 0, rs1, OPC_RH850_MOV_imm32_reg1); + } else if (extract32(ctx->opcode, 5, 12) == 0x37) { + gen_jmp(ctx, rs1, disp32, OPC_RH850_JMP_disp32_reg1); + } else if (extract32(ctx->opcode, 5, 11) == 0x17) { + if (rs1 == 0x0){ + gen_jmp(ctx, 0, disp32, OPC_RH850_JR_imm32); + + } else { + gen_jmp(ctx, rs1, disp32, OPC_RH850_JARL_disp32_reg1); + } + } +} + +/* 32-bit RH850 instruction decoding */ +static void decode_RH850_32(CPURH850State *env, DisasContext *ctx) +{ + TCGContext *tcg_ctx = ctx->uc->tcg_ctx; + + int rs1; + int rs2; + int cond; + uint32_t op; + uint32_t formXop; + uint32_t checkXII; + uint32_t check32bitZERO; + target_long imm_32; + target_long ld_imm; + + op = MASK_OP_MAJOR(ctx->opcode); + rs1 = GET_RS1(ctx->opcode); // rs1 is at b0-b4; + rs2 = GET_RS2(ctx->opcode); // rs2 is at b11-b15; + TCGv r1 = tcg_temp_local_new(tcg_ctx); + TCGv r2 = tcg_temp_local_new(tcg_ctx); + imm_32 = GET_IMM_32(ctx->opcode); + ld_imm = extract32(ctx->opcode, 16, 16); + + gen_get_gpr(tcg_ctx, r1, rs1); + gen_get_gpr(tcg_ctx, r2, rs2); + + switch(op){ + + case OPC_RH850_LDB: + gen_load(ctx, MO_SB, rs2, rs1, ld_imm, 0); + break; + + case OPC_RH850_LDH_LDW: + if ( extract32(ctx->opcode, 16, 1) == 0 ){ + gen_load(ctx, MO_TESW, rs2, rs1, ld_imm, 0); // LD.H + } + else{ + gen_load(ctx, MO_TESL, rs2, rs1, ld_imm & 0xfffe, 0); // LD.W + } + break; + + case OPC_RH850_STB: + gen_store(ctx, MO_SB, rs1, rs2, (extract32(ctx->opcode, 16, 16)), 0); + break; + + case OPC_RH850_STH_STW: + if ( extract32(ctx->opcode, 16, 1)==1 ) { + gen_store(ctx, MO_TESL, rs1, rs2, ((extract32(ctx->opcode, 17, 15))) << 1, 0); + //this is STORE WORD + break; + } + gen_store(ctx, 
MO_TESW, rs1, rs2, ((extract32(ctx->opcode, 17, 15))) << 1, 0); + //this is STORE HALFWORD + break; + + case OPC_RH850_ADDI_imm16_reg1_reg2: + gen_arithmetic(ctx, rs1,rs2, OPC_RH850_ADDI_imm16_reg1_reg2); + break; + + case OPC_RH850_ANDI_imm16_reg1_reg2: + gen_logical(ctx, rs1, rs2, OPC_RH850_ANDI_imm16_reg1_reg2); + break; + + case OPC_RH850_MOVEA: + if ( extract32(ctx->opcode, 11, 5) == 0 ){ + // This is 48bit MOV + // This instruction should be reached first in decode_RH850_48 + } else { + gen_arithmetic(ctx, rs1, rs2, OPC_RH850_MOVEA_imm16_reg1_reg2); + } + break; + + case OPC_RH850_MOVHI_imm16_reg1_reg2: + if(extract32(ctx->opcode, 11, 5)!=0x0){ + gen_arithmetic(ctx, rs1, rs2, OPC_RH850_MOVHI_imm16_reg1_reg2); + } else { + if(extract32(ctx->opcode, 16, 5)==0x0){ + gen_special(ctx, env, rs1, rs2, OPC_RH850_DISPOSE_imm5_list12); + } else { + gen_special(ctx, env, rs1, rs2, OPC_RH850_DISPOSE_imm5_list12_reg1); + } + } + break; + + case OPC_RH850_ORI_imm16_reg1_reg2: + gen_logical(ctx, rs1, rs2, OPC_RH850_ORI_imm16_reg1_reg2); + break; + + case OPC_RH850_SATSUBI_imm16_reg1_reg2: + if(extract32(ctx->opcode, 11, 5)!=0x0){ + gen_sat_op(ctx, rs1, rs2, OPC_RH850_SATSUBI_imm16_reg1_reg2); + } else { + if(extract32(ctx->opcode, 16, 5)==0x0){ + gen_special(ctx, env, rs1, rs2, OPC_RH850_DISPOSE_imm5_list12); + } else { + gen_special(ctx, env, rs1, rs2, OPC_RH850_DISPOSE_imm5_list12_reg1); + } + } + + break; + case OPC_RH850_XORI_imm16_reg1_reg2: + gen_logical(ctx, rs1, rs2, OPC_RH850_XORI_imm16_reg1_reg2); + break; + + case OPC_RH850_LOOP: + if (extract32(ctx->opcode, 11, 5) == 0x0) + gen_loop(ctx, rs1, ld_imm & 0xfffe); // LOOP + else + gen_multiply(ctx, rs1, rs2, OPC_RH850_MULHI_imm16_reg1_reg2); + break; + case OPC_RH850_BIT_MANIPULATION_2: + + switch(extract32(ctx->opcode, 14, 2)){ + case 0: + gen_bit_manipulation(ctx, rs1, rs2, OPC_RH850_SET1_bit3_disp16_reg1); + break; + case 1: + gen_bit_manipulation(ctx, rs1, rs2, OPC_RH850_NOT1_bit3_disp16_reg1); + break; + case 2: + gen_bit_manipulation(ctx, rs1, rs2, OPC_RH850_CLR1_bit3_disp16_reg1); + break; + case 3: + gen_bit_manipulation(ctx, rs1, rs2, OPC_RH850_TST1_bit3_disp16_reg1); + break; + } + break; + case OPC_RH850_32bit_1: /* case for opcode = 111111 ; formats IX, X, XI, XII */ + if (extract32(ctx->opcode, 16, 1) == 0x1 ) { + /* BCOND disp17 */ + if (rs2 == 0x0) + { + /* Get condition. */ + cond = extract32(ctx->opcode, 0, 4); + + /* Extract immediate value (16 higher bits of 17 bits set by the instruction). */ + imm_32 = ((extract32(ctx->opcode, 4, 1)<<16) | (extract32(ctx->opcode, 17, 15) << 1)); + + /* Sign-extend value to 32 bits. 
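+               Bit 16 is the sign bit of the 17-bit displacement; when it is set,
+               bits 17-31 are filled with ones (e.g. 0x1FFFE becomes 0xFFFFFFFE, i.e. -2).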
*/ + if ((imm_32 & 0x10000) == 0x10000) + { + imm_32 |= (0x7fff << 17); + } + + gen_branch(env, ctx, cond, rs1, rs2, imm_32); + + break; + } + else + { + /* LD.HU */ + gen_load(ctx, MO_TEUW, rs2, rs1, ld_imm & 0xfffe, 0); + break; + } + } + formXop = MASK_OP_32BIT_SUB(ctx->opcode); //sub groups based on bits b23-b26 + switch(formXop){ + case OPC_RH850_LDSR_RIE_SETF_STSR: + check32bitZERO = extract32(ctx->opcode, 21, 2); + switch(check32bitZERO){ + case 0: + if(extract32(ctx->opcode, 4, 1)==1) + { + gen_special(ctx, env, rs1, rs2, OPC_RH850_RIE); + } + else + { + printf("gen SETF\r\n"); + gen_data_manipulation(ctx, rs1, rs2, OPC_RH850_SETF_cccc_reg2); + } + break; + case OPC_RH850_LDSR_reg2_regID_selID: + gen_special(ctx, env, rs1, rs2, OPC_RH850_LDSR_reg2_regID_selID); + break; + case OPC_RH850_STSR_regID_reg2_selID: + gen_special(ctx, env, rs1, rs2, OPC_RH850_STSR_regID_reg2_selID); + break; + } + break; + case OPC_RH850_FORMAT_IX: //format IX instructions + formXop = MASK_OP_FORMAT_IX(ctx->opcode); //mask on bits 21, 22 + switch(formXop) + { + case OPC_RH850_BINS_0: + if (extract32(ctx->opcode, 20, 1) == 1) + { + //BINS0 + gen_data_manipulation(ctx, rs1, rs2, OPC_RH850_BINS); + } + else + { + if (extract32(ctx->opcode, 17, 1) == 0) + { + gen_data_manipulation(ctx, rs1, rs2, OPC_RH850_SHR_reg1_reg2); + } + else + { + gen_data_manipulation(ctx, rs1, rs2, OPC_RH850_SHR_reg1_reg2_reg3); + } + } + break; + case OPC_RH850_BINS_1: + if (extract32(ctx->opcode, 20, 1) == 1) + { + //BINS1 + gen_data_manipulation(ctx, rs1, rs2, OPC_RH850_BINS); + } + else + { + if (extract32(ctx->opcode, 17, 1) == 0) + { + gen_data_manipulation(ctx, rs1, rs2, OPC_RH850_SAR_reg1_reg2); + } + else + { + gen_data_manipulation(ctx, rs1, rs2, OPC_RH850_SAR_reg1_reg2_reg3); + } + } + break; + case OPC_RH850_BINS_2: + if (extract32(ctx->opcode, 20, 1) == 1) + { + //BINS2 + gen_data_manipulation(ctx, rs1, rs2, OPC_RH850_BINS); + } + else + { + if (extract32(ctx->opcode, 17, 1) == 0) + { + if (extract32(ctx->opcode, 18, 1) == 1) + { + gen_data_manipulation(ctx, rs1, rs2, + OPC_RH850_ROTL_imm5_reg2_reg3); + } + else + { + gen_data_manipulation(ctx, rs1, rs2, + OPC_RH850_SHL_reg1_reg2); + } + } + else + { + if (extract32(ctx->opcode, 18, 1) == 1) + { + gen_data_manipulation(ctx, rs1, rs2, + OPC_RH850_ROTL_reg1_reg2_reg3); + } + else + { + gen_data_manipulation(ctx, rs1, rs2, + OPC_RH850_SHL_reg1_reg2_reg3); + } + } + } + break; + case OPC_RH850_BIT_MANIPULATION: // in format IX + check32bitZERO = extract32(ctx->opcode, 16, 3); + switch(check32bitZERO){ + case OPC_RH850_SET1_reg2_reg1: + gen_bit_manipulation(ctx, rs1, rs2, OPC_RH850_SET1_reg2_reg1); + break; + case OPC_RH850_NOT1_reg2_reg1: + gen_bit_manipulation(ctx, rs1, rs2, OPC_RH850_NOT1_reg2_reg1); + break; + case OPC_RH850_CLR1_reg2_reg1: + gen_bit_manipulation(ctx, rs1, rs2, OPC_RH850_CLR1_reg2_reg1); + break; + case OPC_RH850_TST1_reg2_reg1: + if (extract32(ctx->opcode, 19, 1) == 0){ + gen_bit_manipulation(ctx, rs1, rs2, OPC_RH850_TST1_reg2_reg1); + } else { + gen_special(ctx, env, rs1, rs2, OPC_RH850_CAXI_reg1_reg2_reg3); + } + } + break; + } + break; + + + case OPC_RH850_FORMAT_X: //format X instructions + //(+JARL3 - added due to MASK_OP_FORMAT_X matching) + formXop = MASK_OP_FORMAT_X(ctx->opcode); + + switch(formXop){ + + case OPC_RH850_CTRET: + gen_special(ctx, env, rs1, rs2, OPC_RH850_CTRET); + break; + case OPC_RH850_DI: + gen_special(ctx, env, rs1, rs2, OPC_RH850_DI); + break; + case OPC_RH850_EI: + gen_special(ctx, env, rs1, rs2, OPC_RH850_EI); + break; + 
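+        // EIRET/FERET return from EI-level/FE-level exception handling:
+        // gen_special() restores PC and the PSW flags from EIPC/EIPSW or FEPC/FEPSW.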
case OPC_RH850_EIRET: + gen_special(ctx, env, rs1, rs2, OPC_RH850_EIRET); + break; + case OPC_RH850_FERET: + gen_special(ctx, env, rs1, rs2, OPC_RH850_FERET); + break; + case OPC_RH850_HALT: + gen_special(ctx, env, rs1, rs2, OPC_RH850_HALT); + break; + case OPC_RH850_JARL3: + gen_jmp(ctx, rs1, 0, OPC_RH850_JARL_reg1_reg3); + break; + case OPC_RH850_SNOOZE: + gen_special(ctx, env, rs1, rs2, OPC_RH850_SNOOZE); + break; + case OPC_RH850_SYSCALL: + gen_special(ctx, env, rs1, rs2, OPC_RH850_SYSCALL); + break; + case OPC_RH850_TRAP: + gen_special(ctx, env, rs1, rs2, OPC_RH850_TRAP); + break; + case OPC_RH850_PREF: + //printf("PREF \n"); + break; + case OPC_RH850_POPSP_rh_rt: + gen_special(ctx, env, rs1, rs2, OPC_RH850_POPSP_rh_rt); + break; + case OPC_RH850_PUSHSP_rh_rt: + gen_special(ctx, env, rs1, rs2, OPC_RH850_PUSHSP_rh_rt); + break; + default: + if ((extract32(ctx->opcode, 13, 12) == 0xB07)) + { + if ((extract32(ctx->opcode, 27, 5) == 0x1E) && + (extract32(ctx->opcode, 0, 5) == 0x1F)) + { + if ((extract32(ctx->opcode, 23, 4) == 0x2)) // CLL + gen_mutual_exclusion(ctx, extract32(ctx->opcode, 27, 5), rs1, operation_CLL); + } else { + //CACHE; if cacheop bits are 1111110, opcode matches CLL ins, + //then they are THE SAME instruction, so this should be correct + gen_cache(ctx,rs1,rs2, 1); + } + } else + printf("ERROR! \n"); + break; + } + break; + case OPC_RH850_MUL_INSTS: + if (extract32(ctx->opcode, 22, 1) == 0) + { + if (extract32(ctx->opcode, 21, 1) == 0) + { + gen_data_manipulation(ctx, rs1, rs2, OPC_RH850_SASF_cccc_reg2); + } + else + { + if (extract32(ctx->opcode, 17, 1) == 1) + { + gen_multiply(ctx, rs1, rs2, OPC_RH850_MULU_reg1_reg2_reg3); + } + else + { + gen_multiply(ctx, rs1, rs2, OPC_RH850_MUL_reg1_reg2_reg3); + } + } + break; + } else if (extract32(ctx->opcode, 22, 1) == 1) + { + if (extract32(ctx->opcode, 17, 1) == 1) + { + gen_multiply(ctx, rs1, rs2, OPC_RH850_MULU_imm9_reg2_reg3); + } + else + { + gen_multiply(ctx, rs1, rs2, OPC_RH850_MUL_imm9_reg2_reg3); + } + break; + } + break; + + case OPC_RH850_FORMAT_XI: // DIV instructions in format XI + formXop = extract32(ctx->opcode, 16, 7); + switch(formXop){ + + case OPC_RH850_DIV_reg1_reg2_reg3: + gen_divide(ctx, rs1, rs2, OPC_RH850_DIV_reg1_reg2_reg3); + //DIV + break; + case OPC_RH850_DIVH_reg1_reg2_reg3: + gen_divide(ctx, rs1, rs2, OPC_RH850_DIVH_reg1_reg2_reg3); + //DIVH 2 + break; + case OPC_RH850_DIVHU_reg1_reg2_reg3: + gen_divide(ctx, rs1, rs2, OPC_RH850_DIVHU_reg1_reg2_reg3); + //DIVHU + break; + + case OPC_RH850_DIVQ: + gen_divide(ctx, rs1, rs2, OPC_RH850_DIV_reg1_reg2_reg3); + //DIVQ => using DIV implementation, will be changed if needed + break; + case OPC_RH850_DIVQU: + gen_divide(ctx, rs1, rs2, OPC_RH850_DIVU_reg1_reg2_reg3); + //DIVQU => using DIVU implementation, will be changed if needed + break; + case OPC_RH850_DIVU_reg1_reg2_reg3: + gen_divide(ctx, rs1, rs2, OPC_RH850_DIVU_reg1_reg2_reg3); + //DIVU + break; + } + break; + + case OPC_RH850_FORMAT_XII: // for opcode = 0110 ; format XII instructions + //excluding MUL and including CMOV + // also LDL.W and STC.W (Format VII) + checkXII = extract32(ctx->opcode, 21, 2); + + switch(checkXII) + { + case 0: + gen_data_manipulation(ctx, rs1, rs2, OPC_RH850_CMOV_cccc_imm5_reg2_reg3); + break; + case 1: + gen_data_manipulation(ctx, rs1, rs2, OPC_RH850_CMOV_cccc_reg1_reg2_reg3); + break; + case 2: + formXop = extract32(ctx->opcode, 17, 2); + + switch(formXop) + { + case OPC_RH850_BSW_reg2_reg3: + gen_data_manipulation(ctx, rs1, rs2, OPC_RH850_BSW_reg2_reg3); + break; + case 
OPC_RH850_BSH_reg2_reg3: + gen_data_manipulation(ctx, rs1, rs2, OPC_RH850_BSH_reg2_reg3); + break; + case OPC_RH850_HSW_reg2_reg3: + //HSW + gen_data_manipulation(ctx, rs1, rs2, OPC_RH850_HSW_reg2_reg3); + break; + case OPC_RH850_HSH_reg2_reg3: + //HSH + gen_data_manipulation(ctx, rs1, rs2, OPC_RH850_HSH_reg2_reg3); + break; + } + break; + case 3: //these are SCHOL, SCHOR, SCH1L, SCH1R. Also LDL.W + formXop = extract32(ctx->opcode, 17, 2); + switch(formXop) + { + case OPC_RH850_SCH0R_reg2_reg3: + if (extract32(ctx->opcode, 5, 11) == 0x3F && + extract32(ctx->opcode, 16, 5) == 0x18) + gen_mutual_exclusion(ctx, extract32(ctx->opcode, 27, 5), + rs1, operation_LDL_W); + else + gen_bit_search(ctx, rs2, OPC_RH850_SCH0R_reg2_reg3); + break; + case OPC_RH850_SCH1R_reg2_reg3: + if (extract32(ctx->opcode, 19, 2) == 0x0) + { + gen_bit_search(ctx, rs2, OPC_RH850_SCH1R_reg2_reg3); + } + else if (extract32(ctx->opcode, 5, 11) == 0x3F && + extract32(ctx->opcode, 16, 5) == 0x1a) + gen_mutual_exclusion(ctx, extract32(ctx->opcode, 27, 5), + rs1, operation_STC_W); + break; + case OPC_RH850_SCH0L_reg2_reg3: + gen_bit_search(ctx, rs2, OPC_RH850_SCH0L_reg2_reg3); + break; + case OPC_RH850_SCH1L_reg2_reg3: + gen_bit_search(ctx, rs2, OPC_RH850_SCH1L_reg2_reg3); + break; + } + + } + break; + + case OPC_RH850_ADDIT_ARITH: + formXop = extract32(ctx->opcode, 21, 2); + switch(formXop) + { + + case OPC_RH850_ADF_SATADD3: + if (extract32(ctx->opcode, 16, 5) == 0x1A) + { + gen_sat_op(ctx, rs1, rs2, OPC_RH850_SATADD_reg1_reg2_reg3); + } + else + { + gen_cond_arith(ctx, rs1, rs2, OPC_RH850_ADF_cccc_reg1_reg2_reg3); + } + break; + case OPC_RH850_SBF_SATSUB: + if (extract32(ctx->opcode, 16, 5) == 0x1A) + { + gen_sat_op(ctx, rs1, rs2, OPC_RH850_SATSUB_reg1_reg2_reg3); + } + else + { + gen_cond_arith(ctx, rs1, rs2, OPC_RH850_SBF_cccc_reg1_reg2_reg3); + } + break; + case OPC_RH850_MAC_reg1_reg2_reg3_reg4: + gen_mul_accumulate(ctx, rs1, rs2, OPC_RH850_MAC_reg1_reg2_reg3_reg4); + break; + case OPC_RH850_MACU_reg1_reg2_reg3_reg4: + gen_mul_accumulate(ctx, rs1, rs2, OPC_RH850_MACU_reg1_reg2_reg3_reg4); + break; + } + break; + + /* Floating-point instruction format F:I. */ + case OPC_RH850_FORMAT_FI_CAT0: + { + /* Dispatch to FPU generator (category 0). */ + fpu_decode_cat0_instn(env, ctx); + } + break; + + case OPC_RH850_FORMAT_FI_CAT1: + { + /* Dispatch to FPU generator (category 1). 
*/ + fpu_decode_cat1_instn(env, ctx); + } + break; + } + } + + if (MASK_OP_FORMAT_V_FORMAT_XIII(ctx->opcode) == OPC_RH850_FORMAT_V_XIII){ + if(extract32(ctx->opcode, 16, 1) == 0) + { + uint32_t disp22 = extract32(ctx->opcode, 16, 16) | + (extract32(ctx->opcode, 0, 6) << 16 ); + if( (disp22 & 0x200000) == 0x200000) + { + disp22 = disp22 | (0x3ff << 22); + } + + if (extract32(ctx->opcode, 11, 5) == 0) + { + gen_jmp(ctx, 0, disp22, OPC_RH850_JR_imm22); //JR disp22 + } + else + { + gen_jmp(ctx, 0, disp22, OPC_RH850_JARL_disp22_reg2); + } + } + else + { + if (extract32(ctx->opcode, 11, 5) != 0) + { + //LD.BU + gen_load(ctx, MO_UB, rs2, rs1, (ld_imm & 0xfffe) | extract32(ctx->opcode, 5, 1), 0); + + } + else + { + if (extract32(ctx->opcode, 16, 3) == 0x3){ + gen_special(ctx, env, rs1, rs2, OPC_RH850_PREPARE_list12_imm5_sp); + //PREPARE2 + } + else if (extract32(ctx->opcode, 16, 3) == 0x1){ + gen_special(ctx, env, rs1, rs2, OPC_RH850_PREPARE_list12_imm5); + //PREPARE1 + } + } + } + } + + tcg_temp_free(tcg_ctx, r1); + tcg_temp_free(tcg_ctx, r2); +} + +/* 16-bit RH850 instruction decoding */ +static void decode_RH850_16(CPURH850State *env, DisasContext *ctx) +{ + int rs1; + int rs2; + int cond; + uint32_t op; + uint32_t subOpCheck; + uint32_t imm; + uint32_t disp32 = 0; + + op = MASK_OP_MAJOR(ctx->opcode); + rs1 = GET_RS1(ctx->opcode); // rs1 at bits b0-b4; + rs2 = GET_RS2(ctx->opcode); // rs2 at bits b11-b15; + imm = rs1; + + if((op & 0xf << 7) == OPC_RH850_BCOND ) + { // checking for 4 bit opcode for BCOND + cond = extract32(ctx->opcode, 0, 4); + imm = ( extract32(ctx->opcode, 4, 3) | (extract32(ctx->opcode, 11, 5) << 3)) << 1 ; + + if ( (imm & 0x100) == 0x100){ + imm |= (0x7fffff << 9); + } + gen_branch(env, ctx, cond, rs1, rs2, imm); + + return; + } + + switch(op) + { + case OPC_RH850_16bit_0: + if (rs2 != 0) { + gen_arithmetic(ctx, rs1, rs2, OPC_RH850_MOV_reg1_reg2); + break; + } else { + subOpCheck = MASK_OP_FORMAT_I_0(op); + switch(subOpCheck){ + case OPC_RH850_NOP: + break; + case OPC_RH850_SYNCI: + break; + case OPC_RH850_SYNCE: + break; + case OPC_RH850_SYNCM: + break; + case OPC_RH850_SYNCP: + break; + } + } + break; + + case OPC_RH850_16bit_2: + if (rs2 == 0) + { + if (rs1 == 0) + { + gen_special(ctx, env, rs1, rs2, OPC_RH850_RIE); + break; + } + else + { + gen_special(ctx, env, rs1, rs2, OPC_RH850_SWITCH_reg1); + break; + } + } + else + { + if (rs1 == 0) + { + gen_special(ctx, env, rs1, rs2, OPC_RH850_FETRAP_vector4); + break; + } + else + { + gen_divide(ctx, rs1, rs2, OPC_RH850_DIVH_reg1_reg2); + break; + } + } + break; + + case OPC_RH850_16bit_4: + if (rs2 == 0) + { + gen_data_manipulation(ctx, rs1, rs2, OPC_RH850_ZXB_reg1); + break; + } + else + { + gen_sat_op(ctx, rs1, rs2, OPC_RH850_SATSUBR_reg1_reg2); + break; + } + break; + case OPC_RH850_16bit_5: + if (rs2 == 0) + { + gen_data_manipulation(ctx, rs1, rs2, OPC_RH850_SXB_reg1); + break; + } + else + { + gen_sat_op(ctx, rs1, rs2, OPC_RH850_SATSUB_reg1_reg2); + break; + } + break; + case OPC_RH850_16bit_6: + if (rs2 == 0) + { + gen_data_manipulation(ctx, rs1, rs2, OPC_RH850_ZXH_reg1); + break; + } + else + { + gen_sat_op(ctx, rs1, rs2, OPC_RH850_SATADD_reg1_reg2); + break; + } + break; + case OPC_RH850_16bit_7: + if (rs2 == 0) + { + gen_data_manipulation(ctx, rs1, rs2, OPC_RH850_SXH_reg1); + break; + } + else + { + gen_multiply(ctx, rs1, rs2, OPC_RH850_MULH_reg1_reg2); + break; + } + break; + case OPC_RH850_NOT_reg1_reg2: + gen_logical(ctx, rs1, rs2, OPC_RH850_NOT_reg1_reg2); + break; + // decode properly (handle also case when rs2 
!= 0), then uncomment + // case OPC_RH850_JMP_DISP: + // JMP opcode: DDDD DDDD DDDD DDDD dddd dddd dddd ddd0 0000 0110 111R RRRR + // disp32 = ctx->opcode >> 16; + + + // this case is already handled in decode_RH850_48() + + case OPC_RH850_16bit_3: + if (rs2 == 0) + { // JMP + gen_jmp(ctx, rs1, disp32, OPC_RH850_JMP_reg1); + break; + } + else + { + if(extract32(rs1,4,1)==1){ + //SLD.HU + gen_load(ctx, MO_TEUW, rs2, 30, extract32(ctx->opcode, 0, 4) << 1, 0); + }else{ + //SLD.BU + gen_load(ctx, MO_UB, rs2, 30, extract32(ctx->opcode, 0, 4), 0); + } + break; + } + break; + case OPC_RH850_OR_reg1_reg2: + gen_logical(ctx, rs1, rs2, OPC_RH850_OR_reg1_reg2); + break; + case OPC_RH850_XOR_reg1_reg2: + gen_logical(ctx, rs1, rs2, OPC_RH850_XOR_reg1_reg2); + break; + case OPC_RH850_AND_reg1_reg2: + gen_logical(ctx, rs1, rs2, OPC_RH850_AND_reg1_reg2); + break; + case OPC_RH850_TST_reg1_reg2: + gen_logical(ctx, rs1, rs2, OPC_RH850_TST_reg1_reg2); + break; + case OPC_RH850_SUBR_reg1_reg2: + gen_arithmetic(ctx, rs1, rs2, OPC_RH850_SUBR_reg1_reg2); + break; + case OPC_RH850_SUB_reg1_reg2: + gen_arithmetic(ctx, rs1, rs2, OPC_RH850_SUB_reg1_reg2); + break; + case OPC_RH850_ADD_reg1_reg2: + gen_arithmetic(ctx, rs1, rs2, OPC_RH850_ADD_reg1_reg2); + break; + case OPC_RH850_CMP_reg1_reg2: + gen_arithmetic(ctx, rs1, rs2, OPC_RH850_CMP_reg1_reg2); + break; + case OPC_RH850_16bit_16: + if (rs2 == 0) + { + gen_special(ctx, env, rs1, rs2, OPC_RH850_CALLT_imm6); + break; + } + else + { + gen_arithmetic(ctx, imm, rs2, OPC_RH850_MOV_imm5_reg2); + break; + } + break; + case OPC_RH850_16bit_17: + if (rs2 == 0) + { + gen_special(ctx, env, rs1, rs2, OPC_RH850_CALLT_imm6); + break; + } + else + { + gen_sat_op(ctx, rs1, rs2, OPC_RH850_SATADD_imm5_reg2); + break; + } + break; + case OPC_RH850_ADD_imm5_reg2: + gen_arithmetic(ctx, rs1, rs2, OPC_RH850_ADD_imm5_reg2); + break; + case OPC_RH850_CMP_imm5_reg2: + gen_arithmetic(ctx, rs1, rs2, OPC_RH850_CMP_imm5_reg2); + break; + case OPC_RH850_SHR_imm5_reg2: + gen_data_manipulation(ctx, rs1, rs2, OPC_RH850_SHR_imm5_reg2); + break; + case OPC_RH850_SAR_imm5_reg2: + gen_data_manipulation(ctx, rs1, rs2, OPC_RH850_SAR_imm5_reg2); + break; + case OPC_RH850_SHL_imm5_reg2: + gen_data_manipulation(ctx, rs1, rs2, OPC_RH850_SHL_imm5_reg2); + break; + case OPC_RH850_MULH_imm5_reg2: + gen_multiply(ctx, rs1, rs2, OPC_RH850_MULH_imm5_reg2); + break; + } + + //Format IV ; dividing on code bits b7-b10 + uint32_t opIV = (op >> 7); + opIV = opIV << 5; + + switch(opIV) + { + case OPC_RH850_16bit_SLDB: + gen_load(ctx, MO_SB, rs2, 30, extract32(ctx->opcode, 0, 7), 0); + break; + case OPC_RH850_16bit_SLDH: + gen_load(ctx, MO_TESW, rs2, 30, extract32(ctx->opcode, 0, 7) << 1, 0); + break; + case OPC_RH850_16bit_IV10: + if ( extract32(rs1,0,1) == 1 ) { + //SST.W + gen_store(ctx, MO_TEUL, 30, rs2, (extract32(ctx->opcode, 1, 6)) << 2, 0); + /// Note An MAE or MDP exception might occur + /// depending on the result of address calculation. + } + else{ + //SLD.W + gen_load(ctx, MO_TESL, rs2, 30, extract32(ctx->opcode, 1, 6) << 2, 0); + } + break; + case OPC_RH850_16bit_SSTB: + gen_store(ctx, MO_UB, 30, rs2, (extract32(ctx->opcode, 0, 7)), 0); + /// Note An MDP exception might occur depending on the result of address calculation. + break; + case OPC_RH850_16bit_SSTH: + gen_store(ctx, MO_TEUW, 30, rs2, (extract32(ctx->opcode, 0, 7)) << 1, 0); + /// Note An MAE or MDP exception might occur + ///depending on the result of address calculation. 
+ break; + } +} + + +// ################################################################################### +// ################################################################################### +// ################################################################################### + +static void rh850_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cpu) +{ + DisasContext *dc = container_of(dcbase, DisasContext, base); + struct uc_struct *uc = cpu->uc; + dc->uc = uc; + + CPURH850State *env = cpu->env_ptr; + dc->env = env; + dc->pc = dc->base.pc_first; +} + +static void rh850_tr_tb_start(DisasContextBase *dcbase, CPUState *cpu) +{ +} + +static void rh850_tr_insn_start(DisasContextBase *dcbase, CPUState *cpu) +{ + DisasContext *dc = container_of(dcbase, DisasContext, base); + TCGContext *tcg_ctx = dc->uc->tcg_ctx; + + tcg_gen_insn_start(tcg_ctx, dc->base.pc_next); +} + +/* + * This function is called when a breakpoint is hit. It should implement + * breakpoint handling - for example, HW breakpoints may be + * handled differently from SW breakpoints (see arm/translate.c). + * However, in RH850 we currently implement only SW breakpoints. + * + * Comment from translator.c: + * The breakpoint_check hook may use DISAS_TOO_MANY to indicate + * that only one more instruction is to be executed. Otherwise + * it should use DISAS_NORETURN when generating an exception, + * but may use a DISAS_TARGET_* value for Something Else. + */ +static bool rh850_tr_breakpoint_check(DisasContextBase *dcbase, CPUState *cpu, + const CPUBreakpoint *bp) +{ + DisasContext *dc = container_of(dcbase, DisasContext, base); + + gen_exception_debug(dc); + /* The address covered by the breakpoint must be included in + [tb->pc, tb->pc + tb->size) in order for it to be + properly cleared -- thus we increment the PC here so that + the logic setting tb->size below does the right thing. */ + dc->base.pc_next += 2; + dc->base.is_jmp = DISAS_NORETURN; + return true; +} + +/* RH850 instruction translation callback.
*/ +static void rh850_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu) +{ + DisasContext *dc = container_of(dcbase, DisasContext, base); + struct uc_struct *uc = dc->uc; + TCGContext *tcg_ctx = uc->tcg_ctx; + TCGOp *tcg_op, *prev_op = NULL; + CPURH850State *env = dc->env; + bool insn_hook = false; + + if (uc_addr_is_exit(dc->uc, dc->base.pc_next)) { + // imitate PGM exception to halt emulation + dcbase->is_jmp = DISAS_UNICORN_HALT; + } + else + { + #if 0 + // Unicorn: trace this instruction on request + if (HOOK_EXISTS_BOUNDED(uc, UC_HOOK_CODE, dc->pc)) { + + // Sync PC in advance + tcg_gen_movi_i32(tcg_ctx, cpu_pc, dc->pc); + + // save the last operand + prev_op = tcg_last_op(tcg_ctx); + insn_hook = true; + + gen_uc_tracecode(tcg_ctx, 0xF1F1F1F1, UC_HOOK_CODE_IDX, env->uc, dc->pc); + + // the callback might want to stop emulation immediately + check_exit_request(tcg_ctx); + } + #endif + + dc->opcode = cpu_lduw_code(env, dc->pc); // get opcode from memory + + if ((extract32(dc->opcode, 9, 2) != 0x3) && (extract32(dc->opcode, 5, 11) != 0x17)) { + dc->base.pc_next = dc->pc + 2; + decode_RH850_16(env, dc); //this function includes 32-bit JR and JARL + } else { + dc->opcode = (dc->opcode) | (cpu_lduw_code(env, dc->pc + 2) << 0x10); + if (((extract32(dc->opcode, 6, 11) == 0x41e) && ((extract32(dc->opcode, 17, 2) > 0x1) || + (extract32(dc->opcode, 17, 3) == 0x4))) || + (extract32(dc->opcode, 5, 11) == 0x31) || // 48-bit MOV + (extract32(dc->opcode, 5, 12) == 0x37) || // 48-bit JMP + (extract32(dc->opcode, 5, 11) == 0x17) || // 48-bit JARL & JR + ((extract32(dc->opcode, 5, 11) == 0x3D) && (extract32(dc->opcode, 16, 5) == 0x07)) // 48-bit LD.HU + ) + { + dc->opcode1 = cpu_lduw_code(env, dc->pc + 4); + dc->base.pc_next = dc->pc + 6; + decode_RH850_48(env, dc); + } + else + { + dc->base.pc_next = dc->pc + 4; + decode_RH850_32(env, dc); + } + } + + #if 0 + if (insn_hook) { + // Unicorn: patch the callback to have the proper instruction size. + if (prev_op) { + // As explained further up in the function where prev_op is + // assigned, we move forward in the tail queue, so we're modifying the + // move instruction generated by gen_uc_tracecode() that contains + // the instruction size to assign the proper size (replacing 0xF1F1F1F1). 
+ tcg_op = QTAILQ_NEXT(prev_op, link); + } else { + // this instruction is the first emulated code ever, + // so the instruction operand is the first operand + tcg_op = QTAILQ_FIRST(&tcg_ctx->ops); + } + + tcg_op->args[1] = dc->base.pc_next - dc->pc; + } + #endif + + dc->pc = dc->base.pc_next; + } +} + +static void update_pc_addr(DisasContext *s) +{ + /* Emit TCG code to store base.pc_next into the PC register. */ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + tcg_gen_movi_i32(tcg_ctx, tcg_ctx->cpu_pc, s->base.pc_next); +} + +// Emit exit TB code according to base.is_jmp +static void rh850_tr_tb_stop(DisasContextBase *dcbase, CPUState *cpu) +{ + DisasContext *dc = container_of(dcbase, DisasContext, base); + TCGContext *tcg_ctx = dc->uc->tcg_ctx; + + if (dc->base.is_jmp == DISAS_NORETURN) + { + return; + } + if (dc->base.singlestep_enabled) { + if (dc->base.is_jmp == DISAS_NEXT || dc->base.is_jmp == DISAS_TOO_MANY) { + // PC is not loaded inside TB, so we have to do it here in case of + // single stepping + tcg_gen_movi_tl(tcg_ctx, cpu_pc, dc->pc); + } + gen_exception_debug(dc); + } + + switch (dc->base.is_jmp) + { + case DISAS_UNICORN_HALT: + tcg_gen_movi_tl(tcg_ctx, cpu_pc, dc->pc); + gen_exception_halt(dc); + break; + case DISAS_TOO_MANY: + case DISAS_PC_STALE: + case DISAS_PC_STALE_NOCHAIN: + update_pc_addr(dc); + gen_goto_tb_imm(dc, 0, dc->pc); + break; + case DISAS_INDIRECT_JUMP: + /* PC in CPURH850State must have been updated! */ + tcg_gen_lookup_and_goto_ptr(tcg_ctx); + break; + case DISAS_EXIT_TB: + tcg_gen_exit_tb(tcg_ctx, NULL, 0); + break; + case DISAS_NORETURN: + case DISAS_TB_EXIT_ALREADY_GENERATED: + break; + default: + g_assert_not_reached(); + } +} + +static const TranslatorOps rh850_tr_ops = { + .init_disas_context = rh850_tr_init_disas_context, + .tb_start = rh850_tr_tb_start, + .insn_start = rh850_tr_insn_start, + .breakpoint_check = rh850_tr_breakpoint_check, + .translate_insn = rh850_tr_translate_insn, + .tb_stop = rh850_tr_tb_stop, +}; + +/** + * Translates one translation block - a sequence of instructions without + * jumps, and the longest such sequence that can be translated at once. + * The sequence may be shorter, e.g. in single-step mode (one instruction) + * or when a breakpoint is detected - see the checks in translator_loop(). + */ + +void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb, int max_insns) +{ + DisasContext dc; + translator_loop(&rh850_tr_ops, &dc.base, cpu, tb, max_insns); +} + +void rh850_translate_init(struct uc_struct *uc) +{ + TCGContext *tcg_ctx = uc->tcg_ctx; + int i; + + /* cpu_gpr[0] is a placeholder for the zero register. Do not use it.
*/ + /* Use the gen_set_gpr and gen_get_gpr helper functions when accessing */ + /* registers, unless you specifically block writes to reg 0 */ + + for (i = 0; i < NUM_GP_REGS; i++) { + cpu_gpr[i] = tcg_global_mem_new(tcg_ctx, tcg_ctx->cpu_env, + offsetof(CPURH850State, gpRegs[i]), rh850_gp_regnames[i]); + } + + for (int bankIdx = 0; bankIdx < NUM_SYS_REG_BANKS; bankIdx++) { + for (int regIdx = 0; regIdx < MAX_SYS_REGS_IN_BANK; regIdx++) { + const char *regName = rh850_sys_regnames[bankIdx][regIdx]; + if (regName != NULL) { + cpu_sysRegs[bankIdx][regIdx] = tcg_global_mem_new(tcg_ctx, tcg_ctx->cpu_env, + offsetof(CPURH850State, systemRegs[bankIdx][regIdx]), + regName); + } else { + cpu_sysRegs[bankIdx][regIdx] = NULL; // mark register as not present + } + } + } + + for (i = 0; i < 1; i++) { + cpu_sysDatabuffRegs[i] = tcg_global_mem_new(tcg_ctx, tcg_ctx->cpu_env, + offsetof(CPURH850State, sysDatabuffRegs[i]), rh850_sys_databuff_regnames[i]); + } + + // PSW register flags + cpu_ZF = tcg_global_mem_new_i32(tcg_ctx, tcg_ctx->cpu_env, offsetof(CPURH850State, Z_flag), "ZF"); + cpu_SF = tcg_global_mem_new_i32(tcg_ctx, tcg_ctx->cpu_env, offsetof(CPURH850State, S_flag), "SF"); + cpu_OVF = tcg_global_mem_new_i32(tcg_ctx, tcg_ctx->cpu_env, offsetof(CPURH850State, OV_flag), "OVF"); + cpu_CYF = tcg_global_mem_new_i32(tcg_ctx, tcg_ctx->cpu_env, offsetof(CPURH850State, CY_flag), "CYF"); + cpu_SATF = tcg_global_mem_new_i32(tcg_ctx, tcg_ctx->cpu_env, offsetof(CPURH850State, SAT_flag), "SAT"); + cpu_ID = tcg_global_mem_new_i32(tcg_ctx, tcg_ctx->cpu_env, offsetof(CPURH850State, ID_flag), "ID"); + cpu_EP = tcg_global_mem_new_i32(tcg_ctx, tcg_ctx->cpu_env, offsetof(CPURH850State, EP_flag), "EP"); + cpu_NP = tcg_global_mem_new_i32(tcg_ctx, tcg_ctx->cpu_env, offsetof(CPURH850State, NP_flag), "NP"); + cpu_EBV = tcg_global_mem_new_i32(tcg_ctx, tcg_ctx->cpu_env, offsetof(CPURH850State, EBV_flag), "EBV"); + cpu_CU0 = tcg_global_mem_new_i32(tcg_ctx, tcg_ctx->cpu_env, offsetof(CPURH850State, CU0_flag), "CU0"); + cpu_CU1 = tcg_global_mem_new_i32(tcg_ctx, tcg_ctx->cpu_env, offsetof(CPURH850State, CU1_flag), "CU1"); + cpu_CU2 = tcg_global_mem_new_i32(tcg_ctx, tcg_ctx->cpu_env, offsetof(CPURH850State, CU2_flag), "CU2"); + cpu_UM = tcg_global_mem_new_i32(tcg_ctx, tcg_ctx->cpu_env, offsetof(CPURH850State, UM_flag), "UM"); + + cpu_pc = tcg_global_mem_new(tcg_ctx, tcg_ctx->cpu_env, offsetof(CPURH850State, pc), "pc"); + load_res = tcg_global_mem_new(tcg_ctx, tcg_ctx->cpu_env, offsetof(CPURH850State, load_res), "load_res"); + load_val = tcg_global_mem_new(tcg_ctx, tcg_ctx->cpu_env, offsetof(CPURH850State, load_val), "load_val"); + + cpu_LLbit = tcg_global_mem_new(tcg_ctx, tcg_ctx->cpu_env, offsetof(CPURH850State, cpu_LLbit), "cpu_LLbit"); + cpu_LLAddress = tcg_global_mem_new(tcg_ctx, tcg_ctx->cpu_env, offsetof(CPURH850State, cpu_LLAddress), "cpu_LLAddress"); + +} diff --git a/qemu/target/rh850/translate.h b/qemu/target/rh850/translate.h new file mode 100644 index 0000000000..a622c4ce58 --- /dev/null +++ b/qemu/target/rh850/translate.h @@ -0,0 +1,35 @@ +#ifndef _RH850_TRANSLATE_H +#define _RH850_TRANSLATE_H + +#include "cpu.h" +#include "exec/translator.h" +#include "tcg/tcg-op.h" + +/** + * This structure contains data, which is needed to translate a + * sequence of instructions, usually inside one translation + * block. The most important member is therefore 'pc', which + * points to the instruction to be translated. This variable stores + * PC during compile time (guest instructions to TCG instructions). 
+ * We must increment this variable manually during translation + * according to instruction size. + * Note: Consider renaming to TranslationContext, instead of DisasContext, + * because it contains information for translation, not disassembler. + */ +typedef struct DisasContext { + DisasContextBase base; + CPURH850State *env; + target_ulong pc; // pointer to instruction being translated + uint32_t opcode; + uint32_t opcode1; // used for 48 bit instructions + + // Unicorn + struct uc_struct *uc; +} DisasContext; + +void gen_get_gpr(TCGContext *tcg_ctx, TCGv t, int reg_num); +void gen_set_gpr(TCGContext *tcg_ctx, int reg_num_dst, TCGv t); +void gen_set_spr(TCGContext *tcg_ctx, int bank_id, int reg_id, TCGv t); +void gen_get_spr(TCGContext *tcg_ctx, int bank_id, int reg_id, TCGv t); + +#endif /* _RH850_TRANSLATE_H */ \ No newline at end of file diff --git a/qemu/target/rh850/unicorn.c b/qemu/target/rh850/unicorn.c new file mode 100644 index 0000000000..362c92dc8c --- /dev/null +++ b/qemu/target/rh850/unicorn.c @@ -0,0 +1,140 @@ +/* Unicorn Emulator Engine */ +/* By Nguyen Anh Quynh , 2015-2021 */ +/* Modified for Unicorn Engine by Damien Cauquil, 2020 */ + +#include "sysemu/cpus.h" +#include "cpu.h" +#include "unicorn_common.h" +#include "uc_priv.h" +#include "unicorn.h" + +RH850CPU *cpu_rh850_init(struct uc_struct *uc, const char *cpu_model); + +static void rh850_set_pc(struct uc_struct *uc, uint64_t address) +{ + ((CPURH850State *)uc->cpu->env_ptr)->pc = address; +} + +static uint64_t rh850_get_pc(struct uc_struct *uc) +{ + return ((CPURH850State *)uc->cpu->env_ptr)->pc; +} + +static void rh850_release(void *ctx) +{ + + int i; + TCGContext *tcg_ctx = (TCGContext *)ctx; + RH850CPU *cpu = (RH850CPU *)tcg_ctx->uc->cpu; + CPUTLBDesc *d = cpu->neg.tlb.d; + CPUTLBDescFast *f = cpu->neg.tlb.f; + CPUTLBDesc *desc; + CPUTLBDescFast *fast; + + release_common(ctx); + for (i = 0; i < NB_MMU_MODES; i++) { + desc = &(d[i]); + fast = &(f[i]); + g_free(desc->iotlb); + g_free(fast->table); + } +} + +static void reg_reset(struct uc_struct *uc) +{ + CPUArchState *env = uc->cpu->env_ptr; + + memset(env->gpRegs, 0, sizeof(env->gpRegs)); + env->pc = 0; +} + +DEFAULT_VISIBILITY +uc_err reg_read(void *_env, int mode, unsigned int regid, void *value, size_t *size) +{ + int sel_id; + CPURH850State *env = _env; + uc_err ret = UC_ERR_ARG; + + /* PC */ + if (regid == UC_RH850_REG_PC) + { + CHECK_REG_TYPE(uint32_t); + *(uint32_t *)value = env->pc; + } + + /* General purpose register. */ + if ((regid >= UC_RH850_REG_R0) && (regid <= UC_RH850_REG_R31)) + { + CHECK_REG_TYPE(uint32_t); + *(uint32_t *)value = env->gpRegs[regid]; + } + + /* System registers. */ + if ((regid >= UC_RH850_SYSREG_SELID0) && (regid < (UC_RH850_SYSREG_SELID7 + 32))) + { + CHECK_REG_TYPE(uint32_t); + sel_id = (regid - 32)/32; + *(uint32_t *)value = env->systemRegs[sel_id][regid % 32]; + } + + return ret; +} + + +DEFAULT_VISIBILITY +uc_err reg_write(void *_env, int mode, unsigned int regid, const void *value, size_t *size, int *setpc) +{ + int sel_id; + CPURH850State *env = _env; + uc_err ret = UC_ERR_ARG; + + /* PC */ + if (regid == UC_RH850_REG_PC) + { + CHECK_REG_TYPE(uint32_t); + env->pc = *(uint32_t *)value; + *setpc = 1; + } + + /* General purpose register. */ + if ((regid >= UC_RH850_REG_R0) && (regid <= UC_RH850_REG_R31)) + { + CHECK_REG_TYPE(uint32_t); + env->gpRegs[regid] = *(uint32_t *)value; + } + + /* System registers. 
*/ + if ((regid >= UC_RH850_SYSREG_SELID0) && (regid <= (UC_RH850_SYSREG_SELID7 + 32))) + { + CHECK_REG_TYPE(uint32_t); + sel_id = (regid - 32)/32; + env->systemRegs[sel_id][regid % 32] = *(uint32_t *)value; + } + + return ret; +} + +static int rh850_cpus_init(struct uc_struct *uc, const char *cpu_model) +{ + RH850CPU *cpu; + + cpu = cpu_rh850_init(uc, cpu_model); + if (cpu == NULL) { + return -1; + } + return 0; +} + +DEFAULT_VISIBILITY +void rh850_uc_init(struct uc_struct *uc) +{ + uc->reg_read = reg_read; + uc->reg_write = reg_write; + uc->reg_reset = reg_reset; + uc->release = rh850_release; + uc->set_pc = rh850_set_pc; + uc->get_pc = rh850_get_pc; + uc->cpus_init = rh850_cpus_init; + uc->cpu_context_size = offsetof(CPURH850State, uc); + uc_common_init(uc); +} diff --git a/qemu/target/rh850/unicorn.h b/qemu/target/rh850/unicorn.h new file mode 100644 index 0000000000..7ce57301a4 --- /dev/null +++ b/qemu/target/rh850/unicorn.h @@ -0,0 +1,16 @@ +/* Unicorn Emulator Engine */ +/* By Damien Cauquil , 2023 */ + +#ifndef UC_QEMU_TARGET_RH850_H +#define UC_QEMU_TARGET_RH850_H + +// functions to read & write registers +uc_err reg_read_rh850(void *_env, int mode, unsigned int regid, void *value, + size_t *size); +uc_err reg_write_rh850(void *_env, int mode, unsigned int regid, const void *value, + size_t *size, int *setpc); + +void reg_reset_rh850(struct uc_struct *uc); + +void rh850_uc_init(struct uc_struct *uc); +#endif diff --git a/samples/Makefile b/samples/Makefile index cbb3d91fb7..b896aecb43 100644 --- a/samples/Makefile +++ b/samples/Makefile @@ -93,6 +93,9 @@ endif ifneq (,$(findstring tricore,$(UNICORN_ARCHS))) SOURCES += sample_tricore.c endif +ifneq (,$(findstring avr,$(UNICORN_ARCHS))) +SOURCES += sample_avr.c +endif BINS = $(SOURCES:.c=$(BIN_EXT)) OBJS = $(SOURCES:.c=.o) diff --git a/samples/sample_avr.c b/samples/sample_avr.c new file mode 100644 index 0000000000..7482bbc204 --- /dev/null +++ b/samples/sample_avr.c @@ -0,0 +1,131 @@ +/* + Created for Unicorn Engine by Glenn Baker , 2024 +*/ + +/* Sample code to demonstrate how to emulate AVR code */ + +#include +#include +#include + +// Code to be emulated +static const uint32_t CODE_BASE = 0x0000; +static const uint8_t CODE[] = + "\x86\x0f" // add r24, r22 + "\x97\x1f" // adc r25, r23 + "\x88\x0f" // add r24, r24 + "\x99\x1f" // adc r25, r25 + "\x01\x96" // adiw r24, 0x01 + "\x08\x95" // ret + ; +enum { + CODE_SIZE = sizeof(CODE) - 1, + CODE_SIZE_ALIGNED = (CODE_SIZE + 0xff) & -0x100, +}; + +static void hook_block(uc_engine *uc, uint64_t address, uint32_t size, + void *user_data) +{ + printf(">>> Tracing basic block at 0x%" PRIx64 ", block size = 0x%x\n", + address, size); +} + +static void hook_code(uc_engine *uc, uint64_t address, uint32_t size, + void *user_data) +{ + printf(">>> Tracing instruction at 0x%" PRIx64 + ", instruction size = 0x%x\n", + address, size); +} + +static bool is_error(uc_err err, const char *what) +{ + if (err != UC_ERR_OK) { + fprintf(stderr, "error: failed on %s() with error %u: %s\n", + what, err, uc_strerror(err)); + return true; + } + return false; +} + +static bool test_avr(void) +{ + uc_engine *uc = NULL; + uc_hook trace1, trace2; + bool success = false; + + uint8_t regs[32]; + int reg_ids[32]; + void *reg_vals[32]; + int i; + + printf("Emulate AVR code\n"); + do { + // Initialize emulator in AVR mode + uc_err err = uc_open(UC_ARCH_AVR, UC_MODE_LITTLE_ENDIAN, &uc); + if (is_error(err, "uc_open")) + break; + + // Map program code + err = uc_mem_map(uc, CODE_BASE, CODE_SIZE_ALIGNED, 
UC_PROT_READ|UC_PROT_EXEC); + if (is_error(err, "uc_mem_map")) + break; + + // Write machine code to be emulated to memory + err = uc_mem_write(uc, CODE_BASE, CODE, CODE_SIZE); + if (is_error(err, "uc_mem_write")) + break; + + // Tracing all basic blocks with customized callback + err = uc_hook_add(uc, &trace1, UC_HOOK_BLOCK, hook_block, NULL, 1, 0); + if (is_error(err, "uc_hook_add[UC_HOOK_BLOCK]")) + break; + + // Tracing one instruction at CODE_BASE with customized callback + err = uc_hook_add(uc, &trace2, UC_HOOK_CODE, hook_code, NULL, CODE_BASE, + CODE_BASE + 1); + if (is_error(err, "uc_hook_add[UC_HOOK_CODE]")) + break; + + // Initialize registers + memset(regs, 0, sizeof(regs)); + regs[25] = 0; regs[24] = 1; + regs[23] = 0; regs[22] = 2; + + for (i = 0; i < 4; i++) { + reg_ids[i] = UC_AVR_REG_R0 + 22 + i; + reg_vals[i] = &regs[22 + i]; + } + err = uc_reg_write_batch(uc, reg_ids, reg_vals, 4); + if (is_error(err, "uc_reg_write_batch")) + break; + + // Emulate machine code in infinite time (last param = 0), or + // when finishing all the code. + err = uc_emu_start(uc, CODE_BASE, CODE_BASE + 4, 0, 0); + if (is_error(err, "uc_emu_start")) + break; + + // now print out some registers + printf(">>> Emulation done. Below is the CPU context\n"); + + uc_reg_read(uc, UC_AVR_REG_R25, &regs[25]); + uc_reg_read(uc, UC_AVR_REG_R24, &regs[24]); + uc_reg_read(uc, UC_AVR_REG_R23, &regs[23]); + uc_reg_read(uc, UC_AVR_REG_R22, &regs[22]); + printf(">>> r25,r24 = 0x%02x%02x\n", regs[25], regs[24]); + if (regs[25] == 0 && regs[24] == 3 && regs[23] == 0 && regs[22] == 2) + success = true; + } while (0); + + if (uc) + uc_close(uc); + return success; +} + +int main(int argc, char **argv, char **envp) +{ + if (!test_avr()) + abort(); + return 0; +} diff --git a/samples/sample_rh850.c b/samples/sample_rh850.c new file mode 100644 index 0000000000..8f74bf5e77 --- /dev/null +++ b/samples/sample_rh850.c @@ -0,0 +1,118 @@ +/* Unicorn Emulator Engine */ +/* By Nguyen Anh Quynh, 2021 */ + +/* Sample code to demonstrate how to emulate RH850 code */ + +#include +#include + +// code to be emulated +#define RH850_CODE "\x01\x0e\x06\x00\xc1\x11\x01\x1f\x00\x00\x41\x1f\x00\x00" + +// memory address where emulation starts +#define ADDRESS 0x10000 + +static void hook_block(uc_engine *uc, uint64_t address, uint32_t size, + void *user_data) +{ + printf(">>> Tracing basic block at 0x%" PRIx64 ", block size = 0x%x\n", + address, size); +} + +static void hook_code(uc_engine *uc, uint64_t address, uint32_t size, + void *user_data) +{ + printf(">>> Tracing instruction at 0x%" PRIx64 + ", instruction size = 0x%x\n", + address, size); +} + +static void hook_mem64(uc_engine *uc, uc_mem_type type, uint64_t address, + int size, int64_t value, void *user_data) +{ + uint64_t pc; + switch (type) { + default: + break; + case UC_MEM_READ: + uc_reg_read(uc, UC_RH850_REG_PC, &pc); + printf(">>> Memory read operation at 0x%" PRIx64 "\n", pc); + printf(">>> Memory is being READ at 0x%" PRIx64 ", data size = %u\n", + address, size); + break; + case UC_MEM_WRITE: + uc_reg_read(uc, UC_RH850_REG_PC, &pc); + printf(">>> Memory write operation at 0x%" PRIx64 "\n", pc); + printf(">>> Memory is being WRITE at 0x%" PRIx64 + ", data size = %u, data value = 0x%" PRIx64 "\n", + address, size, value); + break; + } +} + + +static void test_rh850(void) +{ + uc_engine *uc; + uc_hook trace1, trace2, trace3; + uc_err err; + + uint64_t r1 = 0x10000, r2 = 3, r3; + + printf("Emulate RH850 code\n"); + + // Initialize emulator in RH850 mode + err = uc_open(UC_ARCH_RH850,
UC_MODE_LITTLE_ENDIAN, &uc); + if (err) { + printf("Failed on uc_open() with error returned: %u (%s)\n", err, + uc_strerror(err)); + return; + } + + // map 1MB memory for this emulation + uc_mem_map(uc, ADDRESS, 1024 * 1024, UC_PROT_ALL); + + // write machine code to be emulated to memory + uc_mem_write(uc, ADDRESS, RH850_CODE, sizeof(RH850_CODE) - 1); + + // initialize machine registers + uc_reg_write(uc, UC_RH850_REG_R1, &r1); + uc_reg_write(uc, UC_RH850_REG_R2, &r2); + + // tracing all basic blocks with customized callback + uc_hook_add(uc, &trace1, UC_HOOK_BLOCK, hook_block, NULL, 1, 0); + + // tracing all instruction + uc_hook_add(uc, &trace2, UC_HOOK_CODE, hook_code, NULL, 1, 0); + + // tracing mem read + uc_hook_add(uc, &trace3, UC_HOOK_MEM_READ, hook_mem64, NULL, 1, 0); + uc_hook_add(uc, &trace3, UC_HOOK_MEM_WRITE, hook_mem64, NULL, 1, 0); + + // emulate machine code in infinite time (last param = 0), or when + // finishing all the code. + err = uc_emu_start(uc, ADDRESS, ADDRESS + sizeof(RH850_CODE) - 1, 0, 0); + if (err) { + printf("Failed on uc_emu_start() with error returned: %u (%s)\n", err, + uc_strerror(err)); + } + + // now print out some registers + printf(">>> Emulation done. Below is the CPU context\n"); + + uc_reg_read(uc, UC_RH850_REG_R1, &r1); + uc_reg_read(uc, UC_RH850_REG_R2, &r2); + uc_reg_read(uc, UC_RH850_REG_R3, &r3); + + printf(">>> R1 = 0x%" PRIx64 "\t\t>>> R2 = 0x%" PRIx64 "\n", r1, r2); + printf(">>> R3 = 0x%" PRIx64 "\n", r3); + + uc_close(uc); +} + +int main(int argc, char **argv, char **envp) +{ + test_rh850(); + + return 0; +} diff --git a/symbols.sh b/symbols.sh index 4424fb4319..1fb5cdfc7e 100755 --- a/symbols.sh +++ b/symbols.sh @@ -7746,6 +7746,12 @@ tcg_s390_program_interrupt \ tcg_s390_data_exception \ " +rh850_SYMBOLS="restore_state_to_opc \ +helper_tlb_flush \ +helper_uc_rh850_exit \ +gen_intermediate_code \ +" + tricore_SYMBOLS=" helper_fadd \ helper_fsub \ @@ -7759,7 +7765,21 @@ restore_state_to_opc \ helper_uc_tricore_exit \ " -ARCHS="x86_64 arm aarch64 riscv32 riscv64 mips mipsel mips64 mips64el sparc sparc64 m68k ppc ppc64 s390x tricore" +avr_SYMBOLS=" +helper_sleep \ +helper_unsupported \ +helper_debug \ +helper_break \ +helper_inb \ +helper_outb \ +helper_fullrd \ +helper_fullwr \ +helper_wdr \ +gen_intermediate_code \ +restore_state_to_opc \ +" + +ARCHS="x86_64 arm aarch64 riscv32 riscv64 mips mipsel mips64 mips64el rh850 sparc sparc64 m68k ppc ppc64 s390x tricore avr" for arch in $ARCHS; do diff --git a/tests/unit/test_avr.c b/tests/unit/test_avr.c new file mode 100644 index 0000000000..e9e6aecbcc --- /dev/null +++ b/tests/unit/test_avr.c @@ -0,0 +1,268 @@ +#include +#include "unicorn_test.h" + +#define ARRAY_ELEMS(a) (sizeof(a) / sizeof((a)[0])) + +#define PAGE_SIZE 256 +#define PAGE_ALIGN(x) (((x) + PAGE_SIZE - 1) & -PAGE_SIZE) + +enum { + ADDR__init__ = 0x0000, // __init__ + ADDR_test_func = 0x001a, // test_func() + ADDR_test_1 = 0x0030, // test_1() + ADDR_main = 0x0058, // main() + ADDR_abort = 0x0062, // abort() + ADDR_exit = 0x006c, // _exit() + ADDR__stop_program = 0x006e, // __stop_program() + ADDR__data__ = 0x0070, // __data__ + ADDR__data__end = 0x0072, +}; + +enum { + SIZE__init__ = ADDR_test_func - ADDR__init__, + SIZE_test_func = ADDR_test_1 - ADDR_test_func, + SIZE_test_1 = ADDR_main - ADDR_test_1, + SIZE_main = ADDR_abort - ADDR_main, + SIZE_abort = ADDR_exit - ADDR_abort, + SIZE_exit = ADDR__stop_program - ADDR_exit, + SIZE__stop_program = ADDR__data__ - ADDR__stop_program, + SIZE__data__ = ADDR__data__end - ADDR__data__, 
+}; + +static const uint8_t FLASH[] = + // 00000000 <__ctors_end>: + "\x12\xe0" // ldi r17, 0x02 + "\xa0\xe0" // ldi r26, 0x00 + "\xb2\xe0" // ldi r27, 0x02 + "\xe0\xe7" // ldi r30, 0x70 + "\xf0\xe0" // ldi r31, 0x00 + "\x00\xe0" // ldi r16, 0x00 + "\x0b\xbf" // out 0x3b, r16 + "\x02\xc0" // rjmp .+4 + "\x07\x90" // elpm r0, Z+ + "\x0d\x92" // st X+, r0 + "\xa2\x30" // cpi r26, 0x02 + "\xb1\x07" // cpc r27, r17 + "\xd9\xf7" // brne .-10 + + // 0000001a : + "\x20\x91\x00\x02" // lds r18, 0x0200 + "\x30\x91\x01\x02" // lds r19, 0x0201 + "\x86\x0f" // add r24, r22 + "\x97\x1f" // adc r25, r23 + "\x88\x0f" // add r24, r24 + "\x99\x1f" // adc r25, r25 + "\x82\x0f" // add r24, r18 + "\x93\x1f" // adc r25, r19 + "\x08\x95" // ret + + // 00000030 : + "\x62\xe0" // ldi r22, 0x02 + "\x70\xe0" // ldi r23, 0x00 + "\x81\xe0" // ldi r24, 0x01 + "\x90\xe0" // ldi r25, 0x00 + "\x0e\x94\x0d\x00" // call 0x1a + "\x07\x97" // sbiw r24, 0x07 + "\x11\xf0" // breq .+4 + "\x0e\x94\x31\x00" // call 0x62 + "\x60\xe8" // ldi r22, 0x80 + "\x70\xe0" // ldi r23, 0x00 + "\x80\xe4" // ldi r24, 0x40 + "\x90\xe0" // ldi r25, 0x00 + "\x0e\x94\x0d\x00" // call 0x1a + "\x81\x38" // cpi r24, 0x81 + "\x91\x40" // sbci r25, 0x01 + "\xa9\xf7" // brne .-22 + "\x08\x95" // ret + + // 00000058
: + "\x0e\x94\x18\x00" // call 0x30 + "\x80\xe0" // ldi r24, 0x00 + "\x90\xe0" // ldi r25, 0x00 + "\x08\x95" // ret + + // 00000062 : + "\x81\xe0" // ldi r24, 0x01 + "\x90\xe0" // ldi r25, 0x00 + "\xf8\x94" // cli + "\x0c\x94\x36\x00" // jmp 0x6c + + // 0000006c <_exit>: + "\xf8\x94" // cli + + // 0000006e <__stop_program>: + "\xff\xcf" // rjmp .-2 + + // 0x000070 .data + "\x01\x00" + ; +const uint64_t FLASH_SIZE = sizeof(FLASH); + +const uint64_t MEM_BASE = 0x0200; +const uint64_t MEM_SIZE = 0x0100; + +static void uc_common_setup(uc_engine **uc, uc_cpu_avr cpu_model, + const uint8_t *code, uint64_t code_size) +{ + OK(uc_open(UC_ARCH_AVR, UC_MODE_LITTLE_ENDIAN, uc)); + if (cpu_model != 0) + OK(uc_ctl_set_cpu_model(*uc, cpu_model)); + + OK(uc_mem_map(*uc, UC_AVR_MEM_FLASH, PAGE_ALIGN(code_size), + UC_PROT_READ|UC_PROT_EXEC)); + OK(uc_mem_write(*uc, UC_AVR_MEM_FLASH, code, code_size)); + OK(uc_mem_map(*uc, MEM_BASE, MEM_SIZE, UC_PROT_READ|UC_PROT_WRITE)); +} + +static void test_avr_basic_alu(void) +{ + uc_engine *uc = NULL; + + uint8_t r[32] = {0,}; + uint32_t r_pc; + uint16_t r_func_arg0 = 1, r_func_arg1 = 2, r_func_ret; + r[24] = 1; + r[22] = 2; + + uc_common_setup(&uc, 0, FLASH, FLASH_SIZE); + OK(uc_reg_write(uc, UC_AVR_REG_R24W, &r_func_arg0)); + OK(uc_reg_write(uc, UC_AVR_REG_R22W, &r_func_arg1)); + + const uint64_t code_start = ADDR_test_func + 8; + OK(uc_emu_start(uc, code_start, code_start + 4, 0, 0)); + + OK(uc_reg_read(uc, UC_AVR_REG_PC, &r_pc)); + OK(uc_reg_read(uc, UC_AVR_REG_R25, &r[25])); + OK(uc_reg_read(uc, UC_AVR_REG_R24, &r[24])); + OK(uc_reg_read(uc, UC_AVR_REG_R23, &r[23])); + OK(uc_reg_read(uc, UC_AVR_REG_R22, &r[22])); + + TEST_CHECK(r_pc == code_start + 4); + TEST_CHECK(r[25] == 0 && r[24] == 3); + TEST_CHECK(r[23] == 0 && r[22] == 2); + + OK(uc_reg_read(uc, UC_AVR_REG_R24W, &r_func_ret)); + OK(uc_reg_read(uc, UC_AVR_REG_R22W, &r_func_arg1)); + + TEST_CHECK(r_func_ret == r[24]); + TEST_CHECK(r_func_arg1 == r[22]); + + OK(uc_close(uc)); +} + +typedef struct MEM_HOOK_RESULT_s { + uc_mem_type type; + uint64_t address; + int size; + uint64_t value; +} MEM_HOOK_RESULT; + +typedef struct MEM_HOOK_RESULTS_s { + uint64_t count; + MEM_HOOK_RESULT results[16]; +} MEM_HOOK_RESULTS; + +static bool test_avr_basic_mem_cb_eventmem(uc_engine *uc, uc_mem_type type, + uint64_t address, int size, int64_t value, void *user_data) +{ + MEM_HOOK_RESULTS *const r = user_data; + + uint64_t count = r->count; + if (count >= ARRAY_ELEMS(r->results)) { + TEST_ASSERT(false); + } + + r->results[count].type = type; + r->results[count].address = address; + r->results[count].size = size; + r->results[count].value = value; + r->count++; + return true; +} + +static void test_avr_basic_mem(void) +{ + uc_engine *uc = NULL; + uc_hook eventmem_hook; + MEM_HOOK_RESULTS eventmem_trace = {0}; + + const uint8_t *const DATA = &FLASH[ADDR__data__]; + uint8_t mem[SIZE__data__]; + + uint32_t r_pc; + int i; + + uc_common_setup(&uc, 0, FLASH, FLASH_SIZE); + OK(uc_hook_add(uc, &eventmem_hook, UC_HOOK_MEM_VALID, + test_avr_basic_mem_cb_eventmem, &eventmem_trace, 1, 0)); + + const uint64_t code_start = ADDR__init__; + OK(uc_emu_start(uc, code_start, ADDR__init__ + SIZE__init__, 0, 0)); + + OK(uc_reg_read(uc, UC_AVR_REG_PC, &r_pc)); + TEST_CHECK(r_pc == ADDR__init__ + SIZE__init__); + + // Check SRAM was correctly initialized with data from Flash program memory + OK(uc_mem_read(uc, MEM_BASE, mem, sizeof(mem))); + TEST_CHECK(memcmp(mem, DATA, SIZE__data__) == 0); + + TEST_CHECK(eventmem_trace.count == 2*SIZE__data__); + 
for (i = 0; i < SIZE__data__; i++) { + const MEM_HOOK_RESULT *const mr = &eventmem_trace.results[2*i]; + TEST_CHECK(mr->type == UC_MEM_READ); + TEST_CHECK(mr->address == (UC_AVR_MEM_FLASH|(ADDR__data__+i))); + TEST_CHECK(mr->size == 1); + TEST_CHECK(mr->value == 0); + + const MEM_HOOK_RESULT *const mw = &eventmem_trace.results[2*i+1]; + TEST_CHECK(mw->type == UC_MEM_WRITE); + TEST_CHECK(mw->address == MEM_BASE+i); + TEST_CHECK(mw->size == 1); + TEST_CHECK(mw->value == DATA[i]); + } + + OK(uc_close(uc)); +} + +static void test_avr_full_exec(void) +{ + uc_engine *uc = NULL; + + uint8_t r[32] = {0,}; + uint32_t r_pc; + uint32_t r_sp; + + uc_common_setup(&uc, 0, FLASH, FLASH_SIZE); + + const uint64_t code_start = ADDR__init__; + OK(uc_emu_start(uc, code_start, ADDR__init__ + SIZE__init__, 0, 0)); + + OK(uc_reg_read(uc, UC_AVR_REG_PC, &r_pc)); + TEST_CHECK(r_pc == ADDR__init__ + SIZE__init__); + + r_sp = MEM_BASE + MEM_SIZE - 1; + OK(uc_reg_write(uc, UC_AVR_REG_SP, &r_sp)); + + const uint64_t exits[] = { + ADDR_main, + ADDR__stop_program + }; + OK(uc_ctl_exits_enable(uc)); + OK(uc_ctl_set_exits(uc, exits, ARRAY_ELEMS(exits))); + + const uint64_t code_main = ADDR_main; + OK(uc_emu_start(uc, code_main, 0, 0, 0)); + + OK(uc_reg_read(uc, UC_AVR_REG_R25, &r[25])); + OK(uc_reg_read(uc, UC_AVR_REG_R24, &r[24])); + TEST_CHECK(r[25] == 0 && r[24] == 0); + + OK(uc_close(uc)); +} + +TEST_LIST = { + {"test_avr_basic_alu", test_avr_basic_alu}, + {"test_avr_basic_mem", test_avr_basic_mem}, + {"test_avr_full_exec", test_avr_full_exec}, + {NULL, NULL} +}; diff --git a/tests/unit/test_rh850.c b/tests/unit/test_rh850.c new file mode 100644 index 0000000000..e02e704167 --- /dev/null +++ b/tests/unit/test_rh850.c @@ -0,0 +1,40 @@ +#include "unicorn_test.h" + +const uint64_t code_start = 0x1000; +const uint64_t code_len = 0x4000; + +static void uc_common_setup(uc_engine **uc, uc_arch arch, uc_mode mode, + const char *code, uint64_t size) +{ + OK(uc_open(arch, mode, uc)); + OK(uc_mem_map(*uc, code_start, code_len, UC_PROT_ALL)); + OK(uc_mem_write(*uc, code_start, code, size)); +} + +static void test_rh850_add(void) +{ + char code[] = "\x01\x0e\x06\x00\xc1\x11"; + uint32_t r1 = 0x1234; + uint32_t r2 = 0x7777; + uint32_t pc; + uc_engine *uc; + + uc_common_setup(&uc, UC_ARCH_RH850, UC_MODE_LITTLE_ENDIAN, code, + sizeof(code) - 1); + OK(uc_reg_write(uc, UC_RH850_REG_R1, &r1)); + OK(uc_reg_write(uc, UC_RH850_REG_R2, &r2)); + + OK(uc_emu_start(uc, code_start, code_start + sizeof(code) - 1, 0, 0)); + + OK(uc_reg_read(uc, UC_RH850_REG_R1, &r1)); + OK(uc_reg_read(uc, UC_RH850_REG_R2, &r2)); + OK(uc_reg_read(uc, UC_RH850_REG_PC, &pc)); + + TEST_CHECK(r1 == 0x1234 + 6); + TEST_CHECK(r2 == 0x89b1); + TEST_CHECK(pc == code_start + sizeof(code) - 1); + + OK(uc_close(uc)); +} + +TEST_LIST = {{"test_rh850_add", test_rh850_add}, {NULL, NULL}}; \ No newline at end of file diff --git a/uc.c b/uc.c index fd29765080..ea415e2171 100644 --- a/uc.c +++ b/uc.c @@ -22,9 +22,11 @@ #include "qemu/target/mips/unicorn.h" #include "qemu/target/sparc/unicorn.h" #include "qemu/target/ppc/unicorn.h" +#include "qemu/target/rh850/unicorn.h" #include "qemu/target/riscv/unicorn.h" #include "qemu/target/s390x/unicorn.h" #include "qemu/target/tricore/unicorn.h" +#include "qemu/target/avr/unicorn.h" #include "qemu/include/tcg/tcg-apple-jit.h" #include "qemu/include/qemu/queue.h" @@ -225,6 +227,10 @@ bool uc_arch_supported(uc_arch arch) case UC_ARCH_X86: return true; #endif +#ifdef UNICORN_HAS_RH850 + case UC_ARCH_RH850: + return true; +#endif #ifdef 
UNICORN_HAS_RISCV case UC_ARCH_RISCV: return true; @@ -236,6 +242,10 @@ bool uc_arch_supported(uc_arch arch) #ifdef UNICORN_HAS_TRICORE case UC_ARCH_TRICORE: return true; +#endif +#ifdef UNICORN_HAS_AVR + case UC_ARCH_AVR: + return true; #endif /* Invalid or disabled arch */ default: @@ -439,6 +449,15 @@ uc_err uc_open(uc_arch arch, uc_mode mode, uc_engine **result) } break; #endif +#ifdef UNICORN_HAS_RH850 + case UC_ARCH_RH850: + if (mode != UC_MODE_LITTLE_ENDIAN) { + free(uc); + return UC_ERR_MODE; + } + uc->init_arch = rh850_uc_init; + break; +#endif #ifdef UNICORN_HAS_RISCV case UC_ARCH_RISCV: if ((mode & ~UC_MODE_RISCV_MASK) || @@ -473,6 +492,15 @@ uc_err uc_open(uc_arch arch, uc_mode mode, uc_engine **result) } uc->init_arch = uc_init_tricore; break; +#endif +#ifdef UNICORN_HAS_AVR + case UC_ARCH_AVR: + if ((mode & ~UC_MODE_AVR_MASK)) { + free(uc); + return UC_ERR_MODE; + } + uc->init_arch = uc_init_avr; + break; #endif } @@ -1041,6 +1069,11 @@ uc_err uc_emu_start(uc_engine *uc, uint64_t begin, uint64_t until, } break; #endif +#ifdef UNICORN_HAS_RH850 + case UC_ARCH_RH850: + uc_reg_write(uc, UC_RH850_REG_PC, &begin); + break; +#endif #ifdef UNICORN_HAS_RISCV case UC_ARCH_RISCV: if (uc->mode & UC_MODE_RISCV64) { @@ -1059,6 +1092,11 @@ uc_err uc_emu_start(uc_engine *uc, uint64_t begin, uint64_t until, case UC_ARCH_TRICORE: uc_reg_write(uc, UC_TRICORE_REG_PC, &begin_pc32); break; +#endif +#ifdef UNICORN_HAS_AVR + case UC_ARCH_AVR: + uc_reg_write(uc, UC_AVR_REG_PC, &begin_pc32); + break; #endif } @@ -1118,6 +1156,7 @@ uc_err uc_emu_start(uc_engine *uc, uint64_t begin, uint64_t until, if (timeout) { // wait for the timer to finish + printf("Wait VM to finish ...\n"); qemu_thread_join(&uc->timer); } @@ -2280,6 +2319,12 @@ static context_reg_rw_t find_context_reg_rw(uc_arch arch, uc_mode mode) } break; #endif +#ifdef UNICORN_HAS_RH850 + case UC_ARCH_RH850: + rw.read = reg_read_rh850; + rw.write = reg_write_rh850; + break; +#endif #ifdef UNICORN_HAS_RISCV case UC_ARCH_RISCV: if (mode & UC_MODE_RISCV32) { @@ -2302,6 +2347,12 @@ static context_reg_rw_t find_context_reg_rw(uc_arch arch, uc_mode mode) rw.read = reg_read_tricore; rw.write = reg_write_tricore; break; +#endif +#ifdef UNICORN_HAS_AVR + case UC_ARCH_AVR: + rw.read = reg_read_avr; + rw.write = reg_write_avr; + break; #endif } @@ -2731,6 +2782,11 @@ uc_err uc_ctl(uc_engine *uc, uc_control_type control, ...) 
err = UC_ERR_ARG; break; } + } else if (uc->arch == UC_ARCH_AVR) { + if (!avr_cpu_model_valid(model)) { + err = UC_ERR_ARG; + break; + } } else { err = UC_ERR_ARG; break; From 7f77dcec386abce57a9ca3b9e397813cef13ad84 Mon Sep 17 00:00:00 2001 From: Amaan Qureshi Date: Sun, 6 Apr 2025 02:59:31 -0400 Subject: [PATCH 4/4] regenerate symbols & add loongarch backend (#1903) (#2148) * regenerate symbols * Squash loongarch --------- Co-authored-by: WangLiangpu --- CMakeLists.txt | 5 + qemu/avr.h | 113 +- qemu/configure | 16 + qemu/include/elf.h | 1 + qemu/rh850.h | 101 +- qemu/tcg/loongarch64/tcg-insn-defs.c.inc | 7004 ++++++++++++++++++++++ qemu/tcg/loongarch64/tcg-target.h | 228 + qemu/tcg/loongarch64/tcg-target.inc.c | 2681 +++++++++ qemu/tcg/loongarch64/tcg-target.opc.h | 3 + 9 files changed, 10106 insertions(+), 46 deletions(-) create mode 100644 qemu/tcg/loongarch64/tcg-insn-defs.c.inc create mode 100644 qemu/tcg/loongarch64/tcg-target.h create mode 100644 qemu/tcg/loongarch64/tcg-target.inc.c create mode 100644 qemu/tcg/loongarch64/tcg-target.opc.h diff --git a/CMakeLists.txt b/CMakeLists.txt index 6a266c4d76..e6b60aa890 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -279,6 +279,11 @@ else() set(UNICORN_TARGET_ARCH "avr") break() endif() + string(FIND ${UC_COMPILER_MACRO} "loongarch64" UC_RET) + if (${UC_RET} GREATER_EQUAL "0") + set(UNICORN_TARGET_ARCH "loongarch64") + break() + endif() message(FATAL_ERROR "Unknown host compiler: ${CMAKE_C_COMPILER}.") endwhile(TRUE) endif() diff --git a/qemu/avr.h b/qemu/avr.h index bb37176913..a20c033dc7 100644 --- a/qemu/avr.h +++ b/qemu/avr.h @@ -4,6 +4,10 @@ #ifndef UNICORN_ARCH_POSTFIX #define UNICORN_ARCH_POSTFIX _avr #endif +#define unicorn_fill_tlb unicorn_fill_tlb_avr +#define reg_read reg_read_avr +#define reg_write reg_write_avr +#define uc_init uc_init_avr #define uc_add_inline_hook uc_add_inline_hook_avr #define uc_del_inline_hook uc_del_inline_hook_avr #define tb_invalidate_phys_range tb_invalidate_phys_range_avr @@ -38,7 +42,10 @@ #define tcg_gen_shl_i64 tcg_gen_shl_i64_avr #define tcg_gen_shr_i64 tcg_gen_shr_i64_avr #define tcg_gen_st_i64 tcg_gen_st_i64_avr +#define tcg_gen_add_i64 tcg_gen_add_i64_avr +#define tcg_gen_sub_i64 tcg_gen_sub_i64_avr #define tcg_gen_xor_i64 tcg_gen_xor_i64_avr +#define tcg_gen_neg_i64 tcg_gen_neg_i64_avr #define cpu_icount_to_ns cpu_icount_to_ns_avr #define cpu_is_stopped cpu_is_stopped_avr #define cpu_get_ticks cpu_get_ticks_avr @@ -121,7 +128,10 @@ #define memory_map memory_map_avr #define memory_map_io memory_map_io_avr #define memory_map_ptr memory_map_ptr_avr +#define memory_cow memory_cow_avr #define memory_unmap memory_unmap_avr +#define memory_moveout memory_moveout_avr +#define memory_movein memory_movein_avr #define memory_free memory_free_avr #define flatview_unref flatview_unref_avr #define address_space_get_flatview address_space_get_flatview_avr @@ -140,7 +150,9 @@ #define memory_region_get_ram_addr memory_region_get_ram_addr_avr #define memory_region_add_subregion memory_region_add_subregion_avr #define memory_region_del_subregion memory_region_del_subregion_avr +#define memory_region_add_subregion_overlap memory_region_add_subregion_overlap_avr #define memory_region_find memory_region_find_avr +#define memory_region_filter_subregions memory_region_filter_subregions_avr #define memory_listener_register memory_listener_register_avr #define memory_listener_unregister memory_listener_unregister_avr #define address_space_remove_listeners address_space_remove_listeners_avr @@ -148,6 +160,7 @@ 
#define address_space_destroy address_space_destroy_avr #define memory_region_init_ram memory_region_init_ram_avr #define memory_mapping_list_add_merge_sorted memory_mapping_list_add_merge_sorted_avr +#define find_memory_mapping find_memory_mapping_avr #define exec_inline_op exec_inline_op_avr #define floatx80_default_nan floatx80_default_nan_avr #define float_raise float_raise_avr @@ -364,6 +377,8 @@ #define floatx80_sub floatx80_sub_avr #define floatx80_mul floatx80_mul_avr #define floatx80_div floatx80_div_avr +#define floatx80_modrem floatx80_modrem_avr +#define floatx80_mod floatx80_mod_avr #define floatx80_rem floatx80_rem_avr #define floatx80_sqrt floatx80_sqrt_avr #define floatx80_eq floatx80_eq_avr @@ -638,6 +653,7 @@ #define tcg_gen_gvec_dup_i32 tcg_gen_gvec_dup_i32_avr #define tcg_gen_gvec_dup_i64 tcg_gen_gvec_dup_i64_avr #define tcg_gen_gvec_dup_mem tcg_gen_gvec_dup_mem_avr +#define tcg_gen_gvec_dup_imm tcg_gen_gvec_dup_imm_avr #define tcg_gen_gvec_dup64i tcg_gen_gvec_dup64i_avr #define tcg_gen_gvec_dup32i tcg_gen_gvec_dup32i_avr #define tcg_gen_gvec_dup16i tcg_gen_gvec_dup16i_avr @@ -692,13 +708,20 @@ #define tcg_gen_gvec_shri tcg_gen_gvec_shri_avr #define tcg_gen_vec_sar8i_i64 tcg_gen_vec_sar8i_i64_avr #define tcg_gen_vec_sar16i_i64 tcg_gen_vec_sar16i_i64_avr +#define tcg_gen_vec_rotl8i_i64 tcg_gen_vec_rotl8i_i64_avr +#define tcg_gen_vec_rotl16i_i64 tcg_gen_vec_rotl16i_i64_avr #define tcg_gen_gvec_sari tcg_gen_gvec_sari_avr +#define tcg_gen_gvec_rotli tcg_gen_gvec_rotli_avr +#define tcg_gen_gvec_rotri tcg_gen_gvec_rotri_avr #define tcg_gen_gvec_shls tcg_gen_gvec_shls_avr #define tcg_gen_gvec_shrs tcg_gen_gvec_shrs_avr #define tcg_gen_gvec_sars tcg_gen_gvec_sars_avr +#define tcg_gen_gvec_rotls tcg_gen_gvec_rotls_avr #define tcg_gen_gvec_shlv tcg_gen_gvec_shlv_avr #define tcg_gen_gvec_shrv tcg_gen_gvec_shrv_avr #define tcg_gen_gvec_sarv tcg_gen_gvec_sarv_avr +#define tcg_gen_gvec_rotlv tcg_gen_gvec_rotlv_avr +#define tcg_gen_gvec_rotrv tcg_gen_gvec_rotrv_avr #define tcg_gen_gvec_cmp tcg_gen_gvec_cmp_avr #define tcg_gen_gvec_bitsel tcg_gen_gvec_bitsel_avr #define tcg_can_emit_vecop_list tcg_can_emit_vecop_list_avr @@ -735,6 +758,8 @@ #define tcg_gen_shli_vec tcg_gen_shli_vec_avr #define tcg_gen_shri_vec tcg_gen_shri_vec_avr #define tcg_gen_sari_vec tcg_gen_sari_vec_avr +#define tcg_gen_rotli_vec tcg_gen_rotli_vec_avr +#define tcg_gen_rotri_vec tcg_gen_rotri_vec_avr #define tcg_gen_cmp_vec tcg_gen_cmp_vec_avr #define tcg_gen_add_vec tcg_gen_add_vec_avr #define tcg_gen_sub_vec tcg_gen_sub_vec_avr @@ -750,9 +775,12 @@ #define tcg_gen_shlv_vec tcg_gen_shlv_vec_avr #define tcg_gen_shrv_vec tcg_gen_shrv_vec_avr #define tcg_gen_sarv_vec tcg_gen_sarv_vec_avr +#define tcg_gen_rotlv_vec tcg_gen_rotlv_vec_avr +#define tcg_gen_rotrv_vec tcg_gen_rotrv_vec_avr #define tcg_gen_shls_vec tcg_gen_shls_vec_avr #define tcg_gen_shrs_vec tcg_gen_shrs_vec_avr #define tcg_gen_sars_vec tcg_gen_sars_vec_avr +#define tcg_gen_rotls_vec tcg_gen_rotls_vec_avr #define tcg_gen_bitsel_vec tcg_gen_bitsel_vec_avr #define tcg_gen_cmpsel_vec tcg_gen_cmpsel_vec_avr #define tb_htable_lookup tb_htable_lookup_avr @@ -764,6 +792,7 @@ #define cpu_loop_exit_restore cpu_loop_exit_restore_avr #define cpu_loop_exit_atomic cpu_loop_exit_atomic_avr #define tlb_init tlb_init_avr +#define tlb_destroy tlb_destroy_avr #define tlb_flush_by_mmuidx tlb_flush_by_mmuidx_avr #define tlb_flush tlb_flush_avr #define tlb_flush_by_mmuidx_all_cpus tlb_flush_by_mmuidx_all_cpus_avr @@ -784,6 +813,7 @@ #define tlb_set_page tlb_set_page_avr 
#define get_page_addr_code_hostp get_page_addr_code_hostp_avr #define get_page_addr_code get_page_addr_code_avr +#define probe_access_flags probe_access_flags_avr #define probe_access probe_access_avr #define tlb_vaddr_to_host tlb_vaddr_to_host_avr #define helper_ret_ldub_mmu helper_ret_ldub_mmu_avr @@ -800,22 +830,34 @@ #define helper_be_ldsl_mmu helper_be_ldsl_mmu_avr #define cpu_ldub_mmuidx_ra cpu_ldub_mmuidx_ra_avr #define cpu_ldsb_mmuidx_ra cpu_ldsb_mmuidx_ra_avr -#define cpu_lduw_mmuidx_ra cpu_lduw_mmuidx_ra_avr -#define cpu_ldsw_mmuidx_ra cpu_ldsw_mmuidx_ra_avr -#define cpu_ldl_mmuidx_ra cpu_ldl_mmuidx_ra_avr -#define cpu_ldq_mmuidx_ra cpu_ldq_mmuidx_ra_avr +#define cpu_lduw_be_mmuidx_ra cpu_lduw_be_mmuidx_ra_avr +#define cpu_lduw_le_mmuidx_ra cpu_lduw_le_mmuidx_ra_avr +#define cpu_ldsw_be_mmuidx_ra cpu_ldsw_be_mmuidx_ra_avr +#define cpu_ldsw_le_mmuidx_ra cpu_ldsw_le_mmuidx_ra_avr +#define cpu_ldl_be_mmuidx_ra cpu_ldl_be_mmuidx_ra_avr +#define cpu_ldl_le_mmuidx_ra cpu_ldl_le_mmuidx_ra_avr +#define cpu_ldq_be_mmuidx_ra cpu_ldq_be_mmuidx_ra_avr +#define cpu_ldq_le_mmuidx_ra cpu_ldq_le_mmuidx_ra_avr #define cpu_ldub_data_ra cpu_ldub_data_ra_avr #define cpu_ldsb_data_ra cpu_ldsb_data_ra_avr -#define cpu_lduw_data_ra cpu_lduw_data_ra_avr -#define cpu_ldsw_data_ra cpu_ldsw_data_ra_avr -#define cpu_ldl_data_ra cpu_ldl_data_ra_avr -#define cpu_ldq_data_ra cpu_ldq_data_ra_avr +#define cpu_lduw_be_data_ra cpu_lduw_be_data_ra_avr +#define cpu_lduw_le_data_ra cpu_lduw_le_data_ra_avr +#define cpu_ldsw_be_data_ra cpu_ldsw_be_data_ra_avr +#define cpu_ldsw_le_data_ra cpu_ldsw_le_data_ra_avr +#define cpu_ldl_be_data_ra cpu_ldl_be_data_ra_avr +#define cpu_ldl_le_data_ra cpu_ldl_le_data_ra_avr +#define cpu_ldq_be_data_ra cpu_ldq_be_data_ra_avr +#define cpu_ldq_le_data_ra cpu_ldq_le_data_ra_avr #define cpu_ldub_data cpu_ldub_data_avr #define cpu_ldsb_data cpu_ldsb_data_avr -#define cpu_lduw_data cpu_lduw_data_avr -#define cpu_ldsw_data cpu_ldsw_data_avr -#define cpu_ldl_data cpu_ldl_data_avr -#define cpu_ldq_data cpu_ldq_data_avr +#define cpu_lduw_be_data cpu_lduw_be_data_avr +#define cpu_lduw_le_data cpu_lduw_le_data_avr +#define cpu_ldsw_be_data cpu_ldsw_be_data_avr +#define cpu_ldsw_le_data cpu_ldsw_le_data_avr +#define cpu_ldl_be_data cpu_ldl_be_data_avr +#define cpu_ldl_le_data cpu_ldl_le_data_avr +#define cpu_ldq_le_data cpu_ldq_le_data_avr +#define cpu_ldq_be_data cpu_ldq_be_data_avr #define helper_ret_stb_mmu helper_ret_stb_mmu_avr #define helper_le_stw_mmu helper_le_stw_mmu_avr #define helper_be_stw_mmu helper_be_stw_mmu_avr @@ -824,17 +866,26 @@ #define helper_le_stq_mmu helper_le_stq_mmu_avr #define helper_be_stq_mmu helper_be_stq_mmu_avr #define cpu_stb_mmuidx_ra cpu_stb_mmuidx_ra_avr -#define cpu_stw_mmuidx_ra cpu_stw_mmuidx_ra_avr -#define cpu_stl_mmuidx_ra cpu_stl_mmuidx_ra_avr -#define cpu_stq_mmuidx_ra cpu_stq_mmuidx_ra_avr +#define cpu_stw_be_mmuidx_ra cpu_stw_be_mmuidx_ra_avr +#define cpu_stw_le_mmuidx_ra cpu_stw_le_mmuidx_ra_avr +#define cpu_stl_be_mmuidx_ra cpu_stl_be_mmuidx_ra_avr +#define cpu_stl_le_mmuidx_ra cpu_stl_le_mmuidx_ra_avr +#define cpu_stq_be_mmuidx_ra cpu_stq_be_mmuidx_ra_avr +#define cpu_stq_le_mmuidx_ra cpu_stq_le_mmuidx_ra_avr #define cpu_stb_data_ra cpu_stb_data_ra_avr -#define cpu_stw_data_ra cpu_stw_data_ra_avr -#define cpu_stl_data_ra cpu_stl_data_ra_avr -#define cpu_stq_data_ra cpu_stq_data_ra_avr +#define cpu_stw_be_data_ra cpu_stw_be_data_ra_avr +#define cpu_stw_le_data_ra cpu_stw_le_data_ra_avr +#define cpu_stl_be_data_ra cpu_stl_be_data_ra_avr +#define 
cpu_stl_le_data_ra cpu_stl_le_data_ra_avr +#define cpu_stq_be_data_ra cpu_stq_be_data_ra_avr +#define cpu_stq_le_data_ra cpu_stq_le_data_ra_avr #define cpu_stb_data cpu_stb_data_avr -#define cpu_stw_data cpu_stw_data_avr -#define cpu_stl_data cpu_stl_data_avr -#define cpu_stq_data cpu_stq_data_avr +#define cpu_stw_be_data cpu_stw_be_data_avr +#define cpu_stw_le_data cpu_stw_le_data_avr +#define cpu_stl_be_data cpu_stl_be_data_avr +#define cpu_stl_le_data cpu_stl_le_data_avr +#define cpu_stq_be_data cpu_stq_be_data_avr +#define cpu_stq_le_data cpu_stq_le_data_avr #define helper_atomic_cmpxchgb_mmu helper_atomic_cmpxchgb_mmu_avr #define helper_atomic_xchgb_mmu helper_atomic_xchgb_mmu_avr #define helper_atomic_fetch_addb_mmu helper_atomic_fetch_addb_mmu_avr @@ -1091,6 +1142,7 @@ #define cpu_lduw_code cpu_lduw_code_avr #define cpu_ldl_code cpu_ldl_code_avr #define cpu_ldq_code cpu_ldq_code_avr +#define cpu_interrupt_handler cpu_interrupt_handler_avr #define helper_div_i32 helper_div_i32_avr #define helper_rem_i32 helper_rem_i32_avr #define helper_divu_i32 helper_divu_i32_avr @@ -1175,6 +1227,10 @@ #define helper_gvec_sar16i helper_gvec_sar16i_avr #define helper_gvec_sar32i helper_gvec_sar32i_avr #define helper_gvec_sar64i helper_gvec_sar64i_avr +#define helper_gvec_rotl8i helper_gvec_rotl8i_avr +#define helper_gvec_rotl16i helper_gvec_rotl16i_avr +#define helper_gvec_rotl32i helper_gvec_rotl32i_avr +#define helper_gvec_rotl64i helper_gvec_rotl64i_avr #define helper_gvec_shl8v helper_gvec_shl8v_avr #define helper_gvec_shl16v helper_gvec_shl16v_avr #define helper_gvec_shl32v helper_gvec_shl32v_avr @@ -1187,6 +1243,14 @@ #define helper_gvec_sar16v helper_gvec_sar16v_avr #define helper_gvec_sar32v helper_gvec_sar32v_avr #define helper_gvec_sar64v helper_gvec_sar64v_avr +#define helper_gvec_rotl8v helper_gvec_rotl8v_avr +#define helper_gvec_rotl16v helper_gvec_rotl16v_avr +#define helper_gvec_rotl32v helper_gvec_rotl32v_avr +#define helper_gvec_rotl64v helper_gvec_rotl64v_avr +#define helper_gvec_rotr8v helper_gvec_rotr8v_avr +#define helper_gvec_rotr16v helper_gvec_rotr16v_avr +#define helper_gvec_rotr32v helper_gvec_rotr32v_avr +#define helper_gvec_rotr64v helper_gvec_rotr64v_avr #define helper_gvec_eq8 helper_gvec_eq8_avr #define helper_gvec_ne8 helper_gvec_ne8_avr #define helper_gvec_lt8 helper_gvec_lt8_avr @@ -1279,6 +1343,9 @@ #define gen_helper_vfp_set_fpscr gen_helper_vfp_set_fpscr_avr #define gen_helper_cpsr_read gen_helper_cpsr_read_avr #define gen_helper_cpsr_write gen_helper_cpsr_write_avr +#define tlb_reset_dirty_by_vaddr tlb_reset_dirty_by_vaddr_avr +#define helper_stqcx_le_parallel helper_stqcx_le_parallel_avr +#define helper_stqcx_be_parallel helper_stqcx_be_parallel_avr #define helper_sleep helper_sleep_avr #define helper_unsupported helper_unsupported_avr #define helper_debug helper_debug_avr @@ -1290,8 +1357,4 @@ #define helper_wdr helper_wdr_avr #define gen_intermediate_code gen_intermediate_code_avr #define restore_state_to_opc restore_state_to_opc_avr - -#define reg_read reg_read_avr -#define reg_write reg_write_avr -#define uc_init uc_init_avr #endif diff --git a/qemu/configure b/qemu/configure index cc5752292f..3085467646 100755 --- a/qemu/configure +++ b/qemu/configure @@ -498,6 +498,8 @@ elif check_define __tricore__ ; then cpu="tricore" elif check_define __AVR__ ; then cpu="avr" +elif check_define __loongarch64 ; then + cpu="loongarch64" else cpu=$(uname -m) fi @@ -545,6 +547,10 @@ case "$cpu" in cpu="avr" supported_cpu="yes" ;; + loongarch64) + cpu="loongarch64" + 
supported_cpu="yes" + ;; *) # This will result in either an error or falling back to TCI later ARCH=unknown @@ -859,6 +865,11 @@ case "$cpu" in CPU_CFLAGS="-m64 -mcx16" QEMU_LDFLAGS="-m64 $QEMU_LDFLAGS" ;; + loongarch*) + CPU_CFLAGS="" + QEMU_LDFLAGS=" $QEMU_LDFLAGS" + ;; + x32) CPU_CFLAGS="-mx32" QEMU_LDFLAGS="-mx32 $QEMU_LDFLAGS" @@ -2680,6 +2691,11 @@ case "$target_name" in mttcg="yes" TARGET_SYSTBL_ABI=i386 ;; + loongarch64) + mttcg="yes" + TARGET_ARCH=loongarch64 + TARGET_SYSTBL_ABI=common,64 + ;; x86_64) TARGET_BASE_ARCH=i386 TARGET_SYSTBL_ABI=common,64 diff --git a/qemu/include/elf.h b/qemu/include/elf.h index 5b06b55f28..a6bec3d674 100644 --- a/qemu/include/elf.h +++ b/qemu/include/elf.h @@ -176,6 +176,7 @@ typedef struct mips_elf_abiflags_v0 { #define EM_NANOMIPS 249 /* Wave Computing nanoMIPS */ +#define EM_LOONGARCH 258 /* LoongArch */ /* * This is an interim value that we will use until the committee comes * up with a final number. diff --git a/qemu/rh850.h b/qemu/rh850.h index 071393cb7c..f0ba0cabf3 100644 --- a/qemu/rh850.h +++ b/qemu/rh850.h @@ -42,7 +42,10 @@ #define tcg_gen_shl_i64 tcg_gen_shl_i64_rh850 #define tcg_gen_shr_i64 tcg_gen_shr_i64_rh850 #define tcg_gen_st_i64 tcg_gen_st_i64_rh850 +#define tcg_gen_add_i64 tcg_gen_add_i64_rh850 +#define tcg_gen_sub_i64 tcg_gen_sub_i64_rh850 #define tcg_gen_xor_i64 tcg_gen_xor_i64_rh850 +#define tcg_gen_neg_i64 tcg_gen_neg_i64_rh850 #define cpu_icount_to_ns cpu_icount_to_ns_rh850 #define cpu_is_stopped cpu_is_stopped_rh850 #define cpu_get_ticks cpu_get_ticks_rh850 @@ -54,6 +57,7 @@ #define vm_start vm_start_rh850 #define address_space_dispatch_compact address_space_dispatch_compact_rh850 #define flatview_translate flatview_translate_rh850 +#define flatview_copy flatview_copy_rh850 #define address_space_translate_for_iotlb address_space_translate_for_iotlb_rh850 #define qemu_get_cpu qemu_get_cpu_rh850 #define cpu_address_space_init cpu_address_space_init_rh850 @@ -90,6 +94,7 @@ #define iotlb_to_section iotlb_to_section_rh850 #define address_space_dispatch_new address_space_dispatch_new_rh850 #define address_space_dispatch_free address_space_dispatch_free_rh850 +#define address_space_dispatch_clear address_space_dispatch_clear_rh850 #define flatview_read_continue flatview_read_continue_rh850 #define address_space_read_full address_space_read_full_rh850 #define address_space_write address_space_write_rh850 @@ -372,6 +377,8 @@ #define floatx80_sub floatx80_sub_rh850 #define floatx80_mul floatx80_mul_rh850 #define floatx80_div floatx80_div_rh850 +#define floatx80_modrem floatx80_modrem_rh850 +#define floatx80_mod floatx80_mod_rh850 #define floatx80_rem floatx80_rem_rh850 #define floatx80_sqrt floatx80_sqrt_rh850 #define floatx80_eq floatx80_eq_rh850 @@ -646,6 +653,7 @@ #define tcg_gen_gvec_dup_i32 tcg_gen_gvec_dup_i32_rh850 #define tcg_gen_gvec_dup_i64 tcg_gen_gvec_dup_i64_rh850 #define tcg_gen_gvec_dup_mem tcg_gen_gvec_dup_mem_rh850 +#define tcg_gen_gvec_dup_imm tcg_gen_gvec_dup_imm_rh850 #define tcg_gen_gvec_dup64i tcg_gen_gvec_dup64i_rh850 #define tcg_gen_gvec_dup32i tcg_gen_gvec_dup32i_rh850 #define tcg_gen_gvec_dup16i tcg_gen_gvec_dup16i_rh850 @@ -700,13 +708,20 @@ #define tcg_gen_gvec_shri tcg_gen_gvec_shri_rh850 #define tcg_gen_vec_sar8i_i64 tcg_gen_vec_sar8i_i64_rh850 #define tcg_gen_vec_sar16i_i64 tcg_gen_vec_sar16i_i64_rh850 +#define tcg_gen_vec_rotl8i_i64 tcg_gen_vec_rotl8i_i64_rh850 +#define tcg_gen_vec_rotl16i_i64 tcg_gen_vec_rotl16i_i64_rh850 #define tcg_gen_gvec_sari tcg_gen_gvec_sari_rh850 +#define tcg_gen_gvec_rotli 
tcg_gen_gvec_rotli_rh850 +#define tcg_gen_gvec_rotri tcg_gen_gvec_rotri_rh850 #define tcg_gen_gvec_shls tcg_gen_gvec_shls_rh850 #define tcg_gen_gvec_shrs tcg_gen_gvec_shrs_rh850 #define tcg_gen_gvec_sars tcg_gen_gvec_sars_rh850 +#define tcg_gen_gvec_rotls tcg_gen_gvec_rotls_rh850 #define tcg_gen_gvec_shlv tcg_gen_gvec_shlv_rh850 #define tcg_gen_gvec_shrv tcg_gen_gvec_shrv_rh850 #define tcg_gen_gvec_sarv tcg_gen_gvec_sarv_rh850 +#define tcg_gen_gvec_rotlv tcg_gen_gvec_rotlv_rh850 +#define tcg_gen_gvec_rotrv tcg_gen_gvec_rotrv_rh850 #define tcg_gen_gvec_cmp tcg_gen_gvec_cmp_rh850 #define tcg_gen_gvec_bitsel tcg_gen_gvec_bitsel_rh850 #define tcg_can_emit_vecop_list tcg_can_emit_vecop_list_rh850 @@ -743,6 +758,8 @@ #define tcg_gen_shli_vec tcg_gen_shli_vec_rh850 #define tcg_gen_shri_vec tcg_gen_shri_vec_rh850 #define tcg_gen_sari_vec tcg_gen_sari_vec_rh850 +#define tcg_gen_rotli_vec tcg_gen_rotli_vec_rh850 +#define tcg_gen_rotri_vec tcg_gen_rotri_vec_rh850 #define tcg_gen_cmp_vec tcg_gen_cmp_vec_rh850 #define tcg_gen_add_vec tcg_gen_add_vec_rh850 #define tcg_gen_sub_vec tcg_gen_sub_vec_rh850 @@ -758,9 +775,12 @@ #define tcg_gen_shlv_vec tcg_gen_shlv_vec_rh850 #define tcg_gen_shrv_vec tcg_gen_shrv_vec_rh850 #define tcg_gen_sarv_vec tcg_gen_sarv_vec_rh850 +#define tcg_gen_rotlv_vec tcg_gen_rotlv_vec_rh850 +#define tcg_gen_rotrv_vec tcg_gen_rotrv_vec_rh850 #define tcg_gen_shls_vec tcg_gen_shls_vec_rh850 #define tcg_gen_shrs_vec tcg_gen_shrs_vec_rh850 #define tcg_gen_sars_vec tcg_gen_sars_vec_rh850 +#define tcg_gen_rotls_vec tcg_gen_rotls_vec_rh850 #define tcg_gen_bitsel_vec tcg_gen_bitsel_vec_rh850 #define tcg_gen_cmpsel_vec tcg_gen_cmpsel_vec_rh850 #define tb_htable_lookup tb_htable_lookup_rh850 @@ -772,6 +792,7 @@ #define cpu_loop_exit_restore cpu_loop_exit_restore_rh850 #define cpu_loop_exit_atomic cpu_loop_exit_atomic_rh850 #define tlb_init tlb_init_rh850 +#define tlb_destroy tlb_destroy_rh850 #define tlb_flush_by_mmuidx tlb_flush_by_mmuidx_rh850 #define tlb_flush tlb_flush_rh850 #define tlb_flush_by_mmuidx_all_cpus tlb_flush_by_mmuidx_all_cpus_rh850 @@ -792,6 +813,7 @@ #define tlb_set_page tlb_set_page_rh850 #define get_page_addr_code_hostp get_page_addr_code_hostp_rh850 #define get_page_addr_code get_page_addr_code_rh850 +#define probe_access_flags probe_access_flags_rh850 #define probe_access probe_access_rh850 #define tlb_vaddr_to_host tlb_vaddr_to_host_rh850 #define helper_ret_ldub_mmu helper_ret_ldub_mmu_rh850 @@ -808,22 +830,34 @@ #define helper_be_ldsl_mmu helper_be_ldsl_mmu_rh850 #define cpu_ldub_mmuidx_ra cpu_ldub_mmuidx_ra_rh850 #define cpu_ldsb_mmuidx_ra cpu_ldsb_mmuidx_ra_rh850 -#define cpu_lduw_mmuidx_ra cpu_lduw_mmuidx_ra_rh850 -#define cpu_ldsw_mmuidx_ra cpu_ldsw_mmuidx_ra_rh850 -#define cpu_ldl_mmuidx_ra cpu_ldl_mmuidx_ra_rh850 -#define cpu_ldq_mmuidx_ra cpu_ldq_mmuidx_ra_rh850 +#define cpu_lduw_be_mmuidx_ra cpu_lduw_be_mmuidx_ra_rh850 +#define cpu_lduw_le_mmuidx_ra cpu_lduw_le_mmuidx_ra_rh850 +#define cpu_ldsw_be_mmuidx_ra cpu_ldsw_be_mmuidx_ra_rh850 +#define cpu_ldsw_le_mmuidx_ra cpu_ldsw_le_mmuidx_ra_rh850 +#define cpu_ldl_be_mmuidx_ra cpu_ldl_be_mmuidx_ra_rh850 +#define cpu_ldl_le_mmuidx_ra cpu_ldl_le_mmuidx_ra_rh850 +#define cpu_ldq_be_mmuidx_ra cpu_ldq_be_mmuidx_ra_rh850 +#define cpu_ldq_le_mmuidx_ra cpu_ldq_le_mmuidx_ra_rh850 #define cpu_ldub_data_ra cpu_ldub_data_ra_rh850 #define cpu_ldsb_data_ra cpu_ldsb_data_ra_rh850 -#define cpu_lduw_data_ra cpu_lduw_data_ra_rh850 -#define cpu_ldsw_data_ra cpu_ldsw_data_ra_rh850 -#define cpu_ldl_data_ra cpu_ldl_data_ra_rh850 
-#define cpu_ldq_data_ra cpu_ldq_data_ra_rh850 +#define cpu_lduw_be_data_ra cpu_lduw_be_data_ra_rh850 +#define cpu_lduw_le_data_ra cpu_lduw_le_data_ra_rh850 +#define cpu_ldsw_be_data_ra cpu_ldsw_be_data_ra_rh850 +#define cpu_ldsw_le_data_ra cpu_ldsw_le_data_ra_rh850 +#define cpu_ldl_be_data_ra cpu_ldl_be_data_ra_rh850 +#define cpu_ldl_le_data_ra cpu_ldl_le_data_ra_rh850 +#define cpu_ldq_be_data_ra cpu_ldq_be_data_ra_rh850 +#define cpu_ldq_le_data_ra cpu_ldq_le_data_ra_rh850 #define cpu_ldub_data cpu_ldub_data_rh850 #define cpu_ldsb_data cpu_ldsb_data_rh850 -#define cpu_lduw_data cpu_lduw_data_rh850 -#define cpu_ldsw_data cpu_ldsw_data_rh850 -#define cpu_ldl_data cpu_ldl_data_rh850 -#define cpu_ldq_data cpu_ldq_data_rh850 +#define cpu_lduw_be_data cpu_lduw_be_data_rh850 +#define cpu_lduw_le_data cpu_lduw_le_data_rh850 +#define cpu_ldsw_be_data cpu_ldsw_be_data_rh850 +#define cpu_ldsw_le_data cpu_ldsw_le_data_rh850 +#define cpu_ldl_be_data cpu_ldl_be_data_rh850 +#define cpu_ldl_le_data cpu_ldl_le_data_rh850 +#define cpu_ldq_le_data cpu_ldq_le_data_rh850 +#define cpu_ldq_be_data cpu_ldq_be_data_rh850 #define helper_ret_stb_mmu helper_ret_stb_mmu_rh850 #define helper_le_stw_mmu helper_le_stw_mmu_rh850 #define helper_be_stw_mmu helper_be_stw_mmu_rh850 @@ -832,17 +866,26 @@ #define helper_le_stq_mmu helper_le_stq_mmu_rh850 #define helper_be_stq_mmu helper_be_stq_mmu_rh850 #define cpu_stb_mmuidx_ra cpu_stb_mmuidx_ra_rh850 -#define cpu_stw_mmuidx_ra cpu_stw_mmuidx_ra_rh850 -#define cpu_stl_mmuidx_ra cpu_stl_mmuidx_ra_rh850 -#define cpu_stq_mmuidx_ra cpu_stq_mmuidx_ra_rh850 +#define cpu_stw_be_mmuidx_ra cpu_stw_be_mmuidx_ra_rh850 +#define cpu_stw_le_mmuidx_ra cpu_stw_le_mmuidx_ra_rh850 +#define cpu_stl_be_mmuidx_ra cpu_stl_be_mmuidx_ra_rh850 +#define cpu_stl_le_mmuidx_ra cpu_stl_le_mmuidx_ra_rh850 +#define cpu_stq_be_mmuidx_ra cpu_stq_be_mmuidx_ra_rh850 +#define cpu_stq_le_mmuidx_ra cpu_stq_le_mmuidx_ra_rh850 #define cpu_stb_data_ra cpu_stb_data_ra_rh850 -#define cpu_stw_data_ra cpu_stw_data_ra_rh850 -#define cpu_stl_data_ra cpu_stl_data_ra_rh850 -#define cpu_stq_data_ra cpu_stq_data_ra_rh850 +#define cpu_stw_be_data_ra cpu_stw_be_data_ra_rh850 +#define cpu_stw_le_data_ra cpu_stw_le_data_ra_rh850 +#define cpu_stl_be_data_ra cpu_stl_be_data_ra_rh850 +#define cpu_stl_le_data_ra cpu_stl_le_data_ra_rh850 +#define cpu_stq_be_data_ra cpu_stq_be_data_ra_rh850 +#define cpu_stq_le_data_ra cpu_stq_le_data_ra_rh850 #define cpu_stb_data cpu_stb_data_rh850 -#define cpu_stw_data cpu_stw_data_rh850 -#define cpu_stl_data cpu_stl_data_rh850 -#define cpu_stq_data cpu_stq_data_rh850 +#define cpu_stw_be_data cpu_stw_be_data_rh850 +#define cpu_stw_le_data cpu_stw_le_data_rh850 +#define cpu_stl_be_data cpu_stl_be_data_rh850 +#define cpu_stl_le_data cpu_stl_le_data_rh850 +#define cpu_stq_be_data cpu_stq_be_data_rh850 +#define cpu_stq_le_data cpu_stq_le_data_rh850 #define helper_atomic_cmpxchgb_mmu helper_atomic_cmpxchgb_mmu_rh850 #define helper_atomic_xchgb_mmu helper_atomic_xchgb_mmu_rh850 #define helper_atomic_fetch_addb_mmu helper_atomic_fetch_addb_mmu_rh850 @@ -1099,6 +1142,7 @@ #define cpu_lduw_code cpu_lduw_code_rh850 #define cpu_ldl_code cpu_ldl_code_rh850 #define cpu_ldq_code cpu_ldq_code_rh850 +#define cpu_interrupt_handler cpu_interrupt_handler_rh850 #define helper_div_i32 helper_div_i32_rh850 #define helper_rem_i32 helper_rem_i32_rh850 #define helper_divu_i32 helper_divu_i32_rh850 @@ -1183,6 +1227,10 @@ #define helper_gvec_sar16i helper_gvec_sar16i_rh850 #define helper_gvec_sar32i helper_gvec_sar32i_rh850 
#define helper_gvec_sar64i helper_gvec_sar64i_rh850 +#define helper_gvec_rotl8i helper_gvec_rotl8i_rh850 +#define helper_gvec_rotl16i helper_gvec_rotl16i_rh850 +#define helper_gvec_rotl32i helper_gvec_rotl32i_rh850 +#define helper_gvec_rotl64i helper_gvec_rotl64i_rh850 #define helper_gvec_shl8v helper_gvec_shl8v_rh850 #define helper_gvec_shl16v helper_gvec_shl16v_rh850 #define helper_gvec_shl32v helper_gvec_shl32v_rh850 @@ -1195,6 +1243,14 @@ #define helper_gvec_sar16v helper_gvec_sar16v_rh850 #define helper_gvec_sar32v helper_gvec_sar32v_rh850 #define helper_gvec_sar64v helper_gvec_sar64v_rh850 +#define helper_gvec_rotl8v helper_gvec_rotl8v_rh850 +#define helper_gvec_rotl16v helper_gvec_rotl16v_rh850 +#define helper_gvec_rotl32v helper_gvec_rotl32v_rh850 +#define helper_gvec_rotl64v helper_gvec_rotl64v_rh850 +#define helper_gvec_rotr8v helper_gvec_rotr8v_rh850 +#define helper_gvec_rotr16v helper_gvec_rotr16v_rh850 +#define helper_gvec_rotr32v helper_gvec_rotr32v_rh850 +#define helper_gvec_rotr64v helper_gvec_rotr64v_rh850 #define helper_gvec_eq8 helper_gvec_eq8_rh850 #define helper_gvec_ne8 helper_gvec_ne8_rh850 #define helper_gvec_lt8 helper_gvec_lt8_rh850 @@ -1287,6 +1343,9 @@ #define gen_helper_vfp_set_fpscr gen_helper_vfp_set_fpscr_rh850 #define gen_helper_cpsr_read gen_helper_cpsr_read_rh850 #define gen_helper_cpsr_write gen_helper_cpsr_write_rh850 +#define tlb_reset_dirty_by_vaddr tlb_reset_dirty_by_vaddr_rh850 +#define helper_stqcx_le_parallel helper_stqcx_le_parallel_rh850 +#define helper_stqcx_be_parallel helper_stqcx_be_parallel_rh850 #define restore_state_to_opc restore_state_to_opc_rh850 #define helper_tlb_flush helper_tlb_flush_rh850 #define helper_uc_rh850_exit helper_uc_rh850_exit_rh850 diff --git a/qemu/tcg/loongarch64/tcg-insn-defs.c.inc b/qemu/tcg/loongarch64/tcg-insn-defs.c.inc new file mode 100644 index 0000000000..ee3b483b02 --- /dev/null +++ b/qemu/tcg/loongarch64/tcg-insn-defs.c.inc @@ -0,0 +1,7004 @@ +/* SPDX-License-Identifier: MIT */ +/* + * LoongArch instruction formats, opcodes, and encoders for TCG use. + * + * This file is auto-generated by genqemutcgdefs from + * https://github.com/loongson-community/loongarch-opcodes, + * from commit 8027da9a8157a8b47fc48ff1def292e09c5668bd. + * DO NOT EDIT. 
+ */ + +typedef enum { + OPC_CLZ_W = 0x00001400, + OPC_CTZ_W = 0x00001c00, + OPC_CLZ_D = 0x00002400, + OPC_CTZ_D = 0x00002c00, + OPC_REVB_2H = 0x00003000, + OPC_REVB_2W = 0x00003800, + OPC_REVB_D = 0x00003c00, + OPC_SEXT_H = 0x00005800, + OPC_SEXT_B = 0x00005c00, + OPC_ADD_W = 0x00100000, + OPC_ADD_D = 0x00108000, + OPC_SUB_W = 0x00110000, + OPC_SUB_D = 0x00118000, + OPC_SLT = 0x00120000, + OPC_SLTU = 0x00128000, + OPC_MASKEQZ = 0x00130000, + OPC_MASKNEZ = 0x00138000, + OPC_NOR = 0x00140000, + OPC_AND = 0x00148000, + OPC_OR = 0x00150000, + OPC_XOR = 0x00158000, + OPC_ORN = 0x00160000, + OPC_ANDN = 0x00168000, + OPC_SLL_W = 0x00170000, + OPC_SRL_W = 0x00178000, + OPC_SRA_W = 0x00180000, + OPC_SLL_D = 0x00188000, + OPC_SRL_D = 0x00190000, + OPC_SRA_D = 0x00198000, + OPC_ROTR_W = 0x001b0000, + OPC_ROTR_D = 0x001b8000, + OPC_MUL_W = 0x001c0000, + OPC_MULH_W = 0x001c8000, + OPC_MULH_WU = 0x001d0000, + OPC_MUL_D = 0x001d8000, + OPC_MULH_D = 0x001e0000, + OPC_MULH_DU = 0x001e8000, + OPC_DIV_W = 0x00200000, + OPC_MOD_W = 0x00208000, + OPC_DIV_WU = 0x00210000, + OPC_MOD_WU = 0x00218000, + OPC_DIV_D = 0x00220000, + OPC_MOD_D = 0x00228000, + OPC_DIV_DU = 0x00230000, + OPC_MOD_DU = 0x00238000, + OPC_SLLI_W = 0x00408000, + OPC_SLLI_D = 0x00410000, + OPC_SRLI_W = 0x00448000, + OPC_SRLI_D = 0x00450000, + OPC_SRAI_W = 0x00488000, + OPC_SRAI_D = 0x00490000, + OPC_ROTRI_W = 0x004c8000, + OPC_ROTRI_D = 0x004d0000, + OPC_BSTRINS_W = 0x00600000, + OPC_BSTRPICK_W = 0x00608000, + OPC_BSTRINS_D = 0x00800000, + OPC_BSTRPICK_D = 0x00c00000, + OPC_SLTI = 0x02000000, + OPC_SLTUI = 0x02400000, + OPC_ADDI_W = 0x02800000, + OPC_ADDI_D = 0x02c00000, + OPC_CU52I_D = 0x03000000, + OPC_ANDI = 0x03400000, + OPC_ORI = 0x03800000, + OPC_XORI = 0x03c00000, + OPC_VFMADD_S = 0x09100000, + OPC_VFMADD_D = 0x09200000, + OPC_VFMSUB_S = 0x09500000, + OPC_VFMSUB_D = 0x09600000, + OPC_VFNMADD_S = 0x09900000, + OPC_VFNMADD_D = 0x09a00000, + OPC_VFNMSUB_S = 0x09d00000, + OPC_VFNMSUB_D = 0x09e00000, + OPC_VFCMP_CAF_S = 0x0c500000, + OPC_VFCMP_SAF_S = 0x0c508000, + OPC_VFCMP_CLT_S = 0x0c510000, + OPC_VFCMP_SLT_S = 0x0c518000, + OPC_VFCMP_CEQ_S = 0x0c520000, + OPC_VFCMP_SEQ_S = 0x0c528000, + OPC_VFCMP_CLE_S = 0x0c530000, + OPC_VFCMP_SLE_S = 0x0c538000, + OPC_VFCMP_CUN_S = 0x0c540000, + OPC_VFCMP_SUN_S = 0x0c548000, + OPC_VFCMP_CULT_S = 0x0c550000, + OPC_VFCMP_SULT_S = 0x0c558000, + OPC_VFCMP_CUEQ_S = 0x0c560000, + OPC_VFCMP_SUEQ_S = 0x0c568000, + OPC_VFCMP_CULE_S = 0x0c570000, + OPC_VFCMP_SULE_S = 0x0c578000, + OPC_VFCMP_CNE_S = 0x0c580000, + OPC_VFCMP_SNE_S = 0x0c588000, + OPC_VFCMP_COR_S = 0x0c5a0000, + OPC_VFCMP_SOR_S = 0x0c5a8000, + OPC_VFCMP_CUNE_S = 0x0c5c0000, + OPC_VFCMP_SUNE_S = 0x0c5c8000, + OPC_VFCMP_CAF_D = 0x0c600000, + OPC_VFCMP_SAF_D = 0x0c608000, + OPC_VFCMP_CLT_D = 0x0c610000, + OPC_VFCMP_SLT_D = 0x0c618000, + OPC_VFCMP_CEQ_D = 0x0c620000, + OPC_VFCMP_SEQ_D = 0x0c628000, + OPC_VFCMP_CLE_D = 0x0c630000, + OPC_VFCMP_SLE_D = 0x0c638000, + OPC_VFCMP_CUN_D = 0x0c640000, + OPC_VFCMP_SUN_D = 0x0c648000, + OPC_VFCMP_CULT_D = 0x0c650000, + OPC_VFCMP_SULT_D = 0x0c658000, + OPC_VFCMP_CUEQ_D = 0x0c660000, + OPC_VFCMP_SUEQ_D = 0x0c668000, + OPC_VFCMP_CULE_D = 0x0c670000, + OPC_VFCMP_SULE_D = 0x0c678000, + OPC_VFCMP_CNE_D = 0x0c680000, + OPC_VFCMP_SNE_D = 0x0c688000, + OPC_VFCMP_COR_D = 0x0c6a0000, + OPC_VFCMP_SOR_D = 0x0c6a8000, + OPC_VFCMP_CUNE_D = 0x0c6c0000, + OPC_VFCMP_SUNE_D = 0x0c6c8000, + OPC_VBITSEL_V = 0x0d100000, + OPC_VSHUF_B = 0x0d500000, + OPC_ADDU16I_D = 0x10000000, + OPC_LU12I_W = 0x14000000, + OPC_CU32I_D = 0x16000000, + 
OPC_PCADDU2I = 0x18000000, + OPC_PCALAU12I = 0x1a000000, + OPC_PCADDU12I = 0x1c000000, + OPC_PCADDU18I = 0x1e000000, + OPC_LD_B = 0x28000000, + OPC_LD_H = 0x28400000, + OPC_LD_W = 0x28800000, + OPC_LD_D = 0x28c00000, + OPC_ST_B = 0x29000000, + OPC_ST_H = 0x29400000, + OPC_ST_W = 0x29800000, + OPC_ST_D = 0x29c00000, + OPC_LD_BU = 0x2a000000, + OPC_LD_HU = 0x2a400000, + OPC_LD_WU = 0x2a800000, + OPC_VLD = 0x2c000000, + OPC_VST = 0x2c400000, + OPC_VLDREPL_D = 0x30100000, + OPC_VLDREPL_W = 0x30200000, + OPC_VLDREPL_H = 0x30400000, + OPC_VLDREPL_B = 0x30800000, + OPC_VSTELM_D = 0x31100000, + OPC_VSTELM_W = 0x31200000, + OPC_VSTELM_H = 0x31400000, + OPC_VSTELM_B = 0x31800000, + OPC_LDX_B = 0x38000000, + OPC_LDX_H = 0x38040000, + OPC_LDX_W = 0x38080000, + OPC_LDX_D = 0x380c0000, + OPC_STX_B = 0x38100000, + OPC_STX_H = 0x38140000, + OPC_STX_W = 0x38180000, + OPC_STX_D = 0x381c0000, + OPC_LDX_BU = 0x38200000, + OPC_LDX_HU = 0x38240000, + OPC_LDX_WU = 0x38280000, + OPC_VLDX = 0x38400000, + OPC_VSTX = 0x38440000, + OPC_DBAR = 0x38720000, + OPC_JIRL = 0x4c000000, + OPC_B = 0x50000000, + OPC_BL = 0x54000000, + OPC_BEQ = 0x58000000, + OPC_BNE = 0x5c000000, + OPC_BGT = 0x60000000, + OPC_BLE = 0x64000000, + OPC_BGTU = 0x68000000, + OPC_BLEU = 0x6c000000, + OPC_VSEQ_B = 0x70000000, + OPC_VSEQ_H = 0x70008000, + OPC_VSEQ_W = 0x70010000, + OPC_VSEQ_D = 0x70018000, + OPC_VSLE_B = 0x70020000, + OPC_VSLE_H = 0x70028000, + OPC_VSLE_W = 0x70030000, + OPC_VSLE_D = 0x70038000, + OPC_VSLE_BU = 0x70040000, + OPC_VSLE_HU = 0x70048000, + OPC_VSLE_WU = 0x70050000, + OPC_VSLE_DU = 0x70058000, + OPC_VSLT_B = 0x70060000, + OPC_VSLT_H = 0x70068000, + OPC_VSLT_W = 0x70070000, + OPC_VSLT_D = 0x70078000, + OPC_VSLT_BU = 0x70080000, + OPC_VSLT_HU = 0x70088000, + OPC_VSLT_WU = 0x70090000, + OPC_VSLT_DU = 0x70098000, + OPC_VADD_B = 0x700a0000, + OPC_VADD_H = 0x700a8000, + OPC_VADD_W = 0x700b0000, + OPC_VADD_D = 0x700b8000, + OPC_VSUB_B = 0x700c0000, + OPC_VSUB_H = 0x700c8000, + OPC_VSUB_W = 0x700d0000, + OPC_VSUB_D = 0x700d8000, + OPC_VADDWEV_H_B = 0x701e0000, + OPC_VADDWEV_W_H = 0x701e8000, + OPC_VADDWEV_D_W = 0x701f0000, + OPC_VADDWEV_Q_D = 0x701f8000, + OPC_VSUBWEV_H_B = 0x70200000, + OPC_VSUBWEV_W_H = 0x70208000, + OPC_VSUBWEV_D_W = 0x70210000, + OPC_VSUBWEV_Q_D = 0x70218000, + OPC_VADDWOD_H_B = 0x70220000, + OPC_VADDWOD_W_H = 0x70228000, + OPC_VADDWOD_D_W = 0x70230000, + OPC_VADDWOD_Q_D = 0x70238000, + OPC_VSUBWOD_H_B = 0x70240000, + OPC_VSUBWOD_W_H = 0x70248000, + OPC_VSUBWOD_D_W = 0x70250000, + OPC_VSUBWOD_Q_D = 0x70258000, + OPC_VADDWEV_H_BU = 0x702e0000, + OPC_VADDWEV_W_HU = 0x702e8000, + OPC_VADDWEV_D_WU = 0x702f0000, + OPC_VADDWEV_Q_DU = 0x702f8000, + OPC_VSUBWEV_H_BU = 0x70300000, + OPC_VSUBWEV_W_HU = 0x70308000, + OPC_VSUBWEV_D_WU = 0x70310000, + OPC_VSUBWEV_Q_DU = 0x70318000, + OPC_VADDWOD_H_BU = 0x70320000, + OPC_VADDWOD_W_HU = 0x70328000, + OPC_VADDWOD_D_WU = 0x70330000, + OPC_VADDWOD_Q_DU = 0x70338000, + OPC_VSUBWOD_H_BU = 0x70340000, + OPC_VSUBWOD_W_HU = 0x70348000, + OPC_VSUBWOD_D_WU = 0x70350000, + OPC_VSUBWOD_Q_DU = 0x70358000, + OPC_VADDWEV_H_BU_B = 0x703e0000, + OPC_VADDWEV_W_HU_H = 0x703e8000, + OPC_VADDWEV_D_WU_W = 0x703f0000, + OPC_VADDWEV_Q_DU_D = 0x703f8000, + OPC_VADDWOD_H_BU_B = 0x70400000, + OPC_VADDWOD_W_HU_H = 0x70408000, + OPC_VADDWOD_D_WU_W = 0x70410000, + OPC_VADDWOD_Q_DU_D = 0x70418000, + OPC_VSADD_B = 0x70460000, + OPC_VSADD_H = 0x70468000, + OPC_VSADD_W = 0x70470000, + OPC_VSADD_D = 0x70478000, + OPC_VSSUB_B = 0x70480000, + OPC_VSSUB_H = 0x70488000, + OPC_VSSUB_W = 0x70490000, + OPC_VSSUB_D = 
0x70498000, + OPC_VSADD_BU = 0x704a0000, + OPC_VSADD_HU = 0x704a8000, + OPC_VSADD_WU = 0x704b0000, + OPC_VSADD_DU = 0x704b8000, + OPC_VSSUB_BU = 0x704c0000, + OPC_VSSUB_HU = 0x704c8000, + OPC_VSSUB_WU = 0x704d0000, + OPC_VSSUB_DU = 0x704d8000, + OPC_VHADDW_H_B = 0x70540000, + OPC_VHADDW_W_H = 0x70548000, + OPC_VHADDW_D_W = 0x70550000, + OPC_VHADDW_Q_D = 0x70558000, + OPC_VHSUBW_H_B = 0x70560000, + OPC_VHSUBW_W_H = 0x70568000, + OPC_VHSUBW_D_W = 0x70570000, + OPC_VHSUBW_Q_D = 0x70578000, + OPC_VHADDW_HU_BU = 0x70580000, + OPC_VHADDW_WU_HU = 0x70588000, + OPC_VHADDW_DU_WU = 0x70590000, + OPC_VHADDW_QU_DU = 0x70598000, + OPC_VHSUBW_HU_BU = 0x705a0000, + OPC_VHSUBW_WU_HU = 0x705a8000, + OPC_VHSUBW_DU_WU = 0x705b0000, + OPC_VHSUBW_QU_DU = 0x705b8000, + OPC_VADDA_B = 0x705c0000, + OPC_VADDA_H = 0x705c8000, + OPC_VADDA_W = 0x705d0000, + OPC_VADDA_D = 0x705d8000, + OPC_VABSD_B = 0x70600000, + OPC_VABSD_H = 0x70608000, + OPC_VABSD_W = 0x70610000, + OPC_VABSD_D = 0x70618000, + OPC_VABSD_BU = 0x70620000, + OPC_VABSD_HU = 0x70628000, + OPC_VABSD_WU = 0x70630000, + OPC_VABSD_DU = 0x70638000, + OPC_VAVG_B = 0x70640000, + OPC_VAVG_H = 0x70648000, + OPC_VAVG_W = 0x70650000, + OPC_VAVG_D = 0x70658000, + OPC_VAVG_BU = 0x70660000, + OPC_VAVG_HU = 0x70668000, + OPC_VAVG_WU = 0x70670000, + OPC_VAVG_DU = 0x70678000, + OPC_VAVGR_B = 0x70680000, + OPC_VAVGR_H = 0x70688000, + OPC_VAVGR_W = 0x70690000, + OPC_VAVGR_D = 0x70698000, + OPC_VAVGR_BU = 0x706a0000, + OPC_VAVGR_HU = 0x706a8000, + OPC_VAVGR_WU = 0x706b0000, + OPC_VAVGR_DU = 0x706b8000, + OPC_VMAX_B = 0x70700000, + OPC_VMAX_H = 0x70708000, + OPC_VMAX_W = 0x70710000, + OPC_VMAX_D = 0x70718000, + OPC_VMIN_B = 0x70720000, + OPC_VMIN_H = 0x70728000, + OPC_VMIN_W = 0x70730000, + OPC_VMIN_D = 0x70738000, + OPC_VMAX_BU = 0x70740000, + OPC_VMAX_HU = 0x70748000, + OPC_VMAX_WU = 0x70750000, + OPC_VMAX_DU = 0x70758000, + OPC_VMIN_BU = 0x70760000, + OPC_VMIN_HU = 0x70768000, + OPC_VMIN_WU = 0x70770000, + OPC_VMIN_DU = 0x70778000, + OPC_VMUL_B = 0x70840000, + OPC_VMUL_H = 0x70848000, + OPC_VMUL_W = 0x70850000, + OPC_VMUL_D = 0x70858000, + OPC_VMUH_B = 0x70860000, + OPC_VMUH_H = 0x70868000, + OPC_VMUH_W = 0x70870000, + OPC_VMUH_D = 0x70878000, + OPC_VMUH_BU = 0x70880000, + OPC_VMUH_HU = 0x70888000, + OPC_VMUH_WU = 0x70890000, + OPC_VMUH_DU = 0x70898000, + OPC_VMULWEV_H_B = 0x70900000, + OPC_VMULWEV_W_H = 0x70908000, + OPC_VMULWEV_D_W = 0x70910000, + OPC_VMULWEV_Q_D = 0x70918000, + OPC_VMULWOD_H_B = 0x70920000, + OPC_VMULWOD_W_H = 0x70928000, + OPC_VMULWOD_D_W = 0x70930000, + OPC_VMULWOD_Q_D = 0x70938000, + OPC_VMULWEV_H_BU = 0x70980000, + OPC_VMULWEV_W_HU = 0x70988000, + OPC_VMULWEV_D_WU = 0x70990000, + OPC_VMULWEV_Q_DU = 0x70998000, + OPC_VMULWOD_H_BU = 0x709a0000, + OPC_VMULWOD_W_HU = 0x709a8000, + OPC_VMULWOD_D_WU = 0x709b0000, + OPC_VMULWOD_Q_DU = 0x709b8000, + OPC_VMULWEV_H_BU_B = 0x70a00000, + OPC_VMULWEV_W_HU_H = 0x70a08000, + OPC_VMULWEV_D_WU_W = 0x70a10000, + OPC_VMULWEV_Q_DU_D = 0x70a18000, + OPC_VMULWOD_H_BU_B = 0x70a20000, + OPC_VMULWOD_W_HU_H = 0x70a28000, + OPC_VMULWOD_D_WU_W = 0x70a30000, + OPC_VMULWOD_Q_DU_D = 0x70a38000, + OPC_VMADD_B = 0x70a80000, + OPC_VMADD_H = 0x70a88000, + OPC_VMADD_W = 0x70a90000, + OPC_VMADD_D = 0x70a98000, + OPC_VMSUB_B = 0x70aa0000, + OPC_VMSUB_H = 0x70aa8000, + OPC_VMSUB_W = 0x70ab0000, + OPC_VMSUB_D = 0x70ab8000, + OPC_VMADDWEV_H_B = 0x70ac0000, + OPC_VMADDWEV_W_H = 0x70ac8000, + OPC_VMADDWEV_D_W = 0x70ad0000, + OPC_VMADDWEV_Q_D = 0x70ad8000, + OPC_VMADDWOD_H_B = 0x70ae0000, + OPC_VMADDWOD_W_H = 0x70ae8000, + OPC_VMADDWOD_D_W = 
0x70af0000, + OPC_VMADDWOD_Q_D = 0x70af8000, + OPC_VMADDWEV_H_BU = 0x70b40000, + OPC_VMADDWEV_W_HU = 0x70b48000, + OPC_VMADDWEV_D_WU = 0x70b50000, + OPC_VMADDWEV_Q_DU = 0x70b58000, + OPC_VMADDWOD_H_BU = 0x70b60000, + OPC_VMADDWOD_W_HU = 0x70b68000, + OPC_VMADDWOD_D_WU = 0x70b70000, + OPC_VMADDWOD_Q_DU = 0x70b78000, + OPC_VMADDWEV_H_BU_B = 0x70bc0000, + OPC_VMADDWEV_W_HU_H = 0x70bc8000, + OPC_VMADDWEV_D_WU_W = 0x70bd0000, + OPC_VMADDWEV_Q_DU_D = 0x70bd8000, + OPC_VMADDWOD_H_BU_B = 0x70be0000, + OPC_VMADDWOD_W_HU_H = 0x70be8000, + OPC_VMADDWOD_D_WU_W = 0x70bf0000, + OPC_VMADDWOD_Q_DU_D = 0x70bf8000, + OPC_VDIV_B = 0x70e00000, + OPC_VDIV_H = 0x70e08000, + OPC_VDIV_W = 0x70e10000, + OPC_VDIV_D = 0x70e18000, + OPC_VMOD_B = 0x70e20000, + OPC_VMOD_H = 0x70e28000, + OPC_VMOD_W = 0x70e30000, + OPC_VMOD_D = 0x70e38000, + OPC_VDIV_BU = 0x70e40000, + OPC_VDIV_HU = 0x70e48000, + OPC_VDIV_WU = 0x70e50000, + OPC_VDIV_DU = 0x70e58000, + OPC_VMOD_BU = 0x70e60000, + OPC_VMOD_HU = 0x70e68000, + OPC_VMOD_WU = 0x70e70000, + OPC_VMOD_DU = 0x70e78000, + OPC_VSLL_B = 0x70e80000, + OPC_VSLL_H = 0x70e88000, + OPC_VSLL_W = 0x70e90000, + OPC_VSLL_D = 0x70e98000, + OPC_VSRL_B = 0x70ea0000, + OPC_VSRL_H = 0x70ea8000, + OPC_VSRL_W = 0x70eb0000, + OPC_VSRL_D = 0x70eb8000, + OPC_VSRA_B = 0x70ec0000, + OPC_VSRA_H = 0x70ec8000, + OPC_VSRA_W = 0x70ed0000, + OPC_VSRA_D = 0x70ed8000, + OPC_VROTR_B = 0x70ee0000, + OPC_VROTR_H = 0x70ee8000, + OPC_VROTR_W = 0x70ef0000, + OPC_VROTR_D = 0x70ef8000, + OPC_VSRLR_B = 0x70f00000, + OPC_VSRLR_H = 0x70f08000, + OPC_VSRLR_W = 0x70f10000, + OPC_VSRLR_D = 0x70f18000, + OPC_VSRAR_B = 0x70f20000, + OPC_VSRAR_H = 0x70f28000, + OPC_VSRAR_W = 0x70f30000, + OPC_VSRAR_D = 0x70f38000, + OPC_VSRLN_B_H = 0x70f48000, + OPC_VSRLN_H_W = 0x70f50000, + OPC_VSRLN_W_D = 0x70f58000, + OPC_VSRAN_B_H = 0x70f68000, + OPC_VSRAN_H_W = 0x70f70000, + OPC_VSRAN_W_D = 0x70f78000, + OPC_VSRLRN_B_H = 0x70f88000, + OPC_VSRLRN_H_W = 0x70f90000, + OPC_VSRLRN_W_D = 0x70f98000, + OPC_VSRARN_B_H = 0x70fa8000, + OPC_VSRARN_H_W = 0x70fb0000, + OPC_VSRARN_W_D = 0x70fb8000, + OPC_VSSRLN_B_H = 0x70fc8000, + OPC_VSSRLN_H_W = 0x70fd0000, + OPC_VSSRLN_W_D = 0x70fd8000, + OPC_VSSRAN_B_H = 0x70fe8000, + OPC_VSSRAN_H_W = 0x70ff0000, + OPC_VSSRAN_W_D = 0x70ff8000, + OPC_VSSRLRN_B_H = 0x71008000, + OPC_VSSRLRN_H_W = 0x71010000, + OPC_VSSRLRN_W_D = 0x71018000, + OPC_VSSRARN_B_H = 0x71028000, + OPC_VSSRARN_H_W = 0x71030000, + OPC_VSSRARN_W_D = 0x71038000, + OPC_VSSRLN_BU_H = 0x71048000, + OPC_VSSRLN_HU_W = 0x71050000, + OPC_VSSRLN_WU_D = 0x71058000, + OPC_VSSRAN_BU_H = 0x71068000, + OPC_VSSRAN_HU_W = 0x71070000, + OPC_VSSRAN_WU_D = 0x71078000, + OPC_VSSRLRN_BU_H = 0x71088000, + OPC_VSSRLRN_HU_W = 0x71090000, + OPC_VSSRLRN_WU_D = 0x71098000, + OPC_VSSRARN_BU_H = 0x710a8000, + OPC_VSSRARN_HU_W = 0x710b0000, + OPC_VSSRARN_WU_D = 0x710b8000, + OPC_VBITCLR_B = 0x710c0000, + OPC_VBITCLR_H = 0x710c8000, + OPC_VBITCLR_W = 0x710d0000, + OPC_VBITCLR_D = 0x710d8000, + OPC_VBITSET_B = 0x710e0000, + OPC_VBITSET_H = 0x710e8000, + OPC_VBITSET_W = 0x710f0000, + OPC_VBITSET_D = 0x710f8000, + OPC_VBITREV_B = 0x71100000, + OPC_VBITREV_H = 0x71108000, + OPC_VBITREV_W = 0x71110000, + OPC_VBITREV_D = 0x71118000, + OPC_VPACKEV_B = 0x71160000, + OPC_VPACKEV_H = 0x71168000, + OPC_VPACKEV_W = 0x71170000, + OPC_VPACKEV_D = 0x71178000, + OPC_VPACKOD_B = 0x71180000, + OPC_VPACKOD_H = 0x71188000, + OPC_VPACKOD_W = 0x71190000, + OPC_VPACKOD_D = 0x71198000, + OPC_VILVL_B = 0x711a0000, + OPC_VILVL_H = 0x711a8000, + OPC_VILVL_W = 0x711b0000, + OPC_VILVL_D = 0x711b8000, + 
OPC_VILVH_B = 0x711c0000, + OPC_VILVH_H = 0x711c8000, + OPC_VILVH_W = 0x711d0000, + OPC_VILVH_D = 0x711d8000, + OPC_VPICKEV_B = 0x711e0000, + OPC_VPICKEV_H = 0x711e8000, + OPC_VPICKEV_W = 0x711f0000, + OPC_VPICKEV_D = 0x711f8000, + OPC_VPICKOD_B = 0x71200000, + OPC_VPICKOD_H = 0x71208000, + OPC_VPICKOD_W = 0x71210000, + OPC_VPICKOD_D = 0x71218000, + OPC_VREPLVE_B = 0x71220000, + OPC_VREPLVE_H = 0x71228000, + OPC_VREPLVE_W = 0x71230000, + OPC_VREPLVE_D = 0x71238000, + OPC_VAND_V = 0x71260000, + OPC_VOR_V = 0x71268000, + OPC_VXOR_V = 0x71270000, + OPC_VNOR_V = 0x71278000, + OPC_VANDN_V = 0x71280000, + OPC_VORN_V = 0x71288000, + OPC_VFRSTP_B = 0x712b0000, + OPC_VFRSTP_H = 0x712b8000, + OPC_VADD_Q = 0x712d0000, + OPC_VSUB_Q = 0x712d8000, + OPC_VSIGNCOV_B = 0x712e0000, + OPC_VSIGNCOV_H = 0x712e8000, + OPC_VSIGNCOV_W = 0x712f0000, + OPC_VSIGNCOV_D = 0x712f8000, + OPC_VFADD_S = 0x71308000, + OPC_VFADD_D = 0x71310000, + OPC_VFSUB_S = 0x71328000, + OPC_VFSUB_D = 0x71330000, + OPC_VFMUL_S = 0x71388000, + OPC_VFMUL_D = 0x71390000, + OPC_VFDIV_S = 0x713a8000, + OPC_VFDIV_D = 0x713b0000, + OPC_VFMAX_S = 0x713c8000, + OPC_VFMAX_D = 0x713d0000, + OPC_VFMIN_S = 0x713e8000, + OPC_VFMIN_D = 0x713f0000, + OPC_VFMAXA_S = 0x71408000, + OPC_VFMAXA_D = 0x71410000, + OPC_VFMINA_S = 0x71428000, + OPC_VFMINA_D = 0x71430000, + OPC_VFCVT_H_S = 0x71460000, + OPC_VFCVT_S_D = 0x71468000, + OPC_VFFINT_S_L = 0x71480000, + OPC_VFTINT_W_D = 0x71498000, + OPC_VFTINTRM_W_D = 0x714a0000, + OPC_VFTINTRP_W_D = 0x714a8000, + OPC_VFTINTRZ_W_D = 0x714b0000, + OPC_VFTINTRNE_W_D = 0x714b8000, + OPC_VSHUF_H = 0x717a8000, + OPC_VSHUF_W = 0x717b0000, + OPC_VSHUF_D = 0x717b8000, + OPC_VSEQI_B = 0x72800000, + OPC_VSEQI_H = 0x72808000, + OPC_VSEQI_W = 0x72810000, + OPC_VSEQI_D = 0x72818000, + OPC_VSLEI_B = 0x72820000, + OPC_VSLEI_H = 0x72828000, + OPC_VSLEI_W = 0x72830000, + OPC_VSLEI_D = 0x72838000, + OPC_VSLEI_BU = 0x72840000, + OPC_VSLEI_HU = 0x72848000, + OPC_VSLEI_WU = 0x72850000, + OPC_VSLEI_DU = 0x72858000, + OPC_VSLTI_B = 0x72860000, + OPC_VSLTI_H = 0x72868000, + OPC_VSLTI_W = 0x72870000, + OPC_VSLTI_D = 0x72878000, + OPC_VSLTI_BU = 0x72880000, + OPC_VSLTI_HU = 0x72888000, + OPC_VSLTI_WU = 0x72890000, + OPC_VSLTI_DU = 0x72898000, + OPC_VADDI_BU = 0x728a0000, + OPC_VADDI_HU = 0x728a8000, + OPC_VADDI_WU = 0x728b0000, + OPC_VADDI_DU = 0x728b8000, + OPC_VSUBI_BU = 0x728c0000, + OPC_VSUBI_HU = 0x728c8000, + OPC_VSUBI_WU = 0x728d0000, + OPC_VSUBI_DU = 0x728d8000, + OPC_VBSLL_V = 0x728e0000, + OPC_VBSRL_V = 0x728e8000, + OPC_VMAXI_B = 0x72900000, + OPC_VMAXI_H = 0x72908000, + OPC_VMAXI_W = 0x72910000, + OPC_VMAXI_D = 0x72918000, + OPC_VMINI_B = 0x72920000, + OPC_VMINI_H = 0x72928000, + OPC_VMINI_W = 0x72930000, + OPC_VMINI_D = 0x72938000, + OPC_VMAXI_BU = 0x72940000, + OPC_VMAXI_HU = 0x72948000, + OPC_VMAXI_WU = 0x72950000, + OPC_VMAXI_DU = 0x72958000, + OPC_VMINI_BU = 0x72960000, + OPC_VMINI_HU = 0x72968000, + OPC_VMINI_WU = 0x72970000, + OPC_VMINI_DU = 0x72978000, + OPC_VFRSTPI_B = 0x729a0000, + OPC_VFRSTPI_H = 0x729a8000, + OPC_VCLO_B = 0x729c0000, + OPC_VCLO_H = 0x729c0400, + OPC_VCLO_W = 0x729c0800, + OPC_VCLO_D = 0x729c0c00, + OPC_VCLZ_B = 0x729c1000, + OPC_VCLZ_H = 0x729c1400, + OPC_VCLZ_W = 0x729c1800, + OPC_VCLZ_D = 0x729c1c00, + OPC_VPCNT_B = 0x729c2000, + OPC_VPCNT_H = 0x729c2400, + OPC_VPCNT_W = 0x729c2800, + OPC_VPCNT_D = 0x729c2c00, + OPC_VNEG_B = 0x729c3000, + OPC_VNEG_H = 0x729c3400, + OPC_VNEG_W = 0x729c3800, + OPC_VNEG_D = 0x729c3c00, + OPC_VMSKLTZ_B = 0x729c4000, + OPC_VMSKLTZ_H = 0x729c4400, + OPC_VMSKLTZ_W = 
0x729c4800, + OPC_VMSKLTZ_D = 0x729c4c00, + OPC_VMSKGEZ_B = 0x729c5000, + OPC_VMSKNZ_B = 0x729c6000, + OPC_VSETEQZ_V = 0x729c9800, + OPC_VSETNEZ_V = 0x729c9c00, + OPC_VSETANYEQZ_B = 0x729ca000, + OPC_VSETANYEQZ_H = 0x729ca400, + OPC_VSETANYEQZ_W = 0x729ca800, + OPC_VSETANYEQZ_D = 0x729cac00, + OPC_VSETALLNEZ_B = 0x729cb000, + OPC_VSETALLNEZ_H = 0x729cb400, + OPC_VSETALLNEZ_W = 0x729cb800, + OPC_VSETALLNEZ_D = 0x729cbc00, + OPC_VFLOGB_S = 0x729cc400, + OPC_VFLOGB_D = 0x729cc800, + OPC_VFCLASS_S = 0x729cd400, + OPC_VFCLASS_D = 0x729cd800, + OPC_VFSQRT_S = 0x729ce400, + OPC_VFSQRT_D = 0x729ce800, + OPC_VFRECIP_S = 0x729cf400, + OPC_VFRECIP_D = 0x729cf800, + OPC_VFRSQRT_S = 0x729d0400, + OPC_VFRSQRT_D = 0x729d0800, + OPC_VFRINT_S = 0x729d3400, + OPC_VFRINT_D = 0x729d3800, + OPC_VFRINTRM_S = 0x729d4400, + OPC_VFRINTRM_D = 0x729d4800, + OPC_VFRINTRP_S = 0x729d5400, + OPC_VFRINTRP_D = 0x729d5800, + OPC_VFRINTRZ_S = 0x729d6400, + OPC_VFRINTRZ_D = 0x729d6800, + OPC_VFRINTRNE_S = 0x729d7400, + OPC_VFRINTRNE_D = 0x729d7800, + OPC_VFCVTL_S_H = 0x729de800, + OPC_VFCVTH_S_H = 0x729dec00, + OPC_VFCVTL_D_S = 0x729df000, + OPC_VFCVTH_D_S = 0x729df400, + OPC_VFFINT_S_W = 0x729e0000, + OPC_VFFINT_S_WU = 0x729e0400, + OPC_VFFINT_D_L = 0x729e0800, + OPC_VFFINT_D_LU = 0x729e0c00, + OPC_VFFINTL_D_W = 0x729e1000, + OPC_VFFINTH_D_W = 0x729e1400, + OPC_VFTINT_W_S = 0x729e3000, + OPC_VFTINT_L_D = 0x729e3400, + OPC_VFTINTRM_W_S = 0x729e3800, + OPC_VFTINTRM_L_D = 0x729e3c00, + OPC_VFTINTRP_W_S = 0x729e4000, + OPC_VFTINTRP_L_D = 0x729e4400, + OPC_VFTINTRZ_W_S = 0x729e4800, + OPC_VFTINTRZ_L_D = 0x729e4c00, + OPC_VFTINTRNE_W_S = 0x729e5000, + OPC_VFTINTRNE_L_D = 0x729e5400, + OPC_VFTINT_WU_S = 0x729e5800, + OPC_VFTINT_LU_D = 0x729e5c00, + OPC_VFTINTRZ_WU_S = 0x729e7000, + OPC_VFTINTRZ_LU_D = 0x729e7400, + OPC_VFTINTL_L_S = 0x729e8000, + OPC_VFTINTH_L_S = 0x729e8400, + OPC_VFTINTRML_L_S = 0x729e8800, + OPC_VFTINTRMH_L_S = 0x729e8c00, + OPC_VFTINTRPL_L_S = 0x729e9000, + OPC_VFTINTRPH_L_S = 0x729e9400, + OPC_VFTINTRZL_L_S = 0x729e9800, + OPC_VFTINTRZH_L_S = 0x729e9c00, + OPC_VFTINTRNEL_L_S = 0x729ea000, + OPC_VFTINTRNEH_L_S = 0x729ea400, + OPC_VEXTH_H_B = 0x729ee000, + OPC_VEXTH_W_H = 0x729ee400, + OPC_VEXTH_D_W = 0x729ee800, + OPC_VEXTH_Q_D = 0x729eec00, + OPC_VEXTH_HU_BU = 0x729ef000, + OPC_VEXTH_WU_HU = 0x729ef400, + OPC_VEXTH_DU_WU = 0x729ef800, + OPC_VEXTH_QU_DU = 0x729efc00, + OPC_VREPLGR2VR_B = 0x729f0000, + OPC_VREPLGR2VR_H = 0x729f0400, + OPC_VREPLGR2VR_W = 0x729f0800, + OPC_VREPLGR2VR_D = 0x729f0c00, + OPC_VROTRI_B = 0x72a02000, + OPC_VROTRI_H = 0x72a04000, + OPC_VROTRI_W = 0x72a08000, + OPC_VROTRI_D = 0x72a10000, + OPC_VSRLRI_B = 0x72a42000, + OPC_VSRLRI_H = 0x72a44000, + OPC_VSRLRI_W = 0x72a48000, + OPC_VSRLRI_D = 0x72a50000, + OPC_VSRARI_B = 0x72a82000, + OPC_VSRARI_H = 0x72a84000, + OPC_VSRARI_W = 0x72a88000, + OPC_VSRARI_D = 0x72a90000, + OPC_VINSGR2VR_B = 0x72eb8000, + OPC_VINSGR2VR_H = 0x72ebc000, + OPC_VINSGR2VR_W = 0x72ebe000, + OPC_VINSGR2VR_D = 0x72ebf000, + OPC_VPICKVE2GR_B = 0x72ef8000, + OPC_VPICKVE2GR_H = 0x72efc000, + OPC_VPICKVE2GR_W = 0x72efe000, + OPC_VPICKVE2GR_D = 0x72eff000, + OPC_VPICKVE2GR_BU = 0x72f38000, + OPC_VPICKVE2GR_HU = 0x72f3c000, + OPC_VPICKVE2GR_WU = 0x72f3e000, + OPC_VPICKVE2GR_DU = 0x72f3f000, + OPC_VREPLVEI_B = 0x72f78000, + OPC_VREPLVEI_H = 0x72f7c000, + OPC_VREPLVEI_W = 0x72f7e000, + OPC_VREPLVEI_D = 0x72f7f000, + OPC_VSLLWIL_H_B = 0x73082000, + OPC_VSLLWIL_W_H = 0x73084000, + OPC_VSLLWIL_D_W = 0x73088000, + OPC_VEXTL_Q_D = 0x73090000, + OPC_VSLLWIL_HU_BU = 0x730c2000, + 
OPC_VSLLWIL_WU_HU = 0x730c4000, + OPC_VSLLWIL_DU_WU = 0x730c8000, + OPC_VEXTL_QU_DU = 0x730d0000, + OPC_VBITCLRI_B = 0x73102000, + OPC_VBITCLRI_H = 0x73104000, + OPC_VBITCLRI_W = 0x73108000, + OPC_VBITCLRI_D = 0x73110000, + OPC_VBITSETI_B = 0x73142000, + OPC_VBITSETI_H = 0x73144000, + OPC_VBITSETI_W = 0x73148000, + OPC_VBITSETI_D = 0x73150000, + OPC_VBITREVI_B = 0x73182000, + OPC_VBITREVI_H = 0x73184000, + OPC_VBITREVI_W = 0x73188000, + OPC_VBITREVI_D = 0x73190000, + OPC_VSAT_B = 0x73242000, + OPC_VSAT_H = 0x73244000, + OPC_VSAT_W = 0x73248000, + OPC_VSAT_D = 0x73250000, + OPC_VSAT_BU = 0x73282000, + OPC_VSAT_HU = 0x73284000, + OPC_VSAT_WU = 0x73288000, + OPC_VSAT_DU = 0x73290000, + OPC_VSLLI_B = 0x732c2000, + OPC_VSLLI_H = 0x732c4000, + OPC_VSLLI_W = 0x732c8000, + OPC_VSLLI_D = 0x732d0000, + OPC_VSRLI_B = 0x73302000, + OPC_VSRLI_H = 0x73304000, + OPC_VSRLI_W = 0x73308000, + OPC_VSRLI_D = 0x73310000, + OPC_VSRAI_B = 0x73342000, + OPC_VSRAI_H = 0x73344000, + OPC_VSRAI_W = 0x73348000, + OPC_VSRAI_D = 0x73350000, + OPC_VSRLNI_B_H = 0x73404000, + OPC_VSRLNI_H_W = 0x73408000, + OPC_VSRLNI_W_D = 0x73410000, + OPC_VSRLNI_D_Q = 0x73420000, + OPC_VSRLRNI_B_H = 0x73444000, + OPC_VSRLRNI_H_W = 0x73448000, + OPC_VSRLRNI_W_D = 0x73450000, + OPC_VSRLRNI_D_Q = 0x73460000, + OPC_VSSRLNI_B_H = 0x73484000, + OPC_VSSRLNI_H_W = 0x73488000, + OPC_VSSRLNI_W_D = 0x73490000, + OPC_VSSRLNI_D_Q = 0x734a0000, + OPC_VSSRLNI_BU_H = 0x734c4000, + OPC_VSSRLNI_HU_W = 0x734c8000, + OPC_VSSRLNI_WU_D = 0x734d0000, + OPC_VSSRLNI_DU_Q = 0x734e0000, + OPC_VSSRLRNI_B_H = 0x73504000, + OPC_VSSRLRNI_H_W = 0x73508000, + OPC_VSSRLRNI_W_D = 0x73510000, + OPC_VSSRLRNI_D_Q = 0x73520000, + OPC_VSSRLRNI_BU_H = 0x73544000, + OPC_VSSRLRNI_HU_W = 0x73548000, + OPC_VSSRLRNI_WU_D = 0x73550000, + OPC_VSSRLRNI_DU_Q = 0x73560000, + OPC_VSRANI_B_H = 0x73584000, + OPC_VSRANI_H_W = 0x73588000, + OPC_VSRANI_W_D = 0x73590000, + OPC_VSRANI_D_Q = 0x735a0000, + OPC_VSRARNI_B_H = 0x735c4000, + OPC_VSRARNI_H_W = 0x735c8000, + OPC_VSRARNI_W_D = 0x735d0000, + OPC_VSRARNI_D_Q = 0x735e0000, + OPC_VSSRANI_B_H = 0x73604000, + OPC_VSSRANI_H_W = 0x73608000, + OPC_VSSRANI_W_D = 0x73610000, + OPC_VSSRANI_D_Q = 0x73620000, + OPC_VSSRANI_BU_H = 0x73644000, + OPC_VSSRANI_HU_W = 0x73648000, + OPC_VSSRANI_WU_D = 0x73650000, + OPC_VSSRANI_DU_Q = 0x73660000, + OPC_VSSRARNI_B_H = 0x73684000, + OPC_VSSRARNI_H_W = 0x73688000, + OPC_VSSRARNI_W_D = 0x73690000, + OPC_VSSRARNI_D_Q = 0x736a0000, + OPC_VSSRARNI_BU_H = 0x736c4000, + OPC_VSSRARNI_HU_W = 0x736c8000, + OPC_VSSRARNI_WU_D = 0x736d0000, + OPC_VSSRARNI_DU_Q = 0x736e0000, + OPC_VEXTRINS_D = 0x73800000, + OPC_VEXTRINS_W = 0x73840000, + OPC_VEXTRINS_H = 0x73880000, + OPC_VEXTRINS_B = 0x738c0000, + OPC_VSHUF4I_B = 0x73900000, + OPC_VSHUF4I_H = 0x73940000, + OPC_VSHUF4I_W = 0x73980000, + OPC_VSHUF4I_D = 0x739c0000, + OPC_VBITSELI_B = 0x73c40000, + OPC_VANDI_B = 0x73d00000, + OPC_VORI_B = 0x73d40000, + OPC_VXORI_B = 0x73d80000, + OPC_VNORI_B = 0x73dc0000, + OPC_VLDI = 0x73e00000, + OPC_VPERMI_W = 0x73e40000, +} LoongArchInsn; + +static int32_t __attribute__((unused)) +encode_d_slot(LoongArchInsn opc, uint32_t d) +{ + return opc | d; +} + +static int32_t __attribute__((unused)) +encode_dj_slots(LoongArchInsn opc, uint32_t d, uint32_t j) +{ + return opc | d | j << 5; +} + +static int32_t __attribute__((unused)) +encode_djk_slots(LoongArchInsn opc, uint32_t d, uint32_t j, uint32_t k) +{ + return opc | d | j << 5 | k << 10; +} + +static int32_t __attribute__((unused)) +encode_djka_slots(LoongArchInsn opc, uint32_t d, uint32_t j, 
uint32_t k, + uint32_t a) +{ + return opc | d | j << 5 | k << 10 | a << 15; +} + +static int32_t __attribute__((unused)) +encode_djkm_slots(LoongArchInsn opc, uint32_t d, uint32_t j, uint32_t k, + uint32_t m) +{ + return opc | d | j << 5 | k << 10 | m << 16; +} + +static int32_t __attribute__((unused)) +encode_djkn_slots(LoongArchInsn opc, uint32_t d, uint32_t j, uint32_t k, + uint32_t n) +{ + return opc | d | j << 5 | k << 10 | n << 18; +} + +static int32_t __attribute__((unused)) +encode_dk_slots(LoongArchInsn opc, uint32_t d, uint32_t k) +{ + return opc | d | k << 10; +} + +static int32_t __attribute__((unused)) +encode_cdvj_insn(LoongArchInsn opc, TCGReg cd, TCGReg vj) +{ + tcg_debug_assert(cd >= 0 && cd <= 0x7); + tcg_debug_assert(vj >= 0x20 && vj <= 0x3f); + return encode_dj_slots(opc, cd, vj & 0x1f); +} + +static int32_t __attribute__((unused)) +encode_dj_insn(LoongArchInsn opc, TCGReg d, TCGReg j) +{ + tcg_debug_assert(d >= 0 && d <= 0x1f); + tcg_debug_assert(j >= 0 && j <= 0x1f); + return encode_dj_slots(opc, d, j); +} + +static int32_t __attribute__((unused)) +encode_djk_insn(LoongArchInsn opc, TCGReg d, TCGReg j, TCGReg k) +{ + tcg_debug_assert(d >= 0 && d <= 0x1f); + tcg_debug_assert(j >= 0 && j <= 0x1f); + tcg_debug_assert(k >= 0 && k <= 0x1f); + return encode_djk_slots(opc, d, j, k); +} + +static int32_t __attribute__((unused)) +encode_djsk12_insn(LoongArchInsn opc, TCGReg d, TCGReg j, int32_t sk12) +{ + tcg_debug_assert(d >= 0 && d <= 0x1f); + tcg_debug_assert(j >= 0 && j <= 0x1f); + tcg_debug_assert(sk12 >= -0x800 && sk12 <= 0x7ff); + return encode_djk_slots(opc, d, j, sk12 & 0xfff); +} + +static int32_t __attribute__((unused)) +encode_djsk16_insn(LoongArchInsn opc, TCGReg d, TCGReg j, int32_t sk16) +{ + tcg_debug_assert(d >= 0 && d <= 0x1f); + tcg_debug_assert(j >= 0 && j <= 0x1f); + tcg_debug_assert(sk16 >= -0x8000 && sk16 <= 0x7fff); + return encode_djk_slots(opc, d, j, sk16 & 0xffff); +} + +static int32_t __attribute__((unused)) +encode_djuk12_insn(LoongArchInsn opc, TCGReg d, TCGReg j, uint32_t uk12) +{ + tcg_debug_assert(d >= 0 && d <= 0x1f); + tcg_debug_assert(j >= 0 && j <= 0x1f); + tcg_debug_assert(uk12 <= 0xfff); + return encode_djk_slots(opc, d, j, uk12); +} + +static int32_t __attribute__((unused)) +encode_djuk5_insn(LoongArchInsn opc, TCGReg d, TCGReg j, uint32_t uk5) +{ + tcg_debug_assert(d >= 0 && d <= 0x1f); + tcg_debug_assert(j >= 0 && j <= 0x1f); + tcg_debug_assert(uk5 <= 0x1f); + return encode_djk_slots(opc, d, j, uk5); +} + +static int32_t __attribute__((unused)) +encode_djuk5um5_insn(LoongArchInsn opc, TCGReg d, TCGReg j, uint32_t uk5, + uint32_t um5) +{ + tcg_debug_assert(d >= 0 && d <= 0x1f); + tcg_debug_assert(j >= 0 && j <= 0x1f); + tcg_debug_assert(uk5 <= 0x1f); + tcg_debug_assert(um5 <= 0x1f); + return encode_djkm_slots(opc, d, j, uk5, um5); +} + +static int32_t __attribute__((unused)) +encode_djuk6_insn(LoongArchInsn opc, TCGReg d, TCGReg j, uint32_t uk6) +{ + tcg_debug_assert(d >= 0 && d <= 0x1f); + tcg_debug_assert(j >= 0 && j <= 0x1f); + tcg_debug_assert(uk6 <= 0x3f); + return encode_djk_slots(opc, d, j, uk6); +} + +static int32_t __attribute__((unused)) +encode_djuk6um6_insn(LoongArchInsn opc, TCGReg d, TCGReg j, uint32_t uk6, + uint32_t um6) +{ + tcg_debug_assert(d >= 0 && d <= 0x1f); + tcg_debug_assert(j >= 0 && j <= 0x1f); + tcg_debug_assert(uk6 <= 0x3f); + tcg_debug_assert(um6 <= 0x3f); + return encode_djkm_slots(opc, d, j, uk6, um6); +} + +static int32_t __attribute__((unused)) +encode_dsj20_insn(LoongArchInsn opc, TCGReg d, int32_t 
sj20) +{ + tcg_debug_assert(d >= 0 && d <= 0x1f); + tcg_debug_assert(sj20 >= -0x80000 && sj20 <= 0x7ffff); + return encode_dj_slots(opc, d, sj20 & 0xfffff); +} + +static int32_t __attribute__((unused)) +encode_dvjuk1_insn(LoongArchInsn opc, TCGReg d, TCGReg vj, uint32_t uk1) +{ + tcg_debug_assert(d >= 0 && d <= 0x1f); + tcg_debug_assert(vj >= 0x20 && vj <= 0x3f); + tcg_debug_assert(uk1 <= 0x1); + return encode_djk_slots(opc, d, vj & 0x1f, uk1); +} + +static int32_t __attribute__((unused)) +encode_dvjuk2_insn(LoongArchInsn opc, TCGReg d, TCGReg vj, uint32_t uk2) +{ + tcg_debug_assert(d >= 0 && d <= 0x1f); + tcg_debug_assert(vj >= 0x20 && vj <= 0x3f); + tcg_debug_assert(uk2 <= 0x3); + return encode_djk_slots(opc, d, vj & 0x1f, uk2); +} + +static int32_t __attribute__((unused)) +encode_dvjuk3_insn(LoongArchInsn opc, TCGReg d, TCGReg vj, uint32_t uk3) +{ + tcg_debug_assert(d >= 0 && d <= 0x1f); + tcg_debug_assert(vj >= 0x20 && vj <= 0x3f); + tcg_debug_assert(uk3 <= 0x7); + return encode_djk_slots(opc, d, vj & 0x1f, uk3); +} + +static int32_t __attribute__((unused)) +encode_dvjuk4_insn(LoongArchInsn opc, TCGReg d, TCGReg vj, uint32_t uk4) +{ + tcg_debug_assert(d >= 0 && d <= 0x1f); + tcg_debug_assert(vj >= 0x20 && vj <= 0x3f); + tcg_debug_assert(uk4 <= 0xf); + return encode_djk_slots(opc, d, vj & 0x1f, uk4); +} + +static int32_t __attribute__((unused)) +encode_sd10k16_insn(LoongArchInsn opc, int32_t sd10k16) +{ + tcg_debug_assert(sd10k16 >= -0x2000000 && sd10k16 <= 0x1ffffff); + return encode_dk_slots(opc, (sd10k16 >> 16) & 0x3ff, sd10k16 & 0xffff); +} + +static int32_t __attribute__((unused)) +encode_ud15_insn(LoongArchInsn opc, uint32_t ud15) +{ + tcg_debug_assert(ud15 <= 0x7fff); + return encode_d_slot(opc, ud15); +} + +static int32_t __attribute__((unused)) +encode_vdj_insn(LoongArchInsn opc, TCGReg vd, TCGReg j) +{ + tcg_debug_assert(vd >= 0x20 && vd <= 0x3f); + tcg_debug_assert(j >= 0 && j <= 0x1f); + return encode_dj_slots(opc, vd & 0x1f, j); +} + +static int32_t __attribute__((unused)) +encode_vdjk_insn(LoongArchInsn opc, TCGReg vd, TCGReg j, TCGReg k) +{ + tcg_debug_assert(vd >= 0x20 && vd <= 0x3f); + tcg_debug_assert(j >= 0 && j <= 0x1f); + tcg_debug_assert(k >= 0 && k <= 0x1f); + return encode_djk_slots(opc, vd & 0x1f, j, k); +} + +static int32_t __attribute__((unused)) +encode_vdjsk10_insn(LoongArchInsn opc, TCGReg vd, TCGReg j, int32_t sk10) +{ + tcg_debug_assert(vd >= 0x20 && vd <= 0x3f); + tcg_debug_assert(j >= 0 && j <= 0x1f); + tcg_debug_assert(sk10 >= -0x200 && sk10 <= 0x1ff); + return encode_djk_slots(opc, vd & 0x1f, j, sk10 & 0x3ff); +} + +static int32_t __attribute__((unused)) +encode_vdjsk11_insn(LoongArchInsn opc, TCGReg vd, TCGReg j, int32_t sk11) +{ + tcg_debug_assert(vd >= 0x20 && vd <= 0x3f); + tcg_debug_assert(j >= 0 && j <= 0x1f); + tcg_debug_assert(sk11 >= -0x400 && sk11 <= 0x3ff); + return encode_djk_slots(opc, vd & 0x1f, j, sk11 & 0x7ff); +} + +static int32_t __attribute__((unused)) +encode_vdjsk12_insn(LoongArchInsn opc, TCGReg vd, TCGReg j, int32_t sk12) +{ + tcg_debug_assert(vd >= 0x20 && vd <= 0x3f); + tcg_debug_assert(j >= 0 && j <= 0x1f); + tcg_debug_assert(sk12 >= -0x800 && sk12 <= 0x7ff); + return encode_djk_slots(opc, vd & 0x1f, j, sk12 & 0xfff); +} + +static int32_t __attribute__((unused)) +encode_vdjsk8un1_insn(LoongArchInsn opc, TCGReg vd, TCGReg j, int32_t sk8, + uint32_t un1) +{ + tcg_debug_assert(vd >= 0x20 && vd <= 0x3f); + tcg_debug_assert(j >= 0 && j <= 0x1f); + tcg_debug_assert(sk8 >= -0x80 && sk8 <= 0x7f); + tcg_debug_assert(un1 <= 0x1); + 
return encode_djkn_slots(opc, vd & 0x1f, j, sk8 & 0xff, un1); +} + +static int32_t __attribute__((unused)) +encode_vdjsk8un2_insn(LoongArchInsn opc, TCGReg vd, TCGReg j, int32_t sk8, + uint32_t un2) +{ + tcg_debug_assert(vd >= 0x20 && vd <= 0x3f); + tcg_debug_assert(j >= 0 && j <= 0x1f); + tcg_debug_assert(sk8 >= -0x80 && sk8 <= 0x7f); + tcg_debug_assert(un2 <= 0x3); + return encode_djkn_slots(opc, vd & 0x1f, j, sk8 & 0xff, un2); +} + +static int32_t __attribute__((unused)) +encode_vdjsk8un3_insn(LoongArchInsn opc, TCGReg vd, TCGReg j, int32_t sk8, + uint32_t un3) +{ + tcg_debug_assert(vd >= 0x20 && vd <= 0x3f); + tcg_debug_assert(j >= 0 && j <= 0x1f); + tcg_debug_assert(sk8 >= -0x80 && sk8 <= 0x7f); + tcg_debug_assert(un3 <= 0x7); + return encode_djkn_slots(opc, vd & 0x1f, j, sk8 & 0xff, un3); +} + +static int32_t __attribute__((unused)) +encode_vdjsk8un4_insn(LoongArchInsn opc, TCGReg vd, TCGReg j, int32_t sk8, + uint32_t un4) +{ + tcg_debug_assert(vd >= 0x20 && vd <= 0x3f); + tcg_debug_assert(j >= 0 && j <= 0x1f); + tcg_debug_assert(sk8 >= -0x80 && sk8 <= 0x7f); + tcg_debug_assert(un4 <= 0xf); + return encode_djkn_slots(opc, vd & 0x1f, j, sk8 & 0xff, un4); +} + +static int32_t __attribute__((unused)) +encode_vdjsk9_insn(LoongArchInsn opc, TCGReg vd, TCGReg j, int32_t sk9) +{ + tcg_debug_assert(vd >= 0x20 && vd <= 0x3f); + tcg_debug_assert(j >= 0 && j <= 0x1f); + tcg_debug_assert(sk9 >= -0x100 && sk9 <= 0xff); + return encode_djk_slots(opc, vd & 0x1f, j, sk9 & 0x1ff); +} + +static int32_t __attribute__((unused)) +encode_vdjuk1_insn(LoongArchInsn opc, TCGReg vd, TCGReg j, uint32_t uk1) +{ + tcg_debug_assert(vd >= 0x20 && vd <= 0x3f); + tcg_debug_assert(j >= 0 && j <= 0x1f); + tcg_debug_assert(uk1 <= 0x1); + return encode_djk_slots(opc, vd & 0x1f, j, uk1); +} + +static int32_t __attribute__((unused)) +encode_vdjuk2_insn(LoongArchInsn opc, TCGReg vd, TCGReg j, uint32_t uk2) +{ + tcg_debug_assert(vd >= 0x20 && vd <= 0x3f); + tcg_debug_assert(j >= 0 && j <= 0x1f); + tcg_debug_assert(uk2 <= 0x3); + return encode_djk_slots(opc, vd & 0x1f, j, uk2); +} + +static int32_t __attribute__((unused)) +encode_vdjuk3_insn(LoongArchInsn opc, TCGReg vd, TCGReg j, uint32_t uk3) +{ + tcg_debug_assert(vd >= 0x20 && vd <= 0x3f); + tcg_debug_assert(j >= 0 && j <= 0x1f); + tcg_debug_assert(uk3 <= 0x7); + return encode_djk_slots(opc, vd & 0x1f, j, uk3); +} + +static int32_t __attribute__((unused)) +encode_vdjuk4_insn(LoongArchInsn opc, TCGReg vd, TCGReg j, uint32_t uk4) +{ + tcg_debug_assert(vd >= 0x20 && vd <= 0x3f); + tcg_debug_assert(j >= 0 && j <= 0x1f); + tcg_debug_assert(uk4 <= 0xf); + return encode_djk_slots(opc, vd & 0x1f, j, uk4); +} + +static int32_t __attribute__((unused)) +encode_vdsj13_insn(LoongArchInsn opc, TCGReg vd, int32_t sj13) +{ + tcg_debug_assert(vd >= 0x20 && vd <= 0x3f); + tcg_debug_assert(sj13 >= -0x1000 && sj13 <= 0xfff); + return encode_dj_slots(opc, vd & 0x1f, sj13 & 0x1fff); +} + +static int32_t __attribute__((unused)) +encode_vdvj_insn(LoongArchInsn opc, TCGReg vd, TCGReg vj) +{ + tcg_debug_assert(vd >= 0x20 && vd <= 0x3f); + tcg_debug_assert(vj >= 0x20 && vj <= 0x3f); + return encode_dj_slots(opc, vd & 0x1f, vj & 0x1f); +} + +static int32_t __attribute__((unused)) +encode_vdvjk_insn(LoongArchInsn opc, TCGReg vd, TCGReg vj, TCGReg k) +{ + tcg_debug_assert(vd >= 0x20 && vd <= 0x3f); + tcg_debug_assert(vj >= 0x20 && vj <= 0x3f); + tcg_debug_assert(k >= 0 && k <= 0x1f); + return encode_djk_slots(opc, vd & 0x1f, vj & 0x1f, k); +} + +static int32_t __attribute__((unused)) 
+encode_vdvjsk5_insn(LoongArchInsn opc, TCGReg vd, TCGReg vj, int32_t sk5) +{ + tcg_debug_assert(vd >= 0x20 && vd <= 0x3f); + tcg_debug_assert(vj >= 0x20 && vj <= 0x3f); + tcg_debug_assert(sk5 >= -0x10 && sk5 <= 0xf); + return encode_djk_slots(opc, vd & 0x1f, vj & 0x1f, sk5 & 0x1f); +} + +static int32_t __attribute__((unused)) +encode_vdvjuk1_insn(LoongArchInsn opc, TCGReg vd, TCGReg vj, uint32_t uk1) +{ + tcg_debug_assert(vd >= 0x20 && vd <= 0x3f); + tcg_debug_assert(vj >= 0x20 && vj <= 0x3f); + tcg_debug_assert(uk1 <= 0x1); + return encode_djk_slots(opc, vd & 0x1f, vj & 0x1f, uk1); +} + +static int32_t __attribute__((unused)) +encode_vdvjuk2_insn(LoongArchInsn opc, TCGReg vd, TCGReg vj, uint32_t uk2) +{ + tcg_debug_assert(vd >= 0x20 && vd <= 0x3f); + tcg_debug_assert(vj >= 0x20 && vj <= 0x3f); + tcg_debug_assert(uk2 <= 0x3); + return encode_djk_slots(opc, vd & 0x1f, vj & 0x1f, uk2); +} + +static int32_t __attribute__((unused)) +encode_vdvjuk3_insn(LoongArchInsn opc, TCGReg vd, TCGReg vj, uint32_t uk3) +{ + tcg_debug_assert(vd >= 0x20 && vd <= 0x3f); + tcg_debug_assert(vj >= 0x20 && vj <= 0x3f); + tcg_debug_assert(uk3 <= 0x7); + return encode_djk_slots(opc, vd & 0x1f, vj & 0x1f, uk3); +} + +static int32_t __attribute__((unused)) +encode_vdvjuk4_insn(LoongArchInsn opc, TCGReg vd, TCGReg vj, uint32_t uk4) +{ + tcg_debug_assert(vd >= 0x20 && vd <= 0x3f); + tcg_debug_assert(vj >= 0x20 && vj <= 0x3f); + tcg_debug_assert(uk4 <= 0xf); + return encode_djk_slots(opc, vd & 0x1f, vj & 0x1f, uk4); +} + +static int32_t __attribute__((unused)) +encode_vdvjuk5_insn(LoongArchInsn opc, TCGReg vd, TCGReg vj, uint32_t uk5) +{ + tcg_debug_assert(vd >= 0x20 && vd <= 0x3f); + tcg_debug_assert(vj >= 0x20 && vj <= 0x3f); + tcg_debug_assert(uk5 <= 0x1f); + return encode_djk_slots(opc, vd & 0x1f, vj & 0x1f, uk5); +} + +static int32_t __attribute__((unused)) +encode_vdvjuk6_insn(LoongArchInsn opc, TCGReg vd, TCGReg vj, uint32_t uk6) +{ + tcg_debug_assert(vd >= 0x20 && vd <= 0x3f); + tcg_debug_assert(vj >= 0x20 && vj <= 0x3f); + tcg_debug_assert(uk6 <= 0x3f); + return encode_djk_slots(opc, vd & 0x1f, vj & 0x1f, uk6); +} + +static int32_t __attribute__((unused)) +encode_vdvjuk7_insn(LoongArchInsn opc, TCGReg vd, TCGReg vj, uint32_t uk7) +{ + tcg_debug_assert(vd >= 0x20 && vd <= 0x3f); + tcg_debug_assert(vj >= 0x20 && vj <= 0x3f); + tcg_debug_assert(uk7 <= 0x7f); + return encode_djk_slots(opc, vd & 0x1f, vj & 0x1f, uk7); +} + +static int32_t __attribute__((unused)) +encode_vdvjuk8_insn(LoongArchInsn opc, TCGReg vd, TCGReg vj, uint32_t uk8) +{ + tcg_debug_assert(vd >= 0x20 && vd <= 0x3f); + tcg_debug_assert(vj >= 0x20 && vj <= 0x3f); + tcg_debug_assert(uk8 <= 0xff); + return encode_djk_slots(opc, vd & 0x1f, vj & 0x1f, uk8); +} + +static int32_t __attribute__((unused)) +encode_vdvjvk_insn(LoongArchInsn opc, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_debug_assert(vd >= 0x20 && vd <= 0x3f); + tcg_debug_assert(vj >= 0x20 && vj <= 0x3f); + tcg_debug_assert(vk >= 0x20 && vk <= 0x3f); + return encode_djk_slots(opc, vd & 0x1f, vj & 0x1f, vk & 0x1f); +} + +static int32_t __attribute__((unused)) +encode_vdvjvkva_insn(LoongArchInsn opc, TCGReg vd, TCGReg vj, TCGReg vk, + TCGReg va) +{ + tcg_debug_assert(vd >= 0x20 && vd <= 0x3f); + tcg_debug_assert(vj >= 0x20 && vj <= 0x3f); + tcg_debug_assert(vk >= 0x20 && vk <= 0x3f); + tcg_debug_assert(va >= 0x20 && va <= 0x3f); + return encode_djka_slots(opc, vd & 0x1f, vj & 0x1f, vk & 0x1f, va & 0x1f); +} + +/* Emits the `clz.w d, j` instruction. 
*/ +static void __attribute__((unused)) +tcg_out_opc_clz_w(TCGContext *s, TCGReg d, TCGReg j) +{ + tcg_out32(s, encode_dj_insn(OPC_CLZ_W, d, j)); +} + +/* Emits the `ctz.w d, j` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_ctz_w(TCGContext *s, TCGReg d, TCGReg j) +{ + tcg_out32(s, encode_dj_insn(OPC_CTZ_W, d, j)); +} + +/* Emits the `clz.d d, j` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_clz_d(TCGContext *s, TCGReg d, TCGReg j) +{ + tcg_out32(s, encode_dj_insn(OPC_CLZ_D, d, j)); +} + +/* Emits the `ctz.d d, j` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_ctz_d(TCGContext *s, TCGReg d, TCGReg j) +{ + tcg_out32(s, encode_dj_insn(OPC_CTZ_D, d, j)); +} + +/* Emits the `revb.2h d, j` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_revb_2h(TCGContext *s, TCGReg d, TCGReg j) +{ + tcg_out32(s, encode_dj_insn(OPC_REVB_2H, d, j)); +} + +/* Emits the `revb.2w d, j` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_revb_2w(TCGContext *s, TCGReg d, TCGReg j) +{ + tcg_out32(s, encode_dj_insn(OPC_REVB_2W, d, j)); +} + +/* Emits the `revb.d d, j` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_revb_d(TCGContext *s, TCGReg d, TCGReg j) +{ + tcg_out32(s, encode_dj_insn(OPC_REVB_D, d, j)); +} + +/* Emits the `sext.h d, j` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_sext_h(TCGContext *s, TCGReg d, TCGReg j) +{ + tcg_out32(s, encode_dj_insn(OPC_SEXT_H, d, j)); +} + +/* Emits the `sext.b d, j` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_sext_b(TCGContext *s, TCGReg d, TCGReg j) +{ + tcg_out32(s, encode_dj_insn(OPC_SEXT_B, d, j)); +} + +/* Emits the `add.w d, j, k` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_add_w(TCGContext *s, TCGReg d, TCGReg j, TCGReg k) +{ + tcg_out32(s, encode_djk_insn(OPC_ADD_W, d, j, k)); +} + +/* Emits the `add.d d, j, k` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_add_d(TCGContext *s, TCGReg d, TCGReg j, TCGReg k) +{ + tcg_out32(s, encode_djk_insn(OPC_ADD_D, d, j, k)); +} + +/* Emits the `sub.w d, j, k` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_sub_w(TCGContext *s, TCGReg d, TCGReg j, TCGReg k) +{ + tcg_out32(s, encode_djk_insn(OPC_SUB_W, d, j, k)); +} + +/* Emits the `sub.d d, j, k` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_sub_d(TCGContext *s, TCGReg d, TCGReg j, TCGReg k) +{ + tcg_out32(s, encode_djk_insn(OPC_SUB_D, d, j, k)); +} + +/* Emits the `slt d, j, k` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_slt(TCGContext *s, TCGReg d, TCGReg j, TCGReg k) +{ + tcg_out32(s, encode_djk_insn(OPC_SLT, d, j, k)); +} + +/* Emits the `sltu d, j, k` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_sltu(TCGContext *s, TCGReg d, TCGReg j, TCGReg k) +{ + tcg_out32(s, encode_djk_insn(OPC_SLTU, d, j, k)); +} + +/* Emits the `maskeqz d, j, k` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_maskeqz(TCGContext *s, TCGReg d, TCGReg j, TCGReg k) +{ + tcg_out32(s, encode_djk_insn(OPC_MASKEQZ, d, j, k)); +} + +/* Emits the `masknez d, j, k` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_masknez(TCGContext *s, TCGReg d, TCGReg j, TCGReg k) +{ + tcg_out32(s, encode_djk_insn(OPC_MASKNEZ, d, j, k)); +} + +/* Emits the `nor d, j, k` instruction. 
*/ +static void __attribute__((unused)) +tcg_out_opc_nor(TCGContext *s, TCGReg d, TCGReg j, TCGReg k) +{ + tcg_out32(s, encode_djk_insn(OPC_NOR, d, j, k)); +} + +/* Emits the `and d, j, k` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_and(TCGContext *s, TCGReg d, TCGReg j, TCGReg k) +{ + tcg_out32(s, encode_djk_insn(OPC_AND, d, j, k)); +} + +/* Emits the `or d, j, k` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_or(TCGContext *s, TCGReg d, TCGReg j, TCGReg k) +{ + tcg_out32(s, encode_djk_insn(OPC_OR, d, j, k)); +} + +/* Emits the `xor d, j, k` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_xor(TCGContext *s, TCGReg d, TCGReg j, TCGReg k) +{ + tcg_out32(s, encode_djk_insn(OPC_XOR, d, j, k)); +} + +/* Emits the `orn d, j, k` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_orn(TCGContext *s, TCGReg d, TCGReg j, TCGReg k) +{ + tcg_out32(s, encode_djk_insn(OPC_ORN, d, j, k)); +} + +/* Emits the `andn d, j, k` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_andn(TCGContext *s, TCGReg d, TCGReg j, TCGReg k) +{ + tcg_out32(s, encode_djk_insn(OPC_ANDN, d, j, k)); +} + +/* Emits the `sll.w d, j, k` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_sll_w(TCGContext *s, TCGReg d, TCGReg j, TCGReg k) +{ + tcg_out32(s, encode_djk_insn(OPC_SLL_W, d, j, k)); +} + +/* Emits the `srl.w d, j, k` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_srl_w(TCGContext *s, TCGReg d, TCGReg j, TCGReg k) +{ + tcg_out32(s, encode_djk_insn(OPC_SRL_W, d, j, k)); +} + +/* Emits the `sra.w d, j, k` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_sra_w(TCGContext *s, TCGReg d, TCGReg j, TCGReg k) +{ + tcg_out32(s, encode_djk_insn(OPC_SRA_W, d, j, k)); +} + +/* Emits the `sll.d d, j, k` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_sll_d(TCGContext *s, TCGReg d, TCGReg j, TCGReg k) +{ + tcg_out32(s, encode_djk_insn(OPC_SLL_D, d, j, k)); +} + +/* Emits the `srl.d d, j, k` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_srl_d(TCGContext *s, TCGReg d, TCGReg j, TCGReg k) +{ + tcg_out32(s, encode_djk_insn(OPC_SRL_D, d, j, k)); +} + +/* Emits the `sra.d d, j, k` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_sra_d(TCGContext *s, TCGReg d, TCGReg j, TCGReg k) +{ + tcg_out32(s, encode_djk_insn(OPC_SRA_D, d, j, k)); +} + +/* Emits the `rotr.w d, j, k` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_rotr_w(TCGContext *s, TCGReg d, TCGReg j, TCGReg k) +{ + tcg_out32(s, encode_djk_insn(OPC_ROTR_W, d, j, k)); +} + +/* Emits the `rotr.d d, j, k` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_rotr_d(TCGContext *s, TCGReg d, TCGReg j, TCGReg k) +{ + tcg_out32(s, encode_djk_insn(OPC_ROTR_D, d, j, k)); +} + +/* Emits the `mul.w d, j, k` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_mul_w(TCGContext *s, TCGReg d, TCGReg j, TCGReg k) +{ + tcg_out32(s, encode_djk_insn(OPC_MUL_W, d, j, k)); +} + +/* Emits the `mulh.w d, j, k` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_mulh_w(TCGContext *s, TCGReg d, TCGReg j, TCGReg k) +{ + tcg_out32(s, encode_djk_insn(OPC_MULH_W, d, j, k)); +} + +/* Emits the `mulh.wu d, j, k` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_mulh_wu(TCGContext *s, TCGReg d, TCGReg j, TCGReg k) +{ + tcg_out32(s, encode_djk_insn(OPC_MULH_WU, d, j, k)); +} + +/* Emits the `mul.d d, j, k` instruction. 
*/ +static void __attribute__((unused)) +tcg_out_opc_mul_d(TCGContext *s, TCGReg d, TCGReg j, TCGReg k) +{ + tcg_out32(s, encode_djk_insn(OPC_MUL_D, d, j, k)); +} + +/* Emits the `mulh.d d, j, k` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_mulh_d(TCGContext *s, TCGReg d, TCGReg j, TCGReg k) +{ + tcg_out32(s, encode_djk_insn(OPC_MULH_D, d, j, k)); +} + +/* Emits the `mulh.du d, j, k` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_mulh_du(TCGContext *s, TCGReg d, TCGReg j, TCGReg k) +{ + tcg_out32(s, encode_djk_insn(OPC_MULH_DU, d, j, k)); +} + +/* Emits the `div.w d, j, k` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_div_w(TCGContext *s, TCGReg d, TCGReg j, TCGReg k) +{ + tcg_out32(s, encode_djk_insn(OPC_DIV_W, d, j, k)); +} + +/* Emits the `mod.w d, j, k` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_mod_w(TCGContext *s, TCGReg d, TCGReg j, TCGReg k) +{ + tcg_out32(s, encode_djk_insn(OPC_MOD_W, d, j, k)); +} + +/* Emits the `div.wu d, j, k` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_div_wu(TCGContext *s, TCGReg d, TCGReg j, TCGReg k) +{ + tcg_out32(s, encode_djk_insn(OPC_DIV_WU, d, j, k)); +} + +/* Emits the `mod.wu d, j, k` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_mod_wu(TCGContext *s, TCGReg d, TCGReg j, TCGReg k) +{ + tcg_out32(s, encode_djk_insn(OPC_MOD_WU, d, j, k)); +} + +/* Emits the `div.d d, j, k` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_div_d(TCGContext *s, TCGReg d, TCGReg j, TCGReg k) +{ + tcg_out32(s, encode_djk_insn(OPC_DIV_D, d, j, k)); +} + +/* Emits the `mod.d d, j, k` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_mod_d(TCGContext *s, TCGReg d, TCGReg j, TCGReg k) +{ + tcg_out32(s, encode_djk_insn(OPC_MOD_D, d, j, k)); +} + +/* Emits the `div.du d, j, k` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_div_du(TCGContext *s, TCGReg d, TCGReg j, TCGReg k) +{ + tcg_out32(s, encode_djk_insn(OPC_DIV_DU, d, j, k)); +} + +/* Emits the `mod.du d, j, k` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_mod_du(TCGContext *s, TCGReg d, TCGReg j, TCGReg k) +{ + tcg_out32(s, encode_djk_insn(OPC_MOD_DU, d, j, k)); +} + +/* Emits the `slli.w d, j, uk5` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_slli_w(TCGContext *s, TCGReg d, TCGReg j, uint32_t uk5) +{ + tcg_out32(s, encode_djuk5_insn(OPC_SLLI_W, d, j, uk5)); +} + +/* Emits the `slli.d d, j, uk6` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_slli_d(TCGContext *s, TCGReg d, TCGReg j, uint32_t uk6) +{ + tcg_out32(s, encode_djuk6_insn(OPC_SLLI_D, d, j, uk6)); +} + +/* Emits the `srli.w d, j, uk5` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_srli_w(TCGContext *s, TCGReg d, TCGReg j, uint32_t uk5) +{ + tcg_out32(s, encode_djuk5_insn(OPC_SRLI_W, d, j, uk5)); +} + +/* Emits the `srli.d d, j, uk6` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_srli_d(TCGContext *s, TCGReg d, TCGReg j, uint32_t uk6) +{ + tcg_out32(s, encode_djuk6_insn(OPC_SRLI_D, d, j, uk6)); +} + +/* Emits the `srai.w d, j, uk5` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_srai_w(TCGContext *s, TCGReg d, TCGReg j, uint32_t uk5) +{ + tcg_out32(s, encode_djuk5_insn(OPC_SRAI_W, d, j, uk5)); +} + +/* Emits the `srai.d d, j, uk6` instruction. 
*/ +static void __attribute__((unused)) +tcg_out_opc_srai_d(TCGContext *s, TCGReg d, TCGReg j, uint32_t uk6) +{ + tcg_out32(s, encode_djuk6_insn(OPC_SRAI_D, d, j, uk6)); +} + +/* Emits the `rotri.w d, j, uk5` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_rotri_w(TCGContext *s, TCGReg d, TCGReg j, uint32_t uk5) +{ + tcg_out32(s, encode_djuk5_insn(OPC_ROTRI_W, d, j, uk5)); +} + +/* Emits the `rotri.d d, j, uk6` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_rotri_d(TCGContext *s, TCGReg d, TCGReg j, uint32_t uk6) +{ + tcg_out32(s, encode_djuk6_insn(OPC_ROTRI_D, d, j, uk6)); +} + +/* Emits the `bstrins.w d, j, uk5, um5` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_bstrins_w(TCGContext *s, TCGReg d, TCGReg j, uint32_t uk5, + uint32_t um5) +{ + tcg_out32(s, encode_djuk5um5_insn(OPC_BSTRINS_W, d, j, uk5, um5)); +} + +/* Emits the `bstrpick.w d, j, uk5, um5` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_bstrpick_w(TCGContext *s, TCGReg d, TCGReg j, uint32_t uk5, + uint32_t um5) +{ + tcg_out32(s, encode_djuk5um5_insn(OPC_BSTRPICK_W, d, j, uk5, um5)); +} + +/* Emits the `bstrins.d d, j, uk6, um6` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_bstrins_d(TCGContext *s, TCGReg d, TCGReg j, uint32_t uk6, + uint32_t um6) +{ + tcg_out32(s, encode_djuk6um6_insn(OPC_BSTRINS_D, d, j, uk6, um6)); +} + +/* Emits the `bstrpick.d d, j, uk6, um6` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_bstrpick_d(TCGContext *s, TCGReg d, TCGReg j, uint32_t uk6, + uint32_t um6) +{ + tcg_out32(s, encode_djuk6um6_insn(OPC_BSTRPICK_D, d, j, uk6, um6)); +} + +/* Emits the `slti d, j, sk12` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_slti(TCGContext *s, TCGReg d, TCGReg j, int32_t sk12) +{ + tcg_out32(s, encode_djsk12_insn(OPC_SLTI, d, j, sk12)); +} + +/* Emits the `sltui d, j, sk12` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_sltui(TCGContext *s, TCGReg d, TCGReg j, int32_t sk12) +{ + tcg_out32(s, encode_djsk12_insn(OPC_SLTUI, d, j, sk12)); +} + +/* Emits the `addi.w d, j, sk12` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_addi_w(TCGContext *s, TCGReg d, TCGReg j, int32_t sk12) +{ + tcg_out32(s, encode_djsk12_insn(OPC_ADDI_W, d, j, sk12)); +} + +/* Emits the `addi.d d, j, sk12` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_addi_d(TCGContext *s, TCGReg d, TCGReg j, int32_t sk12) +{ + tcg_out32(s, encode_djsk12_insn(OPC_ADDI_D, d, j, sk12)); +} + +/* Emits the `cu52i.d d, j, sk12` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_cu52i_d(TCGContext *s, TCGReg d, TCGReg j, int32_t sk12) +{ + tcg_out32(s, encode_djsk12_insn(OPC_CU52I_D, d, j, sk12)); +} + +/* Emits the `andi d, j, uk12` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_andi(TCGContext *s, TCGReg d, TCGReg j, uint32_t uk12) +{ + tcg_out32(s, encode_djuk12_insn(OPC_ANDI, d, j, uk12)); +} + +/* Emits the `ori d, j, uk12` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_ori(TCGContext *s, TCGReg d, TCGReg j, uint32_t uk12) +{ + tcg_out32(s, encode_djuk12_insn(OPC_ORI, d, j, uk12)); +} + +/* Emits the `xori d, j, uk12` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_xori(TCGContext *s, TCGReg d, TCGReg j, uint32_t uk12) +{ + tcg_out32(s, encode_djuk12_insn(OPC_XORI, d, j, uk12)); +} + +/* Emits the `vfmadd.s vd, vj, vk, va` instruction. 
*/ +static void __attribute__((unused)) +tcg_out_opc_vfmadd_s(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk, TCGReg va) +{ + tcg_out32(s, encode_vdvjvkva_insn(OPC_VFMADD_S, vd, vj, vk, va)); +} + +/* Emits the `vfmadd.d vd, vj, vk, va` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vfmadd_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk, TCGReg va) +{ + tcg_out32(s, encode_vdvjvkva_insn(OPC_VFMADD_D, vd, vj, vk, va)); +} + +/* Emits the `vfmsub.s vd, vj, vk, va` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vfmsub_s(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk, TCGReg va) +{ + tcg_out32(s, encode_vdvjvkva_insn(OPC_VFMSUB_S, vd, vj, vk, va)); +} + +/* Emits the `vfmsub.d vd, vj, vk, va` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vfmsub_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk, TCGReg va) +{ + tcg_out32(s, encode_vdvjvkva_insn(OPC_VFMSUB_D, vd, vj, vk, va)); +} + +/* Emits the `vfnmadd.s vd, vj, vk, va` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vfnmadd_s(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk, TCGReg va) +{ + tcg_out32(s, encode_vdvjvkva_insn(OPC_VFNMADD_S, vd, vj, vk, va)); +} + +/* Emits the `vfnmadd.d vd, vj, vk, va` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vfnmadd_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk, TCGReg va) +{ + tcg_out32(s, encode_vdvjvkva_insn(OPC_VFNMADD_D, vd, vj, vk, va)); +} + +/* Emits the `vfnmsub.s vd, vj, vk, va` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vfnmsub_s(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk, TCGReg va) +{ + tcg_out32(s, encode_vdvjvkva_insn(OPC_VFNMSUB_S, vd, vj, vk, va)); +} + +/* Emits the `vfnmsub.d vd, vj, vk, va` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vfnmsub_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk, TCGReg va) +{ + tcg_out32(s, encode_vdvjvkva_insn(OPC_VFNMSUB_D, vd, vj, vk, va)); +} + +/* Emits the `vfcmp.caf.s vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vfcmp_caf_s(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VFCMP_CAF_S, vd, vj, vk)); +} + +/* Emits the `vfcmp.saf.s vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vfcmp_saf_s(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VFCMP_SAF_S, vd, vj, vk)); +} + +/* Emits the `vfcmp.clt.s vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vfcmp_clt_s(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VFCMP_CLT_S, vd, vj, vk)); +} + +/* Emits the `vfcmp.slt.s vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vfcmp_slt_s(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VFCMP_SLT_S, vd, vj, vk)); +} + +/* Emits the `vfcmp.ceq.s vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vfcmp_ceq_s(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VFCMP_CEQ_S, vd, vj, vk)); +} + +/* Emits the `vfcmp.seq.s vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vfcmp_seq_s(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VFCMP_SEQ_S, vd, vj, vk)); +} + +/* Emits the `vfcmp.cle.s vd, vj, vk` instruction. 
*/ +static void __attribute__((unused)) +tcg_out_opc_vfcmp_cle_s(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VFCMP_CLE_S, vd, vj, vk)); +} + +/* Emits the `vfcmp.sle.s vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vfcmp_sle_s(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VFCMP_SLE_S, vd, vj, vk)); +} + +/* Emits the `vfcmp.cun.s vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vfcmp_cun_s(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VFCMP_CUN_S, vd, vj, vk)); +} + +/* Emits the `vfcmp.sun.s vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vfcmp_sun_s(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VFCMP_SUN_S, vd, vj, vk)); +} + +/* Emits the `vfcmp.cult.s vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vfcmp_cult_s(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VFCMP_CULT_S, vd, vj, vk)); +} + +/* Emits the `vfcmp.sult.s vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vfcmp_sult_s(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VFCMP_SULT_S, vd, vj, vk)); +} + +/* Emits the `vfcmp.cueq.s vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vfcmp_cueq_s(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VFCMP_CUEQ_S, vd, vj, vk)); +} + +/* Emits the `vfcmp.sueq.s vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vfcmp_sueq_s(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VFCMP_SUEQ_S, vd, vj, vk)); +} + +/* Emits the `vfcmp.cule.s vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vfcmp_cule_s(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VFCMP_CULE_S, vd, vj, vk)); +} + +/* Emits the `vfcmp.sule.s vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vfcmp_sule_s(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VFCMP_SULE_S, vd, vj, vk)); +} + +/* Emits the `vfcmp.cne.s vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vfcmp_cne_s(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VFCMP_CNE_S, vd, vj, vk)); +} + +/* Emits the `vfcmp.sne.s vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vfcmp_sne_s(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VFCMP_SNE_S, vd, vj, vk)); +} + +/* Emits the `vfcmp.cor.s vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vfcmp_cor_s(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VFCMP_COR_S, vd, vj, vk)); +} + +/* Emits the `vfcmp.sor.s vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vfcmp_sor_s(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VFCMP_SOR_S, vd, vj, vk)); +} + +/* Emits the `vfcmp.cune.s vd, vj, vk` instruction. 
*/ +static void __attribute__((unused)) +tcg_out_opc_vfcmp_cune_s(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VFCMP_CUNE_S, vd, vj, vk)); +} + +/* Emits the `vfcmp.sune.s vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vfcmp_sune_s(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VFCMP_SUNE_S, vd, vj, vk)); +} + +/* Emits the `vfcmp.caf.d vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vfcmp_caf_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VFCMP_CAF_D, vd, vj, vk)); +} + +/* Emits the `vfcmp.saf.d vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vfcmp_saf_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VFCMP_SAF_D, vd, vj, vk)); +} + +/* Emits the `vfcmp.clt.d vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vfcmp_clt_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VFCMP_CLT_D, vd, vj, vk)); +} + +/* Emits the `vfcmp.slt.d vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vfcmp_slt_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VFCMP_SLT_D, vd, vj, vk)); +} + +/* Emits the `vfcmp.ceq.d vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vfcmp_ceq_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VFCMP_CEQ_D, vd, vj, vk)); +} + +/* Emits the `vfcmp.seq.d vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vfcmp_seq_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VFCMP_SEQ_D, vd, vj, vk)); +} + +/* Emits the `vfcmp.cle.d vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vfcmp_cle_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VFCMP_CLE_D, vd, vj, vk)); +} + +/* Emits the `vfcmp.sle.d vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vfcmp_sle_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VFCMP_SLE_D, vd, vj, vk)); +} + +/* Emits the `vfcmp.cun.d vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vfcmp_cun_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VFCMP_CUN_D, vd, vj, vk)); +} + +/* Emits the `vfcmp.sun.d vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vfcmp_sun_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VFCMP_SUN_D, vd, vj, vk)); +} + +/* Emits the `vfcmp.cult.d vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vfcmp_cult_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VFCMP_CULT_D, vd, vj, vk)); +} + +/* Emits the `vfcmp.sult.d vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vfcmp_sult_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VFCMP_SULT_D, vd, vj, vk)); +} + +/* Emits the `vfcmp.cueq.d vd, vj, vk` instruction. 
*/ +static void __attribute__((unused)) +tcg_out_opc_vfcmp_cueq_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VFCMP_CUEQ_D, vd, vj, vk)); +} + +/* Emits the `vfcmp.sueq.d vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vfcmp_sueq_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VFCMP_SUEQ_D, vd, vj, vk)); +} + +/* Emits the `vfcmp.cule.d vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vfcmp_cule_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VFCMP_CULE_D, vd, vj, vk)); +} + +/* Emits the `vfcmp.sule.d vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vfcmp_sule_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VFCMP_SULE_D, vd, vj, vk)); +} + +/* Emits the `vfcmp.cne.d vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vfcmp_cne_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VFCMP_CNE_D, vd, vj, vk)); +} + +/* Emits the `vfcmp.sne.d vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vfcmp_sne_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VFCMP_SNE_D, vd, vj, vk)); +} + +/* Emits the `vfcmp.cor.d vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vfcmp_cor_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VFCMP_COR_D, vd, vj, vk)); +} + +/* Emits the `vfcmp.sor.d vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vfcmp_sor_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VFCMP_SOR_D, vd, vj, vk)); +} + +/* Emits the `vfcmp.cune.d vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vfcmp_cune_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VFCMP_CUNE_D, vd, vj, vk)); +} + +/* Emits the `vfcmp.sune.d vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vfcmp_sune_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VFCMP_SUNE_D, vd, vj, vk)); +} + +/* Emits the `vbitsel.v vd, vj, vk, va` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vbitsel_v(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk, TCGReg va) +{ + tcg_out32(s, encode_vdvjvkva_insn(OPC_VBITSEL_V, vd, vj, vk, va)); +} + +/* Emits the `vshuf.b vd, vj, vk, va` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vshuf_b(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk, TCGReg va) +{ + tcg_out32(s, encode_vdvjvkva_insn(OPC_VSHUF_B, vd, vj, vk, va)); +} + +/* Emits the `addu16i.d d, j, sk16` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_addu16i_d(TCGContext *s, TCGReg d, TCGReg j, int32_t sk16) +{ + tcg_out32(s, encode_djsk16_insn(OPC_ADDU16I_D, d, j, sk16)); +} + +/* Emits the `lu12i.w d, sj20` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_lu12i_w(TCGContext *s, TCGReg d, int32_t sj20) +{ + tcg_out32(s, encode_dsj20_insn(OPC_LU12I_W, d, sj20)); +} + +/* Emits the `cu32i.d d, sj20` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_cu32i_d(TCGContext *s, TCGReg d, int32_t sj20) +{ + tcg_out32(s, encode_dsj20_insn(OPC_CU32I_D, d, sj20)); +} + +/* Emits the `pcaddu2i d, sj20` instruction. 
*/ +static void __attribute__((unused)) +tcg_out_opc_pcaddu2i(TCGContext *s, TCGReg d, int32_t sj20) +{ + tcg_out32(s, encode_dsj20_insn(OPC_PCADDU2I, d, sj20)); +} + +/* Emits the `pcalau12i d, sj20` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_pcalau12i(TCGContext *s, TCGReg d, int32_t sj20) +{ + tcg_out32(s, encode_dsj20_insn(OPC_PCALAU12I, d, sj20)); +} + +/* Emits the `pcaddu12i d, sj20` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_pcaddu12i(TCGContext *s, TCGReg d, int32_t sj20) +{ + tcg_out32(s, encode_dsj20_insn(OPC_PCADDU12I, d, sj20)); +} + +/* Emits the `pcaddu18i d, sj20` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_pcaddu18i(TCGContext *s, TCGReg d, int32_t sj20) +{ + tcg_out32(s, encode_dsj20_insn(OPC_PCADDU18I, d, sj20)); +} + +/* Emits the `ld.b d, j, sk12` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_ld_b(TCGContext *s, TCGReg d, TCGReg j, int32_t sk12) +{ + tcg_out32(s, encode_djsk12_insn(OPC_LD_B, d, j, sk12)); +} + +/* Emits the `ld.h d, j, sk12` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_ld_h(TCGContext *s, TCGReg d, TCGReg j, int32_t sk12) +{ + tcg_out32(s, encode_djsk12_insn(OPC_LD_H, d, j, sk12)); +} + +/* Emits the `ld.w d, j, sk12` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_ld_w(TCGContext *s, TCGReg d, TCGReg j, int32_t sk12) +{ + tcg_out32(s, encode_djsk12_insn(OPC_LD_W, d, j, sk12)); +} + +/* Emits the `ld.d d, j, sk12` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_ld_d(TCGContext *s, TCGReg d, TCGReg j, int32_t sk12) +{ + tcg_out32(s, encode_djsk12_insn(OPC_LD_D, d, j, sk12)); +} + +/* Emits the `st.b d, j, sk12` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_st_b(TCGContext *s, TCGReg d, TCGReg j, int32_t sk12) +{ + tcg_out32(s, encode_djsk12_insn(OPC_ST_B, d, j, sk12)); +} + +/* Emits the `st.h d, j, sk12` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_st_h(TCGContext *s, TCGReg d, TCGReg j, int32_t sk12) +{ + tcg_out32(s, encode_djsk12_insn(OPC_ST_H, d, j, sk12)); +} + +/* Emits the `st.w d, j, sk12` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_st_w(TCGContext *s, TCGReg d, TCGReg j, int32_t sk12) +{ + tcg_out32(s, encode_djsk12_insn(OPC_ST_W, d, j, sk12)); +} + +/* Emits the `st.d d, j, sk12` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_st_d(TCGContext *s, TCGReg d, TCGReg j, int32_t sk12) +{ + tcg_out32(s, encode_djsk12_insn(OPC_ST_D, d, j, sk12)); +} + +/* Emits the `ld.bu d, j, sk12` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_ld_bu(TCGContext *s, TCGReg d, TCGReg j, int32_t sk12) +{ + tcg_out32(s, encode_djsk12_insn(OPC_LD_BU, d, j, sk12)); +} + +/* Emits the `ld.hu d, j, sk12` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_ld_hu(TCGContext *s, TCGReg d, TCGReg j, int32_t sk12) +{ + tcg_out32(s, encode_djsk12_insn(OPC_LD_HU, d, j, sk12)); +} + +/* Emits the `ld.wu d, j, sk12` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_ld_wu(TCGContext *s, TCGReg d, TCGReg j, int32_t sk12) +{ + tcg_out32(s, encode_djsk12_insn(OPC_LD_WU, d, j, sk12)); +} + +/* Emits the `vld vd, j, sk12` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vld(TCGContext *s, TCGReg vd, TCGReg j, int32_t sk12) +{ + tcg_out32(s, encode_vdjsk12_insn(OPC_VLD, vd, j, sk12)); +} + +/* Emits the `vst vd, j, sk12` instruction. 
*/ +static void __attribute__((unused)) +tcg_out_opc_vst(TCGContext *s, TCGReg vd, TCGReg j, int32_t sk12) +{ + tcg_out32(s, encode_vdjsk12_insn(OPC_VST, vd, j, sk12)); +} + +/* Emits the `vldrepl.d vd, j, sk9` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vldrepl_d(TCGContext *s, TCGReg vd, TCGReg j, int32_t sk9) +{ + tcg_out32(s, encode_vdjsk9_insn(OPC_VLDREPL_D, vd, j, sk9)); +} + +/* Emits the `vldrepl.w vd, j, sk10` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vldrepl_w(TCGContext *s, TCGReg vd, TCGReg j, int32_t sk10) +{ + tcg_out32(s, encode_vdjsk10_insn(OPC_VLDREPL_W, vd, j, sk10)); +} + +/* Emits the `vldrepl.h vd, j, sk11` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vldrepl_h(TCGContext *s, TCGReg vd, TCGReg j, int32_t sk11) +{ + tcg_out32(s, encode_vdjsk11_insn(OPC_VLDREPL_H, vd, j, sk11)); +} + +/* Emits the `vldrepl.b vd, j, sk12` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vldrepl_b(TCGContext *s, TCGReg vd, TCGReg j, int32_t sk12) +{ + tcg_out32(s, encode_vdjsk12_insn(OPC_VLDREPL_B, vd, j, sk12)); +} + +/* Emits the `vstelm.d vd, j, sk8, un1` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vstelm_d(TCGContext *s, TCGReg vd, TCGReg j, int32_t sk8, + uint32_t un1) +{ + tcg_out32(s, encode_vdjsk8un1_insn(OPC_VSTELM_D, vd, j, sk8, un1)); +} + +/* Emits the `vstelm.w vd, j, sk8, un2` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vstelm_w(TCGContext *s, TCGReg vd, TCGReg j, int32_t sk8, + uint32_t un2) +{ + tcg_out32(s, encode_vdjsk8un2_insn(OPC_VSTELM_W, vd, j, sk8, un2)); +} + +/* Emits the `vstelm.h vd, j, sk8, un3` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vstelm_h(TCGContext *s, TCGReg vd, TCGReg j, int32_t sk8, + uint32_t un3) +{ + tcg_out32(s, encode_vdjsk8un3_insn(OPC_VSTELM_H, vd, j, sk8, un3)); +} + +/* Emits the `vstelm.b vd, j, sk8, un4` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vstelm_b(TCGContext *s, TCGReg vd, TCGReg j, int32_t sk8, + uint32_t un4) +{ + tcg_out32(s, encode_vdjsk8un4_insn(OPC_VSTELM_B, vd, j, sk8, un4)); +} + +/* Emits the `ldx.b d, j, k` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_ldx_b(TCGContext *s, TCGReg d, TCGReg j, TCGReg k) +{ + tcg_out32(s, encode_djk_insn(OPC_LDX_B, d, j, k)); +} + +/* Emits the `ldx.h d, j, k` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_ldx_h(TCGContext *s, TCGReg d, TCGReg j, TCGReg k) +{ + tcg_out32(s, encode_djk_insn(OPC_LDX_H, d, j, k)); +} + +/* Emits the `ldx.w d, j, k` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_ldx_w(TCGContext *s, TCGReg d, TCGReg j, TCGReg k) +{ + tcg_out32(s, encode_djk_insn(OPC_LDX_W, d, j, k)); +} + +/* Emits the `ldx.d d, j, k` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_ldx_d(TCGContext *s, TCGReg d, TCGReg j, TCGReg k) +{ + tcg_out32(s, encode_djk_insn(OPC_LDX_D, d, j, k)); +} + +/* Emits the `stx.b d, j, k` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_stx_b(TCGContext *s, TCGReg d, TCGReg j, TCGReg k) +{ + tcg_out32(s, encode_djk_insn(OPC_STX_B, d, j, k)); +} + +/* Emits the `stx.h d, j, k` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_stx_h(TCGContext *s, TCGReg d, TCGReg j, TCGReg k) +{ + tcg_out32(s, encode_djk_insn(OPC_STX_H, d, j, k)); +} + +/* Emits the `stx.w d, j, k` instruction. 
*/ +static void __attribute__((unused)) +tcg_out_opc_stx_w(TCGContext *s, TCGReg d, TCGReg j, TCGReg k) +{ + tcg_out32(s, encode_djk_insn(OPC_STX_W, d, j, k)); +} + +/* Emits the `stx.d d, j, k` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_stx_d(TCGContext *s, TCGReg d, TCGReg j, TCGReg k) +{ + tcg_out32(s, encode_djk_insn(OPC_STX_D, d, j, k)); +} + +/* Emits the `ldx.bu d, j, k` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_ldx_bu(TCGContext *s, TCGReg d, TCGReg j, TCGReg k) +{ + tcg_out32(s, encode_djk_insn(OPC_LDX_BU, d, j, k)); +} + +/* Emits the `ldx.hu d, j, k` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_ldx_hu(TCGContext *s, TCGReg d, TCGReg j, TCGReg k) +{ + tcg_out32(s, encode_djk_insn(OPC_LDX_HU, d, j, k)); +} + +/* Emits the `ldx.wu d, j, k` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_ldx_wu(TCGContext *s, TCGReg d, TCGReg j, TCGReg k) +{ + tcg_out32(s, encode_djk_insn(OPC_LDX_WU, d, j, k)); +} + +/* Emits the `vldx vd, j, k` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vldx(TCGContext *s, TCGReg vd, TCGReg j, TCGReg k) +{ + tcg_out32(s, encode_vdjk_insn(OPC_VLDX, vd, j, k)); +} + +/* Emits the `vstx vd, j, k` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vstx(TCGContext *s, TCGReg vd, TCGReg j, TCGReg k) +{ + tcg_out32(s, encode_vdjk_insn(OPC_VSTX, vd, j, k)); +} + +/* Emits the `dbar ud15` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_dbar(TCGContext *s, uint32_t ud15) +{ + tcg_out32(s, encode_ud15_insn(OPC_DBAR, ud15)); +} + +/* Emits the `jirl d, j, sk16` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_jirl(TCGContext *s, TCGReg d, TCGReg j, int32_t sk16) +{ + tcg_out32(s, encode_djsk16_insn(OPC_JIRL, d, j, sk16)); +} + +/* Emits the `b sd10k16` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_b(TCGContext *s, int32_t sd10k16) +{ + tcg_out32(s, encode_sd10k16_insn(OPC_B, sd10k16)); +} + +/* Emits the `bl sd10k16` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_bl(TCGContext *s, int32_t sd10k16) +{ + tcg_out32(s, encode_sd10k16_insn(OPC_BL, sd10k16)); +} + +/* Emits the `beq d, j, sk16` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_beq(TCGContext *s, TCGReg d, TCGReg j, int32_t sk16) +{ + tcg_out32(s, encode_djsk16_insn(OPC_BEQ, d, j, sk16)); +} + +/* Emits the `bne d, j, sk16` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_bne(TCGContext *s, TCGReg d, TCGReg j, int32_t sk16) +{ + tcg_out32(s, encode_djsk16_insn(OPC_BNE, d, j, sk16)); +} + +/* Emits the `bgt d, j, sk16` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_bgt(TCGContext *s, TCGReg d, TCGReg j, int32_t sk16) +{ + tcg_out32(s, encode_djsk16_insn(OPC_BGT, d, j, sk16)); +} + +/* Emits the `ble d, j, sk16` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_ble(TCGContext *s, TCGReg d, TCGReg j, int32_t sk16) +{ + tcg_out32(s, encode_djsk16_insn(OPC_BLE, d, j, sk16)); +} + +/* Emits the `bgtu d, j, sk16` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_bgtu(TCGContext *s, TCGReg d, TCGReg j, int32_t sk16) +{ + tcg_out32(s, encode_djsk16_insn(OPC_BGTU, d, j, sk16)); +} + +/* Emits the `bleu d, j, sk16` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_bleu(TCGContext *s, TCGReg d, TCGReg j, int32_t sk16) +{ + tcg_out32(s, encode_djsk16_insn(OPC_BLEU, d, j, sk16)); +} + +/* Emits the `vseq.b vd, vj, vk` instruction. 
*/ +static void __attribute__((unused)) +tcg_out_opc_vseq_b(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSEQ_B, vd, vj, vk)); +} + +/* Emits the `vseq.h vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vseq_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSEQ_H, vd, vj, vk)); +} + +/* Emits the `vseq.w vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vseq_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSEQ_W, vd, vj, vk)); +} + +/* Emits the `vseq.d vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vseq_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSEQ_D, vd, vj, vk)); +} + +/* Emits the `vsle.b vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsle_b(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSLE_B, vd, vj, vk)); +} + +/* Emits the `vsle.h vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsle_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSLE_H, vd, vj, vk)); +} + +/* Emits the `vsle.w vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsle_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSLE_W, vd, vj, vk)); +} + +/* Emits the `vsle.d vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsle_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSLE_D, vd, vj, vk)); +} + +/* Emits the `vsle.bu vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsle_bu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSLE_BU, vd, vj, vk)); +} + +/* Emits the `vsle.hu vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsle_hu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSLE_HU, vd, vj, vk)); +} + +/* Emits the `vsle.wu vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsle_wu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSLE_WU, vd, vj, vk)); +} + +/* Emits the `vsle.du vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsle_du(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSLE_DU, vd, vj, vk)); +} + +/* Emits the `vslt.b vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vslt_b(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSLT_B, vd, vj, vk)); +} + +/* Emits the `vslt.h vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vslt_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSLT_H, vd, vj, vk)); +} + +/* Emits the `vslt.w vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vslt_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSLT_W, vd, vj, vk)); +} + +/* Emits the `vslt.d vd, vj, vk` instruction. 
*/ +static void __attribute__((unused)) +tcg_out_opc_vslt_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSLT_D, vd, vj, vk)); +} + +/* Emits the `vslt.bu vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vslt_bu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSLT_BU, vd, vj, vk)); +} + +/* Emits the `vslt.hu vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vslt_hu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSLT_HU, vd, vj, vk)); +} + +/* Emits the `vslt.wu vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vslt_wu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSLT_WU, vd, vj, vk)); +} + +/* Emits the `vslt.du vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vslt_du(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSLT_DU, vd, vj, vk)); +} + +/* Emits the `vadd.b vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vadd_b(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VADD_B, vd, vj, vk)); +} + +/* Emits the `vadd.h vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vadd_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VADD_H, vd, vj, vk)); +} + +/* Emits the `vadd.w vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vadd_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VADD_W, vd, vj, vk)); +} + +/* Emits the `vadd.d vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vadd_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VADD_D, vd, vj, vk)); +} + +/* Emits the `vsub.b vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsub_b(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSUB_B, vd, vj, vk)); +} + +/* Emits the `vsub.h vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsub_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSUB_H, vd, vj, vk)); +} + +/* Emits the `vsub.w vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsub_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSUB_W, vd, vj, vk)); +} + +/* Emits the `vsub.d vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsub_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSUB_D, vd, vj, vk)); +} + +/* Emits the `vaddwev.h.b vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vaddwev_h_b(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VADDWEV_H_B, vd, vj, vk)); +} + +/* Emits the `vaddwev.w.h vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vaddwev_w_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VADDWEV_W_H, vd, vj, vk)); +} + +/* Emits the `vaddwev.d.w vd, vj, vk` instruction. 
*/ +static void __attribute__((unused)) +tcg_out_opc_vaddwev_d_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VADDWEV_D_W, vd, vj, vk)); +} + +/* Emits the `vaddwev.q.d vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vaddwev_q_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VADDWEV_Q_D, vd, vj, vk)); +} + +/* Emits the `vsubwev.h.b vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsubwev_h_b(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSUBWEV_H_B, vd, vj, vk)); +} + +/* Emits the `vsubwev.w.h vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsubwev_w_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSUBWEV_W_H, vd, vj, vk)); +} + +/* Emits the `vsubwev.d.w vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsubwev_d_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSUBWEV_D_W, vd, vj, vk)); +} + +/* Emits the `vsubwev.q.d vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsubwev_q_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSUBWEV_Q_D, vd, vj, vk)); +} + +/* Emits the `vaddwod.h.b vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vaddwod_h_b(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VADDWOD_H_B, vd, vj, vk)); +} + +/* Emits the `vaddwod.w.h vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vaddwod_w_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VADDWOD_W_H, vd, vj, vk)); +} + +/* Emits the `vaddwod.d.w vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vaddwod_d_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VADDWOD_D_W, vd, vj, vk)); +} + +/* Emits the `vaddwod.q.d vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vaddwod_q_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VADDWOD_Q_D, vd, vj, vk)); +} + +/* Emits the `vsubwod.h.b vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsubwod_h_b(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSUBWOD_H_B, vd, vj, vk)); +} + +/* Emits the `vsubwod.w.h vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsubwod_w_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSUBWOD_W_H, vd, vj, vk)); +} + +/* Emits the `vsubwod.d.w vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsubwod_d_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSUBWOD_D_W, vd, vj, vk)); +} + +/* Emits the `vsubwod.q.d vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsubwod_q_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSUBWOD_Q_D, vd, vj, vk)); +} + +/* Emits the `vaddwev.h.bu vd, vj, vk` instruction. 
*/ +static void __attribute__((unused)) +tcg_out_opc_vaddwev_h_bu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VADDWEV_H_BU, vd, vj, vk)); +} + +/* Emits the `vaddwev.w.hu vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vaddwev_w_hu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VADDWEV_W_HU, vd, vj, vk)); +} + +/* Emits the `vaddwev.d.wu vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vaddwev_d_wu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VADDWEV_D_WU, vd, vj, vk)); +} + +/* Emits the `vaddwev.q.du vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vaddwev_q_du(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VADDWEV_Q_DU, vd, vj, vk)); +} + +/* Emits the `vsubwev.h.bu vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsubwev_h_bu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSUBWEV_H_BU, vd, vj, vk)); +} + +/* Emits the `vsubwev.w.hu vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsubwev_w_hu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSUBWEV_W_HU, vd, vj, vk)); +} + +/* Emits the `vsubwev.d.wu vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsubwev_d_wu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSUBWEV_D_WU, vd, vj, vk)); +} + +/* Emits the `vsubwev.q.du vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsubwev_q_du(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSUBWEV_Q_DU, vd, vj, vk)); +} + +/* Emits the `vaddwod.h.bu vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vaddwod_h_bu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VADDWOD_H_BU, vd, vj, vk)); +} + +/* Emits the `vaddwod.w.hu vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vaddwod_w_hu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VADDWOD_W_HU, vd, vj, vk)); +} + +/* Emits the `vaddwod.d.wu vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vaddwod_d_wu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VADDWOD_D_WU, vd, vj, vk)); +} + +/* Emits the `vaddwod.q.du vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vaddwod_q_du(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VADDWOD_Q_DU, vd, vj, vk)); +} + +/* Emits the `vsubwod.h.bu vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsubwod_h_bu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSUBWOD_H_BU, vd, vj, vk)); +} + +/* Emits the `vsubwod.w.hu vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsubwod_w_hu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSUBWOD_W_HU, vd, vj, vk)); +} + +/* Emits the `vsubwod.d.wu vd, vj, vk` instruction. 
*/ +static void __attribute__((unused)) +tcg_out_opc_vsubwod_d_wu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSUBWOD_D_WU, vd, vj, vk)); +} + +/* Emits the `vsubwod.q.du vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsubwod_q_du(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSUBWOD_Q_DU, vd, vj, vk)); +} + +/* Emits the `vaddwev.h.bu.b vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vaddwev_h_bu_b(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VADDWEV_H_BU_B, vd, vj, vk)); +} + +/* Emits the `vaddwev.w.hu.h vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vaddwev_w_hu_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VADDWEV_W_HU_H, vd, vj, vk)); +} + +/* Emits the `vaddwev.d.wu.w vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vaddwev_d_wu_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VADDWEV_D_WU_W, vd, vj, vk)); +} + +/* Emits the `vaddwev.q.du.d vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vaddwev_q_du_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VADDWEV_Q_DU_D, vd, vj, vk)); +} + +/* Emits the `vaddwod.h.bu.b vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vaddwod_h_bu_b(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VADDWOD_H_BU_B, vd, vj, vk)); +} + +/* Emits the `vaddwod.w.hu.h vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vaddwod_w_hu_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VADDWOD_W_HU_H, vd, vj, vk)); +} + +/* Emits the `vaddwod.d.wu.w vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vaddwod_d_wu_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VADDWOD_D_WU_W, vd, vj, vk)); +} + +/* Emits the `vaddwod.q.du.d vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vaddwod_q_du_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VADDWOD_Q_DU_D, vd, vj, vk)); +} + +/* Emits the `vsadd.b vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsadd_b(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSADD_B, vd, vj, vk)); +} + +/* Emits the `vsadd.h vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsadd_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSADD_H, vd, vj, vk)); +} + +/* Emits the `vsadd.w vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsadd_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSADD_W, vd, vj, vk)); +} + +/* Emits the `vsadd.d vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsadd_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSADD_D, vd, vj, vk)); +} + +/* Emits the `vssub.b vd, vj, vk` instruction. 
*/ +static void __attribute__((unused)) +tcg_out_opc_vssub_b(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSSUB_B, vd, vj, vk)); +} + +/* Emits the `vssub.h vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vssub_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSSUB_H, vd, vj, vk)); +} + +/* Emits the `vssub.w vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vssub_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSSUB_W, vd, vj, vk)); +} + +/* Emits the `vssub.d vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vssub_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSSUB_D, vd, vj, vk)); +} + +/* Emits the `vsadd.bu vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsadd_bu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSADD_BU, vd, vj, vk)); +} + +/* Emits the `vsadd.hu vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsadd_hu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSADD_HU, vd, vj, vk)); +} + +/* Emits the `vsadd.wu vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsadd_wu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSADD_WU, vd, vj, vk)); +} + +/* Emits the `vsadd.du vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsadd_du(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSADD_DU, vd, vj, vk)); +} + +/* Emits the `vssub.bu vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vssub_bu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSSUB_BU, vd, vj, vk)); +} + +/* Emits the `vssub.hu vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vssub_hu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSSUB_HU, vd, vj, vk)); +} + +/* Emits the `vssub.wu vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vssub_wu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSSUB_WU, vd, vj, vk)); +} + +/* Emits the `vssub.du vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vssub_du(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSSUB_DU, vd, vj, vk)); +} + +/* Emits the `vhaddw.h.b vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vhaddw_h_b(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VHADDW_H_B, vd, vj, vk)); +} + +/* Emits the `vhaddw.w.h vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vhaddw_w_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VHADDW_W_H, vd, vj, vk)); +} + +/* Emits the `vhaddw.d.w vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vhaddw_d_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VHADDW_D_W, vd, vj, vk)); +} + +/* Emits the `vhaddw.q.d vd, vj, vk` instruction. 
*/ +static void __attribute__((unused)) +tcg_out_opc_vhaddw_q_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VHADDW_Q_D, vd, vj, vk)); +} + +/* Emits the `vhsubw.h.b vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vhsubw_h_b(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VHSUBW_H_B, vd, vj, vk)); +} + +/* Emits the `vhsubw.w.h vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vhsubw_w_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VHSUBW_W_H, vd, vj, vk)); +} + +/* Emits the `vhsubw.d.w vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vhsubw_d_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VHSUBW_D_W, vd, vj, vk)); +} + +/* Emits the `vhsubw.q.d vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vhsubw_q_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VHSUBW_Q_D, vd, vj, vk)); +} + +/* Emits the `vhaddw.hu.bu vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vhaddw_hu_bu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VHADDW_HU_BU, vd, vj, vk)); +} + +/* Emits the `vhaddw.wu.hu vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vhaddw_wu_hu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VHADDW_WU_HU, vd, vj, vk)); +} + +/* Emits the `vhaddw.du.wu vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vhaddw_du_wu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VHADDW_DU_WU, vd, vj, vk)); +} + +/* Emits the `vhaddw.qu.du vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vhaddw_qu_du(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VHADDW_QU_DU, vd, vj, vk)); +} + +/* Emits the `vhsubw.hu.bu vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vhsubw_hu_bu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VHSUBW_HU_BU, vd, vj, vk)); +} + +/* Emits the `vhsubw.wu.hu vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vhsubw_wu_hu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VHSUBW_WU_HU, vd, vj, vk)); +} + +/* Emits the `vhsubw.du.wu vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vhsubw_du_wu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VHSUBW_DU_WU, vd, vj, vk)); +} + +/* Emits the `vhsubw.qu.du vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vhsubw_qu_du(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VHSUBW_QU_DU, vd, vj, vk)); +} + +/* Emits the `vadda.b vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vadda_b(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VADDA_B, vd, vj, vk)); +} + +/* Emits the `vadda.h vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vadda_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VADDA_H, vd, vj, vk)); +} + +/* Emits the `vadda.w vd, vj, vk` instruction. 
*/ +static void __attribute__((unused)) +tcg_out_opc_vadda_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VADDA_W, vd, vj, vk)); +} + +/* Emits the `vadda.d vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vadda_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VADDA_D, vd, vj, vk)); +} + +/* Emits the `vabsd.b vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vabsd_b(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VABSD_B, vd, vj, vk)); +} + +/* Emits the `vabsd.h vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vabsd_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VABSD_H, vd, vj, vk)); +} + +/* Emits the `vabsd.w vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vabsd_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VABSD_W, vd, vj, vk)); +} + +/* Emits the `vabsd.d vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vabsd_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VABSD_D, vd, vj, vk)); +} + +/* Emits the `vabsd.bu vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vabsd_bu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VABSD_BU, vd, vj, vk)); +} + +/* Emits the `vabsd.hu vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vabsd_hu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VABSD_HU, vd, vj, vk)); +} + +/* Emits the `vabsd.wu vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vabsd_wu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VABSD_WU, vd, vj, vk)); +} + +/* Emits the `vabsd.du vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vabsd_du(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VABSD_DU, vd, vj, vk)); +} + +/* Emits the `vavg.b vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vavg_b(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VAVG_B, vd, vj, vk)); +} + +/* Emits the `vavg.h vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vavg_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VAVG_H, vd, vj, vk)); +} + +/* Emits the `vavg.w vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vavg_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VAVG_W, vd, vj, vk)); +} + +/* Emits the `vavg.d vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vavg_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VAVG_D, vd, vj, vk)); +} + +/* Emits the `vavg.bu vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vavg_bu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VAVG_BU, vd, vj, vk)); +} + +/* Emits the `vavg.hu vd, vj, vk` instruction. 
*/ +static void __attribute__((unused)) +tcg_out_opc_vavg_hu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VAVG_HU, vd, vj, vk)); +} + +/* Emits the `vavg.wu vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vavg_wu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VAVG_WU, vd, vj, vk)); +} + +/* Emits the `vavg.du vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vavg_du(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VAVG_DU, vd, vj, vk)); +} + +/* Emits the `vavgr.b vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vavgr_b(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VAVGR_B, vd, vj, vk)); +} + +/* Emits the `vavgr.h vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vavgr_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VAVGR_H, vd, vj, vk)); +} + +/* Emits the `vavgr.w vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vavgr_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VAVGR_W, vd, vj, vk)); +} + +/* Emits the `vavgr.d vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vavgr_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VAVGR_D, vd, vj, vk)); +} + +/* Emits the `vavgr.bu vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vavgr_bu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VAVGR_BU, vd, vj, vk)); +} + +/* Emits the `vavgr.hu vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vavgr_hu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VAVGR_HU, vd, vj, vk)); +} + +/* Emits the `vavgr.wu vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vavgr_wu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VAVGR_WU, vd, vj, vk)); +} + +/* Emits the `vavgr.du vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vavgr_du(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VAVGR_DU, vd, vj, vk)); +} + +/* Emits the `vmax.b vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vmax_b(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VMAX_B, vd, vj, vk)); +} + +/* Emits the `vmax.h vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vmax_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VMAX_H, vd, vj, vk)); +} + +/* Emits the `vmax.w vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vmax_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VMAX_W, vd, vj, vk)); +} + +/* Emits the `vmax.d vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vmax_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VMAX_D, vd, vj, vk)); +} + +/* Emits the `vmin.b vd, vj, vk` instruction. 
*/ +static void __attribute__((unused)) +tcg_out_opc_vmin_b(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VMIN_B, vd, vj, vk)); +} + +/* Emits the `vmin.h vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vmin_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VMIN_H, vd, vj, vk)); +} + +/* Emits the `vmin.w vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vmin_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VMIN_W, vd, vj, vk)); +} + +/* Emits the `vmin.d vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vmin_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VMIN_D, vd, vj, vk)); +} + +/* Emits the `vmax.bu vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vmax_bu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VMAX_BU, vd, vj, vk)); +} + +/* Emits the `vmax.hu vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vmax_hu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VMAX_HU, vd, vj, vk)); +} + +/* Emits the `vmax.wu vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vmax_wu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VMAX_WU, vd, vj, vk)); +} + +/* Emits the `vmax.du vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vmax_du(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VMAX_DU, vd, vj, vk)); +} + +/* Emits the `vmin.bu vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vmin_bu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VMIN_BU, vd, vj, vk)); +} + +/* Emits the `vmin.hu vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vmin_hu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VMIN_HU, vd, vj, vk)); +} + +/* Emits the `vmin.wu vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vmin_wu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VMIN_WU, vd, vj, vk)); +} + +/* Emits the `vmin.du vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vmin_du(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VMIN_DU, vd, vj, vk)); +} + +/* Emits the `vmul.b vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vmul_b(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VMUL_B, vd, vj, vk)); +} + +/* Emits the `vmul.h vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vmul_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VMUL_H, vd, vj, vk)); +} + +/* Emits the `vmul.w vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vmul_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VMUL_W, vd, vj, vk)); +} + +/* Emits the `vmul.d vd, vj, vk` instruction. 
*/ +static void __attribute__((unused)) +tcg_out_opc_vmul_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VMUL_D, vd, vj, vk)); +} + +/* Emits the `vmuh.b vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vmuh_b(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VMUH_B, vd, vj, vk)); +} + +/* Emits the `vmuh.h vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vmuh_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VMUH_H, vd, vj, vk)); +} + +/* Emits the `vmuh.w vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vmuh_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VMUH_W, vd, vj, vk)); +} + +/* Emits the `vmuh.d vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vmuh_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VMUH_D, vd, vj, vk)); +} + +/* Emits the `vmuh.bu vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vmuh_bu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VMUH_BU, vd, vj, vk)); +} + +/* Emits the `vmuh.hu vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vmuh_hu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VMUH_HU, vd, vj, vk)); +} + +/* Emits the `vmuh.wu vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vmuh_wu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VMUH_WU, vd, vj, vk)); +} + +/* Emits the `vmuh.du vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vmuh_du(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VMUH_DU, vd, vj, vk)); +} + +/* Emits the `vmulwev.h.b vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vmulwev_h_b(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VMULWEV_H_B, vd, vj, vk)); +} + +/* Emits the `vmulwev.w.h vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vmulwev_w_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VMULWEV_W_H, vd, vj, vk)); +} + +/* Emits the `vmulwev.d.w vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vmulwev_d_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VMULWEV_D_W, vd, vj, vk)); +} + +/* Emits the `vmulwev.q.d vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vmulwev_q_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VMULWEV_Q_D, vd, vj, vk)); +} + +/* Emits the `vmulwod.h.b vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vmulwod_h_b(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VMULWOD_H_B, vd, vj, vk)); +} + +/* Emits the `vmulwod.w.h vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vmulwod_w_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VMULWOD_W_H, vd, vj, vk)); +} + +/* Emits the `vmulwod.d.w vd, vj, vk` instruction. 
*/ +static void __attribute__((unused)) +tcg_out_opc_vmulwod_d_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VMULWOD_D_W, vd, vj, vk)); +} + +/* Emits the `vmulwod.q.d vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vmulwod_q_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VMULWOD_Q_D, vd, vj, vk)); +} + +/* Emits the `vmulwev.h.bu vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vmulwev_h_bu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VMULWEV_H_BU, vd, vj, vk)); +} + +/* Emits the `vmulwev.w.hu vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vmulwev_w_hu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VMULWEV_W_HU, vd, vj, vk)); +} + +/* Emits the `vmulwev.d.wu vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vmulwev_d_wu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VMULWEV_D_WU, vd, vj, vk)); +} + +/* Emits the `vmulwev.q.du vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vmulwev_q_du(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VMULWEV_Q_DU, vd, vj, vk)); +} + +/* Emits the `vmulwod.h.bu vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vmulwod_h_bu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VMULWOD_H_BU, vd, vj, vk)); +} + +/* Emits the `vmulwod.w.hu vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vmulwod_w_hu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VMULWOD_W_HU, vd, vj, vk)); +} + +/* Emits the `vmulwod.d.wu vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vmulwod_d_wu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VMULWOD_D_WU, vd, vj, vk)); +} + +/* Emits the `vmulwod.q.du vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vmulwod_q_du(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VMULWOD_Q_DU, vd, vj, vk)); +} + +/* Emits the `vmulwev.h.bu.b vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vmulwev_h_bu_b(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VMULWEV_H_BU_B, vd, vj, vk)); +} + +/* Emits the `vmulwev.w.hu.h vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vmulwev_w_hu_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VMULWEV_W_HU_H, vd, vj, vk)); +} + +/* Emits the `vmulwev.d.wu.w vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vmulwev_d_wu_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VMULWEV_D_WU_W, vd, vj, vk)); +} + +/* Emits the `vmulwev.q.du.d vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vmulwev_q_du_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VMULWEV_Q_DU_D, vd, vj, vk)); +} + +/* Emits the `vmulwod.h.bu.b vd, vj, vk` instruction. 
*/ +static void __attribute__((unused)) +tcg_out_opc_vmulwod_h_bu_b(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VMULWOD_H_BU_B, vd, vj, vk)); +} + +/* Emits the `vmulwod.w.hu.h vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vmulwod_w_hu_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VMULWOD_W_HU_H, vd, vj, vk)); +} + +/* Emits the `vmulwod.d.wu.w vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vmulwod_d_wu_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VMULWOD_D_WU_W, vd, vj, vk)); +} + +/* Emits the `vmulwod.q.du.d vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vmulwod_q_du_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VMULWOD_Q_DU_D, vd, vj, vk)); +} + +/* Emits the `vmadd.b vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vmadd_b(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VMADD_B, vd, vj, vk)); +} + +/* Emits the `vmadd.h vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vmadd_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VMADD_H, vd, vj, vk)); +} + +/* Emits the `vmadd.w vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vmadd_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VMADD_W, vd, vj, vk)); +} + +/* Emits the `vmadd.d vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vmadd_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VMADD_D, vd, vj, vk)); +} + +/* Emits the `vmsub.b vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vmsub_b(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VMSUB_B, vd, vj, vk)); +} + +/* Emits the `vmsub.h vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vmsub_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VMSUB_H, vd, vj, vk)); +} + +/* Emits the `vmsub.w vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vmsub_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VMSUB_W, vd, vj, vk)); +} + +/* Emits the `vmsub.d vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vmsub_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VMSUB_D, vd, vj, vk)); +} + +/* Emits the `vmaddwev.h.b vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vmaddwev_h_b(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VMADDWEV_H_B, vd, vj, vk)); +} + +/* Emits the `vmaddwev.w.h vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vmaddwev_w_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VMADDWEV_W_H, vd, vj, vk)); +} + +/* Emits the `vmaddwev.d.w vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vmaddwev_d_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VMADDWEV_D_W, vd, vj, vk)); +} + +/* Emits the `vmaddwev.q.d vd, vj, vk` instruction. 
*/ +static void __attribute__((unused)) +tcg_out_opc_vmaddwev_q_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VMADDWEV_Q_D, vd, vj, vk)); +} + +/* Emits the `vmaddwod.h.b vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vmaddwod_h_b(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VMADDWOD_H_B, vd, vj, vk)); +} + +/* Emits the `vmaddwod.w.h vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vmaddwod_w_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VMADDWOD_W_H, vd, vj, vk)); +} + +/* Emits the `vmaddwod.d.w vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vmaddwod_d_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VMADDWOD_D_W, vd, vj, vk)); +} + +/* Emits the `vmaddwod.q.d vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vmaddwod_q_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VMADDWOD_Q_D, vd, vj, vk)); +} + +/* Emits the `vmaddwev.h.bu vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vmaddwev_h_bu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VMADDWEV_H_BU, vd, vj, vk)); +} + +/* Emits the `vmaddwev.w.hu vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vmaddwev_w_hu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VMADDWEV_W_HU, vd, vj, vk)); +} + +/* Emits the `vmaddwev.d.wu vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vmaddwev_d_wu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VMADDWEV_D_WU, vd, vj, vk)); +} + +/* Emits the `vmaddwev.q.du vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vmaddwev_q_du(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VMADDWEV_Q_DU, vd, vj, vk)); +} + +/* Emits the `vmaddwod.h.bu vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vmaddwod_h_bu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VMADDWOD_H_BU, vd, vj, vk)); +} + +/* Emits the `vmaddwod.w.hu vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vmaddwod_w_hu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VMADDWOD_W_HU, vd, vj, vk)); +} + +/* Emits the `vmaddwod.d.wu vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vmaddwod_d_wu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VMADDWOD_D_WU, vd, vj, vk)); +} + +/* Emits the `vmaddwod.q.du vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vmaddwod_q_du(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VMADDWOD_Q_DU, vd, vj, vk)); +} + +/* Emits the `vmaddwev.h.bu.b vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vmaddwev_h_bu_b(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VMADDWEV_H_BU_B, vd, vj, vk)); +} + +/* Emits the `vmaddwev.w.hu.h vd, vj, vk` instruction. 
*/ +static void __attribute__((unused)) +tcg_out_opc_vmaddwev_w_hu_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VMADDWEV_W_HU_H, vd, vj, vk)); +} + +/* Emits the `vmaddwev.d.wu.w vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vmaddwev_d_wu_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VMADDWEV_D_WU_W, vd, vj, vk)); +} + +/* Emits the `vmaddwev.q.du.d vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vmaddwev_q_du_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VMADDWEV_Q_DU_D, vd, vj, vk)); +} + +/* Emits the `vmaddwod.h.bu.b vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vmaddwod_h_bu_b(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VMADDWOD_H_BU_B, vd, vj, vk)); +} + +/* Emits the `vmaddwod.w.hu.h vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vmaddwod_w_hu_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VMADDWOD_W_HU_H, vd, vj, vk)); +} + +/* Emits the `vmaddwod.d.wu.w vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vmaddwod_d_wu_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VMADDWOD_D_WU_W, vd, vj, vk)); +} + +/* Emits the `vmaddwod.q.du.d vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vmaddwod_q_du_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VMADDWOD_Q_DU_D, vd, vj, vk)); +} + +/* Emits the `vdiv.b vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vdiv_b(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VDIV_B, vd, vj, vk)); +} + +/* Emits the `vdiv.h vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vdiv_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VDIV_H, vd, vj, vk)); +} + +/* Emits the `vdiv.w vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vdiv_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VDIV_W, vd, vj, vk)); +} + +/* Emits the `vdiv.d vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vdiv_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VDIV_D, vd, vj, vk)); +} + +/* Emits the `vmod.b vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vmod_b(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VMOD_B, vd, vj, vk)); +} + +/* Emits the `vmod.h vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vmod_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VMOD_H, vd, vj, vk)); +} + +/* Emits the `vmod.w vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vmod_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VMOD_W, vd, vj, vk)); +} + +/* Emits the `vmod.d vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vmod_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VMOD_D, vd, vj, vk)); +} + +/* Emits the `vdiv.bu vd, vj, vk` instruction. 
*/ +static void __attribute__((unused)) +tcg_out_opc_vdiv_bu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VDIV_BU, vd, vj, vk)); +} + +/* Emits the `vdiv.hu vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vdiv_hu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VDIV_HU, vd, vj, vk)); +} + +/* Emits the `vdiv.wu vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vdiv_wu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VDIV_WU, vd, vj, vk)); +} + +/* Emits the `vdiv.du vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vdiv_du(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VDIV_DU, vd, vj, vk)); +} + +/* Emits the `vmod.bu vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vmod_bu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VMOD_BU, vd, vj, vk)); +} + +/* Emits the `vmod.hu vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vmod_hu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VMOD_HU, vd, vj, vk)); +} + +/* Emits the `vmod.wu vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vmod_wu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VMOD_WU, vd, vj, vk)); +} + +/* Emits the `vmod.du vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vmod_du(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VMOD_DU, vd, vj, vk)); +} + +/* Emits the `vsll.b vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsll_b(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSLL_B, vd, vj, vk)); +} + +/* Emits the `vsll.h vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsll_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSLL_H, vd, vj, vk)); +} + +/* Emits the `vsll.w vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsll_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSLL_W, vd, vj, vk)); +} + +/* Emits the `vsll.d vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsll_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSLL_D, vd, vj, vk)); +} + +/* Emits the `vsrl.b vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsrl_b(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSRL_B, vd, vj, vk)); +} + +/* Emits the `vsrl.h vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsrl_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSRL_H, vd, vj, vk)); +} + +/* Emits the `vsrl.w vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsrl_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSRL_W, vd, vj, vk)); +} + +/* Emits the `vsrl.d vd, vj, vk` instruction. 
*/ +static void __attribute__((unused)) +tcg_out_opc_vsrl_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSRL_D, vd, vj, vk)); +} + +/* Emits the `vsra.b vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsra_b(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSRA_B, vd, vj, vk)); +} + +/* Emits the `vsra.h vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsra_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSRA_H, vd, vj, vk)); +} + +/* Emits the `vsra.w vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsra_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSRA_W, vd, vj, vk)); +} + +/* Emits the `vsra.d vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsra_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSRA_D, vd, vj, vk)); +} + +/* Emits the `vrotr.b vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vrotr_b(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VROTR_B, vd, vj, vk)); +} + +/* Emits the `vrotr.h vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vrotr_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VROTR_H, vd, vj, vk)); +} + +/* Emits the `vrotr.w vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vrotr_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VROTR_W, vd, vj, vk)); +} + +/* Emits the `vrotr.d vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vrotr_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VROTR_D, vd, vj, vk)); +} + +/* Emits the `vsrlr.b vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsrlr_b(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSRLR_B, vd, vj, vk)); +} + +/* Emits the `vsrlr.h vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsrlr_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSRLR_H, vd, vj, vk)); +} + +/* Emits the `vsrlr.w vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsrlr_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSRLR_W, vd, vj, vk)); +} + +/* Emits the `vsrlr.d vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsrlr_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSRLR_D, vd, vj, vk)); +} + +/* Emits the `vsrar.b vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsrar_b(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSRAR_B, vd, vj, vk)); +} + +/* Emits the `vsrar.h vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsrar_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSRAR_H, vd, vj, vk)); +} + +/* Emits the `vsrar.w vd, vj, vk` instruction. 
*/ +static void __attribute__((unused)) +tcg_out_opc_vsrar_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSRAR_W, vd, vj, vk)); +} + +/* Emits the `vsrar.d vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsrar_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSRAR_D, vd, vj, vk)); +} + +/* Emits the `vsrln.b.h vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsrln_b_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSRLN_B_H, vd, vj, vk)); +} + +/* Emits the `vsrln.h.w vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsrln_h_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSRLN_H_W, vd, vj, vk)); +} + +/* Emits the `vsrln.w.d vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsrln_w_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSRLN_W_D, vd, vj, vk)); +} + +/* Emits the `vsran.b.h vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsran_b_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSRAN_B_H, vd, vj, vk)); +} + +/* Emits the `vsran.h.w vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsran_h_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSRAN_H_W, vd, vj, vk)); +} + +/* Emits the `vsran.w.d vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsran_w_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSRAN_W_D, vd, vj, vk)); +} + +/* Emits the `vsrlrn.b.h vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsrlrn_b_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSRLRN_B_H, vd, vj, vk)); +} + +/* Emits the `vsrlrn.h.w vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsrlrn_h_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSRLRN_H_W, vd, vj, vk)); +} + +/* Emits the `vsrlrn.w.d vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsrlrn_w_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSRLRN_W_D, vd, vj, vk)); +} + +/* Emits the `vsrarn.b.h vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsrarn_b_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSRARN_B_H, vd, vj, vk)); +} + +/* Emits the `vsrarn.h.w vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsrarn_h_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSRARN_H_W, vd, vj, vk)); +} + +/* Emits the `vsrarn.w.d vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsrarn_w_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSRARN_W_D, vd, vj, vk)); +} + +/* Emits the `vssrln.b.h vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vssrln_b_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSSRLN_B_H, vd, vj, vk)); +} + +/* Emits the `vssrln.h.w vd, vj, vk` instruction. 
*/ +static void __attribute__((unused)) +tcg_out_opc_vssrln_h_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSSRLN_H_W, vd, vj, vk)); +} + +/* Emits the `vssrln.w.d vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vssrln_w_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSSRLN_W_D, vd, vj, vk)); +} + +/* Emits the `vssran.b.h vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vssran_b_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSSRAN_B_H, vd, vj, vk)); +} + +/* Emits the `vssran.h.w vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vssran_h_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSSRAN_H_W, vd, vj, vk)); +} + +/* Emits the `vssran.w.d vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vssran_w_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSSRAN_W_D, vd, vj, vk)); +} + +/* Emits the `vssrlrn.b.h vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vssrlrn_b_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSSRLRN_B_H, vd, vj, vk)); +} + +/* Emits the `vssrlrn.h.w vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vssrlrn_h_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSSRLRN_H_W, vd, vj, vk)); +} + +/* Emits the `vssrlrn.w.d vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vssrlrn_w_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSSRLRN_W_D, vd, vj, vk)); +} + +/* Emits the `vssrarn.b.h vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vssrarn_b_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSSRARN_B_H, vd, vj, vk)); +} + +/* Emits the `vssrarn.h.w vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vssrarn_h_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSSRARN_H_W, vd, vj, vk)); +} + +/* Emits the `vssrarn.w.d vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vssrarn_w_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSSRARN_W_D, vd, vj, vk)); +} + +/* Emits the `vssrln.bu.h vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vssrln_bu_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSSRLN_BU_H, vd, vj, vk)); +} + +/* Emits the `vssrln.hu.w vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vssrln_hu_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSSRLN_HU_W, vd, vj, vk)); +} + +/* Emits the `vssrln.wu.d vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vssrln_wu_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSSRLN_WU_D, vd, vj, vk)); +} + +/* Emits the `vssran.bu.h vd, vj, vk` instruction. 
*/ +static void __attribute__((unused)) +tcg_out_opc_vssran_bu_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSSRAN_BU_H, vd, vj, vk)); +} + +/* Emits the `vssran.hu.w vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vssran_hu_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSSRAN_HU_W, vd, vj, vk)); +} + +/* Emits the `vssran.wu.d vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vssran_wu_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSSRAN_WU_D, vd, vj, vk)); +} + +/* Emits the `vssrlrn.bu.h vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vssrlrn_bu_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSSRLRN_BU_H, vd, vj, vk)); +} + +/* Emits the `vssrlrn.hu.w vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vssrlrn_hu_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSSRLRN_HU_W, vd, vj, vk)); +} + +/* Emits the `vssrlrn.wu.d vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vssrlrn_wu_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSSRLRN_WU_D, vd, vj, vk)); +} + +/* Emits the `vssrarn.bu.h vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vssrarn_bu_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSSRARN_BU_H, vd, vj, vk)); +} + +/* Emits the `vssrarn.hu.w vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vssrarn_hu_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSSRARN_HU_W, vd, vj, vk)); +} + +/* Emits the `vssrarn.wu.d vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vssrarn_wu_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSSRARN_WU_D, vd, vj, vk)); +} + +/* Emits the `vbitclr.b vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vbitclr_b(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VBITCLR_B, vd, vj, vk)); +} + +/* Emits the `vbitclr.h vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vbitclr_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VBITCLR_H, vd, vj, vk)); +} + +/* Emits the `vbitclr.w vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vbitclr_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VBITCLR_W, vd, vj, vk)); +} + +/* Emits the `vbitclr.d vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vbitclr_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VBITCLR_D, vd, vj, vk)); +} + +/* Emits the `vbitset.b vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vbitset_b(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VBITSET_B, vd, vj, vk)); +} + +/* Emits the `vbitset.h vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vbitset_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VBITSET_H, vd, vj, vk)); +} + +/* Emits the `vbitset.w vd, vj, vk` instruction. 
*/ +static void __attribute__((unused)) +tcg_out_opc_vbitset_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VBITSET_W, vd, vj, vk)); +} + +/* Emits the `vbitset.d vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vbitset_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VBITSET_D, vd, vj, vk)); +} + +/* Emits the `vbitrev.b vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vbitrev_b(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VBITREV_B, vd, vj, vk)); +} + +/* Emits the `vbitrev.h vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vbitrev_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VBITREV_H, vd, vj, vk)); +} + +/* Emits the `vbitrev.w vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vbitrev_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VBITREV_W, vd, vj, vk)); +} + +/* Emits the `vbitrev.d vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vbitrev_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VBITREV_D, vd, vj, vk)); +} + +/* Emits the `vpackev.b vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vpackev_b(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VPACKEV_B, vd, vj, vk)); +} + +/* Emits the `vpackev.h vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vpackev_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VPACKEV_H, vd, vj, vk)); +} + +/* Emits the `vpackev.w vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vpackev_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VPACKEV_W, vd, vj, vk)); +} + +/* Emits the `vpackev.d vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vpackev_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VPACKEV_D, vd, vj, vk)); +} + +/* Emits the `vpackod.b vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vpackod_b(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VPACKOD_B, vd, vj, vk)); +} + +/* Emits the `vpackod.h vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vpackod_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VPACKOD_H, vd, vj, vk)); +} + +/* Emits the `vpackod.w vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vpackod_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VPACKOD_W, vd, vj, vk)); +} + +/* Emits the `vpackod.d vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vpackod_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VPACKOD_D, vd, vj, vk)); +} + +/* Emits the `vilvl.b vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vilvl_b(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VILVL_B, vd, vj, vk)); +} + +/* Emits the `vilvl.h vd, vj, vk` instruction. 
*/ +static void __attribute__((unused)) +tcg_out_opc_vilvl_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VILVL_H, vd, vj, vk)); +} + +/* Emits the `vilvl.w vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vilvl_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VILVL_W, vd, vj, vk)); +} + +/* Emits the `vilvl.d vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vilvl_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VILVL_D, vd, vj, vk)); +} + +/* Emits the `vilvh.b vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vilvh_b(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VILVH_B, vd, vj, vk)); +} + +/* Emits the `vilvh.h vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vilvh_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VILVH_H, vd, vj, vk)); +} + +/* Emits the `vilvh.w vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vilvh_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VILVH_W, vd, vj, vk)); +} + +/* Emits the `vilvh.d vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vilvh_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VILVH_D, vd, vj, vk)); +} + +/* Emits the `vpickev.b vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vpickev_b(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VPICKEV_B, vd, vj, vk)); +} + +/* Emits the `vpickev.h vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vpickev_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VPICKEV_H, vd, vj, vk)); +} + +/* Emits the `vpickev.w vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vpickev_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VPICKEV_W, vd, vj, vk)); +} + +/* Emits the `vpickev.d vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vpickev_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VPICKEV_D, vd, vj, vk)); +} + +/* Emits the `vpickod.b vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vpickod_b(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VPICKOD_B, vd, vj, vk)); +} + +/* Emits the `vpickod.h vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vpickod_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VPICKOD_H, vd, vj, vk)); +} + +/* Emits the `vpickod.w vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vpickod_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VPICKOD_W, vd, vj, vk)); +} + +/* Emits the `vpickod.d vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vpickod_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VPICKOD_D, vd, vj, vk)); +} + +/* Emits the `vreplve.b vd, vj, k` instruction. 
*/ +static void __attribute__((unused)) +tcg_out_opc_vreplve_b(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg k) +{ + tcg_out32(s, encode_vdvjk_insn(OPC_VREPLVE_B, vd, vj, k)); +} + +/* Emits the `vreplve.h vd, vj, k` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vreplve_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg k) +{ + tcg_out32(s, encode_vdvjk_insn(OPC_VREPLVE_H, vd, vj, k)); +} + +/* Emits the `vreplve.w vd, vj, k` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vreplve_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg k) +{ + tcg_out32(s, encode_vdvjk_insn(OPC_VREPLVE_W, vd, vj, k)); +} + +/* Emits the `vreplve.d vd, vj, k` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vreplve_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg k) +{ + tcg_out32(s, encode_vdvjk_insn(OPC_VREPLVE_D, vd, vj, k)); +} + +/* Emits the `vand.v vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vand_v(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VAND_V, vd, vj, vk)); +} + +/* Emits the `vor.v vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vor_v(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VOR_V, vd, vj, vk)); +} + +/* Emits the `vxor.v vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vxor_v(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VXOR_V, vd, vj, vk)); +} + +/* Emits the `vnor.v vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vnor_v(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VNOR_V, vd, vj, vk)); +} + +/* Emits the `vandn.v vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vandn_v(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VANDN_V, vd, vj, vk)); +} + +/* Emits the `vorn.v vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vorn_v(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VORN_V, vd, vj, vk)); +} + +/* Emits the `vfrstp.b vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vfrstp_b(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VFRSTP_B, vd, vj, vk)); +} + +/* Emits the `vfrstp.h vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vfrstp_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VFRSTP_H, vd, vj, vk)); +} + +/* Emits the `vadd.q vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vadd_q(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VADD_Q, vd, vj, vk)); +} + +/* Emits the `vsub.q vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsub_q(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSUB_Q, vd, vj, vk)); +} + +/* Emits the `vsigncov.b vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsigncov_b(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSIGNCOV_B, vd, vj, vk)); +} + +/* Emits the `vsigncov.h vd, vj, vk` instruction. 
*/ +static void __attribute__((unused)) +tcg_out_opc_vsigncov_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSIGNCOV_H, vd, vj, vk)); +} + +/* Emits the `vsigncov.w vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsigncov_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSIGNCOV_W, vd, vj, vk)); +} + +/* Emits the `vsigncov.d vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsigncov_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSIGNCOV_D, vd, vj, vk)); +} + +/* Emits the `vfadd.s vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vfadd_s(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VFADD_S, vd, vj, vk)); +} + +/* Emits the `vfadd.d vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vfadd_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VFADD_D, vd, vj, vk)); +} + +/* Emits the `vfsub.s vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vfsub_s(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VFSUB_S, vd, vj, vk)); +} + +/* Emits the `vfsub.d vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vfsub_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VFSUB_D, vd, vj, vk)); +} + +/* Emits the `vfmul.s vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vfmul_s(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VFMUL_S, vd, vj, vk)); +} + +/* Emits the `vfmul.d vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vfmul_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VFMUL_D, vd, vj, vk)); +} + +/* Emits the `vfdiv.s vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vfdiv_s(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VFDIV_S, vd, vj, vk)); +} + +/* Emits the `vfdiv.d vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vfdiv_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VFDIV_D, vd, vj, vk)); +} + +/* Emits the `vfmax.s vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vfmax_s(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VFMAX_S, vd, vj, vk)); +} + +/* Emits the `vfmax.d vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vfmax_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VFMAX_D, vd, vj, vk)); +} + +/* Emits the `vfmin.s vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vfmin_s(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VFMIN_S, vd, vj, vk)); +} + +/* Emits the `vfmin.d vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vfmin_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VFMIN_D, vd, vj, vk)); +} + +/* Emits the `vfmaxa.s vd, vj, vk` instruction. 
*/ +static void __attribute__((unused)) +tcg_out_opc_vfmaxa_s(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VFMAXA_S, vd, vj, vk)); +} + +/* Emits the `vfmaxa.d vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vfmaxa_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VFMAXA_D, vd, vj, vk)); +} + +/* Emits the `vfmina.s vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vfmina_s(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VFMINA_S, vd, vj, vk)); +} + +/* Emits the `vfmina.d vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vfmina_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VFMINA_D, vd, vj, vk)); +} + +/* Emits the `vfcvt.h.s vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vfcvt_h_s(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VFCVT_H_S, vd, vj, vk)); +} + +/* Emits the `vfcvt.s.d vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vfcvt_s_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VFCVT_S_D, vd, vj, vk)); +} + +/* Emits the `vffint.s.l vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vffint_s_l(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VFFINT_S_L, vd, vj, vk)); +} + +/* Emits the `vftint.w.d vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vftint_w_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VFTINT_W_D, vd, vj, vk)); +} + +/* Emits the `vftintrm.w.d vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vftintrm_w_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VFTINTRM_W_D, vd, vj, vk)); +} + +/* Emits the `vftintrp.w.d vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vftintrp_w_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VFTINTRP_W_D, vd, vj, vk)); +} + +/* Emits the `vftintrz.w.d vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vftintrz_w_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VFTINTRZ_W_D, vd, vj, vk)); +} + +/* Emits the `vftintrne.w.d vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vftintrne_w_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VFTINTRNE_W_D, vd, vj, vk)); +} + +/* Emits the `vshuf.h vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vshuf_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSHUF_H, vd, vj, vk)); +} + +/* Emits the `vshuf.w vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vshuf_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSHUF_W, vd, vj, vk)); +} + +/* Emits the `vshuf.d vd, vj, vk` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vshuf_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk) +{ + tcg_out32(s, encode_vdvjvk_insn(OPC_VSHUF_D, vd, vj, vk)); +} + +/* Emits the `vseqi.b vd, vj, sk5` instruction. 
*/ +static void __attribute__((unused)) +tcg_out_opc_vseqi_b(TCGContext *s, TCGReg vd, TCGReg vj, int32_t sk5) +{ + tcg_out32(s, encode_vdvjsk5_insn(OPC_VSEQI_B, vd, vj, sk5)); +} + +/* Emits the `vseqi.h vd, vj, sk5` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vseqi_h(TCGContext *s, TCGReg vd, TCGReg vj, int32_t sk5) +{ + tcg_out32(s, encode_vdvjsk5_insn(OPC_VSEQI_H, vd, vj, sk5)); +} + +/* Emits the `vseqi.w vd, vj, sk5` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vseqi_w(TCGContext *s, TCGReg vd, TCGReg vj, int32_t sk5) +{ + tcg_out32(s, encode_vdvjsk5_insn(OPC_VSEQI_W, vd, vj, sk5)); +} + +/* Emits the `vseqi.d vd, vj, sk5` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vseqi_d(TCGContext *s, TCGReg vd, TCGReg vj, int32_t sk5) +{ + tcg_out32(s, encode_vdvjsk5_insn(OPC_VSEQI_D, vd, vj, sk5)); +} + +/* Emits the `vslei.b vd, vj, sk5` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vslei_b(TCGContext *s, TCGReg vd, TCGReg vj, int32_t sk5) +{ + tcg_out32(s, encode_vdvjsk5_insn(OPC_VSLEI_B, vd, vj, sk5)); +} + +/* Emits the `vslei.h vd, vj, sk5` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vslei_h(TCGContext *s, TCGReg vd, TCGReg vj, int32_t sk5) +{ + tcg_out32(s, encode_vdvjsk5_insn(OPC_VSLEI_H, vd, vj, sk5)); +} + +/* Emits the `vslei.w vd, vj, sk5` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vslei_w(TCGContext *s, TCGReg vd, TCGReg vj, int32_t sk5) +{ + tcg_out32(s, encode_vdvjsk5_insn(OPC_VSLEI_W, vd, vj, sk5)); +} + +/* Emits the `vslei.d vd, vj, sk5` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vslei_d(TCGContext *s, TCGReg vd, TCGReg vj, int32_t sk5) +{ + tcg_out32(s, encode_vdvjsk5_insn(OPC_VSLEI_D, vd, vj, sk5)); +} + +/* Emits the `vslei.bu vd, vj, uk5` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vslei_bu(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk5) +{ + tcg_out32(s, encode_vdvjuk5_insn(OPC_VSLEI_BU, vd, vj, uk5)); +} + +/* Emits the `vslei.hu vd, vj, uk5` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vslei_hu(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk5) +{ + tcg_out32(s, encode_vdvjuk5_insn(OPC_VSLEI_HU, vd, vj, uk5)); +} + +/* Emits the `vslei.wu vd, vj, uk5` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vslei_wu(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk5) +{ + tcg_out32(s, encode_vdvjuk5_insn(OPC_VSLEI_WU, vd, vj, uk5)); +} + +/* Emits the `vslei.du vd, vj, uk5` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vslei_du(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk5) +{ + tcg_out32(s, encode_vdvjuk5_insn(OPC_VSLEI_DU, vd, vj, uk5)); +} + +/* Emits the `vslti.b vd, vj, sk5` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vslti_b(TCGContext *s, TCGReg vd, TCGReg vj, int32_t sk5) +{ + tcg_out32(s, encode_vdvjsk5_insn(OPC_VSLTI_B, vd, vj, sk5)); +} + +/* Emits the `vslti.h vd, vj, sk5` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vslti_h(TCGContext *s, TCGReg vd, TCGReg vj, int32_t sk5) +{ + tcg_out32(s, encode_vdvjsk5_insn(OPC_VSLTI_H, vd, vj, sk5)); +} + +/* Emits the `vslti.w vd, vj, sk5` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vslti_w(TCGContext *s, TCGReg vd, TCGReg vj, int32_t sk5) +{ + tcg_out32(s, encode_vdvjsk5_insn(OPC_VSLTI_W, vd, vj, sk5)); +} + +/* Emits the `vslti.d vd, vj, sk5` instruction. 
*/ +static void __attribute__((unused)) +tcg_out_opc_vslti_d(TCGContext *s, TCGReg vd, TCGReg vj, int32_t sk5) +{ + tcg_out32(s, encode_vdvjsk5_insn(OPC_VSLTI_D, vd, vj, sk5)); +} + +/* Emits the `vslti.bu vd, vj, uk5` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vslti_bu(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk5) +{ + tcg_out32(s, encode_vdvjuk5_insn(OPC_VSLTI_BU, vd, vj, uk5)); +} + +/* Emits the `vslti.hu vd, vj, uk5` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vslti_hu(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk5) +{ + tcg_out32(s, encode_vdvjuk5_insn(OPC_VSLTI_HU, vd, vj, uk5)); +} + +/* Emits the `vslti.wu vd, vj, uk5` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vslti_wu(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk5) +{ + tcg_out32(s, encode_vdvjuk5_insn(OPC_VSLTI_WU, vd, vj, uk5)); +} + +/* Emits the `vslti.du vd, vj, uk5` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vslti_du(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk5) +{ + tcg_out32(s, encode_vdvjuk5_insn(OPC_VSLTI_DU, vd, vj, uk5)); +} + +/* Emits the `vaddi.bu vd, vj, uk5` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vaddi_bu(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk5) +{ + tcg_out32(s, encode_vdvjuk5_insn(OPC_VADDI_BU, vd, vj, uk5)); +} + +/* Emits the `vaddi.hu vd, vj, uk5` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vaddi_hu(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk5) +{ + tcg_out32(s, encode_vdvjuk5_insn(OPC_VADDI_HU, vd, vj, uk5)); +} + +/* Emits the `vaddi.wu vd, vj, uk5` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vaddi_wu(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk5) +{ + tcg_out32(s, encode_vdvjuk5_insn(OPC_VADDI_WU, vd, vj, uk5)); +} + +/* Emits the `vaddi.du vd, vj, uk5` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vaddi_du(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk5) +{ + tcg_out32(s, encode_vdvjuk5_insn(OPC_VADDI_DU, vd, vj, uk5)); +} + +/* Emits the `vsubi.bu vd, vj, uk5` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsubi_bu(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk5) +{ + tcg_out32(s, encode_vdvjuk5_insn(OPC_VSUBI_BU, vd, vj, uk5)); +} + +/* Emits the `vsubi.hu vd, vj, uk5` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsubi_hu(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk5) +{ + tcg_out32(s, encode_vdvjuk5_insn(OPC_VSUBI_HU, vd, vj, uk5)); +} + +/* Emits the `vsubi.wu vd, vj, uk5` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsubi_wu(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk5) +{ + tcg_out32(s, encode_vdvjuk5_insn(OPC_VSUBI_WU, vd, vj, uk5)); +} + +/* Emits the `vsubi.du vd, vj, uk5` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsubi_du(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk5) +{ + tcg_out32(s, encode_vdvjuk5_insn(OPC_VSUBI_DU, vd, vj, uk5)); +} + +/* Emits the `vbsll.v vd, vj, uk5` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vbsll_v(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk5) +{ + tcg_out32(s, encode_vdvjuk5_insn(OPC_VBSLL_V, vd, vj, uk5)); +} + +/* Emits the `vbsrl.v vd, vj, uk5` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vbsrl_v(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk5) +{ + tcg_out32(s, encode_vdvjuk5_insn(OPC_VBSRL_V, vd, vj, uk5)); +} + +/* Emits the `vmaxi.b vd, vj, sk5` instruction. 
*/ +static void __attribute__((unused)) +tcg_out_opc_vmaxi_b(TCGContext *s, TCGReg vd, TCGReg vj, int32_t sk5) +{ + tcg_out32(s, encode_vdvjsk5_insn(OPC_VMAXI_B, vd, vj, sk5)); +} + +/* Emits the `vmaxi.h vd, vj, sk5` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vmaxi_h(TCGContext *s, TCGReg vd, TCGReg vj, int32_t sk5) +{ + tcg_out32(s, encode_vdvjsk5_insn(OPC_VMAXI_H, vd, vj, sk5)); +} + +/* Emits the `vmaxi.w vd, vj, sk5` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vmaxi_w(TCGContext *s, TCGReg vd, TCGReg vj, int32_t sk5) +{ + tcg_out32(s, encode_vdvjsk5_insn(OPC_VMAXI_W, vd, vj, sk5)); +} + +/* Emits the `vmaxi.d vd, vj, sk5` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vmaxi_d(TCGContext *s, TCGReg vd, TCGReg vj, int32_t sk5) +{ + tcg_out32(s, encode_vdvjsk5_insn(OPC_VMAXI_D, vd, vj, sk5)); +} + +/* Emits the `vmini.b vd, vj, sk5` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vmini_b(TCGContext *s, TCGReg vd, TCGReg vj, int32_t sk5) +{ + tcg_out32(s, encode_vdvjsk5_insn(OPC_VMINI_B, vd, vj, sk5)); +} + +/* Emits the `vmini.h vd, vj, sk5` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vmini_h(TCGContext *s, TCGReg vd, TCGReg vj, int32_t sk5) +{ + tcg_out32(s, encode_vdvjsk5_insn(OPC_VMINI_H, vd, vj, sk5)); +} + +/* Emits the `vmini.w vd, vj, sk5` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vmini_w(TCGContext *s, TCGReg vd, TCGReg vj, int32_t sk5) +{ + tcg_out32(s, encode_vdvjsk5_insn(OPC_VMINI_W, vd, vj, sk5)); +} + +/* Emits the `vmini.d vd, vj, sk5` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vmini_d(TCGContext *s, TCGReg vd, TCGReg vj, int32_t sk5) +{ + tcg_out32(s, encode_vdvjsk5_insn(OPC_VMINI_D, vd, vj, sk5)); +} + +/* Emits the `vmaxi.bu vd, vj, uk5` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vmaxi_bu(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk5) +{ + tcg_out32(s, encode_vdvjuk5_insn(OPC_VMAXI_BU, vd, vj, uk5)); +} + +/* Emits the `vmaxi.hu vd, vj, uk5` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vmaxi_hu(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk5) +{ + tcg_out32(s, encode_vdvjuk5_insn(OPC_VMAXI_HU, vd, vj, uk5)); +} + +/* Emits the `vmaxi.wu vd, vj, uk5` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vmaxi_wu(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk5) +{ + tcg_out32(s, encode_vdvjuk5_insn(OPC_VMAXI_WU, vd, vj, uk5)); +} + +/* Emits the `vmaxi.du vd, vj, uk5` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vmaxi_du(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk5) +{ + tcg_out32(s, encode_vdvjuk5_insn(OPC_VMAXI_DU, vd, vj, uk5)); +} + +/* Emits the `vmini.bu vd, vj, uk5` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vmini_bu(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk5) +{ + tcg_out32(s, encode_vdvjuk5_insn(OPC_VMINI_BU, vd, vj, uk5)); +} + +/* Emits the `vmini.hu vd, vj, uk5` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vmini_hu(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk5) +{ + tcg_out32(s, encode_vdvjuk5_insn(OPC_VMINI_HU, vd, vj, uk5)); +} + +/* Emits the `vmini.wu vd, vj, uk5` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vmini_wu(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk5) +{ + tcg_out32(s, encode_vdvjuk5_insn(OPC_VMINI_WU, vd, vj, uk5)); +} + +/* Emits the `vmini.du vd, vj, uk5` instruction. 
*/ +static void __attribute__((unused)) +tcg_out_opc_vmini_du(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk5) +{ + tcg_out32(s, encode_vdvjuk5_insn(OPC_VMINI_DU, vd, vj, uk5)); +} + +/* Emits the `vfrstpi.b vd, vj, uk5` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vfrstpi_b(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk5) +{ + tcg_out32(s, encode_vdvjuk5_insn(OPC_VFRSTPI_B, vd, vj, uk5)); +} + +/* Emits the `vfrstpi.h vd, vj, uk5` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vfrstpi_h(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk5) +{ + tcg_out32(s, encode_vdvjuk5_insn(OPC_VFRSTPI_H, vd, vj, uk5)); +} + +/* Emits the `vclo.b vd, vj` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vclo_b(TCGContext *s, TCGReg vd, TCGReg vj) +{ + tcg_out32(s, encode_vdvj_insn(OPC_VCLO_B, vd, vj)); +} + +/* Emits the `vclo.h vd, vj` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vclo_h(TCGContext *s, TCGReg vd, TCGReg vj) +{ + tcg_out32(s, encode_vdvj_insn(OPC_VCLO_H, vd, vj)); +} + +/* Emits the `vclo.w vd, vj` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vclo_w(TCGContext *s, TCGReg vd, TCGReg vj) +{ + tcg_out32(s, encode_vdvj_insn(OPC_VCLO_W, vd, vj)); +} + +/* Emits the `vclo.d vd, vj` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vclo_d(TCGContext *s, TCGReg vd, TCGReg vj) +{ + tcg_out32(s, encode_vdvj_insn(OPC_VCLO_D, vd, vj)); +} + +/* Emits the `vclz.b vd, vj` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vclz_b(TCGContext *s, TCGReg vd, TCGReg vj) +{ + tcg_out32(s, encode_vdvj_insn(OPC_VCLZ_B, vd, vj)); +} + +/* Emits the `vclz.h vd, vj` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vclz_h(TCGContext *s, TCGReg vd, TCGReg vj) +{ + tcg_out32(s, encode_vdvj_insn(OPC_VCLZ_H, vd, vj)); +} + +/* Emits the `vclz.w vd, vj` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vclz_w(TCGContext *s, TCGReg vd, TCGReg vj) +{ + tcg_out32(s, encode_vdvj_insn(OPC_VCLZ_W, vd, vj)); +} + +/* Emits the `vclz.d vd, vj` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vclz_d(TCGContext *s, TCGReg vd, TCGReg vj) +{ + tcg_out32(s, encode_vdvj_insn(OPC_VCLZ_D, vd, vj)); +} + +/* Emits the `vpcnt.b vd, vj` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vpcnt_b(TCGContext *s, TCGReg vd, TCGReg vj) +{ + tcg_out32(s, encode_vdvj_insn(OPC_VPCNT_B, vd, vj)); +} + +/* Emits the `vpcnt.h vd, vj` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vpcnt_h(TCGContext *s, TCGReg vd, TCGReg vj) +{ + tcg_out32(s, encode_vdvj_insn(OPC_VPCNT_H, vd, vj)); +} + +/* Emits the `vpcnt.w vd, vj` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vpcnt_w(TCGContext *s, TCGReg vd, TCGReg vj) +{ + tcg_out32(s, encode_vdvj_insn(OPC_VPCNT_W, vd, vj)); +} + +/* Emits the `vpcnt.d vd, vj` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vpcnt_d(TCGContext *s, TCGReg vd, TCGReg vj) +{ + tcg_out32(s, encode_vdvj_insn(OPC_VPCNT_D, vd, vj)); +} + +/* Emits the `vneg.b vd, vj` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vneg_b(TCGContext *s, TCGReg vd, TCGReg vj) +{ + tcg_out32(s, encode_vdvj_insn(OPC_VNEG_B, vd, vj)); +} + +/* Emits the `vneg.h vd, vj` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vneg_h(TCGContext *s, TCGReg vd, TCGReg vj) +{ + tcg_out32(s, encode_vdvj_insn(OPC_VNEG_H, vd, vj)); +} + +/* Emits the `vneg.w vd, vj` instruction. 
*/ +static void __attribute__((unused)) +tcg_out_opc_vneg_w(TCGContext *s, TCGReg vd, TCGReg vj) +{ + tcg_out32(s, encode_vdvj_insn(OPC_VNEG_W, vd, vj)); +} + +/* Emits the `vneg.d vd, vj` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vneg_d(TCGContext *s, TCGReg vd, TCGReg vj) +{ + tcg_out32(s, encode_vdvj_insn(OPC_VNEG_D, vd, vj)); +} + +/* Emits the `vmskltz.b vd, vj` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vmskltz_b(TCGContext *s, TCGReg vd, TCGReg vj) +{ + tcg_out32(s, encode_vdvj_insn(OPC_VMSKLTZ_B, vd, vj)); +} + +/* Emits the `vmskltz.h vd, vj` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vmskltz_h(TCGContext *s, TCGReg vd, TCGReg vj) +{ + tcg_out32(s, encode_vdvj_insn(OPC_VMSKLTZ_H, vd, vj)); +} + +/* Emits the `vmskltz.w vd, vj` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vmskltz_w(TCGContext *s, TCGReg vd, TCGReg vj) +{ + tcg_out32(s, encode_vdvj_insn(OPC_VMSKLTZ_W, vd, vj)); +} + +/* Emits the `vmskltz.d vd, vj` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vmskltz_d(TCGContext *s, TCGReg vd, TCGReg vj) +{ + tcg_out32(s, encode_vdvj_insn(OPC_VMSKLTZ_D, vd, vj)); +} + +/* Emits the `vmskgez.b vd, vj` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vmskgez_b(TCGContext *s, TCGReg vd, TCGReg vj) +{ + tcg_out32(s, encode_vdvj_insn(OPC_VMSKGEZ_B, vd, vj)); +} + +/* Emits the `vmsknz.b vd, vj` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vmsknz_b(TCGContext *s, TCGReg vd, TCGReg vj) +{ + tcg_out32(s, encode_vdvj_insn(OPC_VMSKNZ_B, vd, vj)); +} + +/* Emits the `vseteqz.v cd, vj` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vseteqz_v(TCGContext *s, TCGReg cd, TCGReg vj) +{ + tcg_out32(s, encode_cdvj_insn(OPC_VSETEQZ_V, cd, vj)); +} + +/* Emits the `vsetnez.v cd, vj` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsetnez_v(TCGContext *s, TCGReg cd, TCGReg vj) +{ + tcg_out32(s, encode_cdvj_insn(OPC_VSETNEZ_V, cd, vj)); +} + +/* Emits the `vsetanyeqz.b cd, vj` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsetanyeqz_b(TCGContext *s, TCGReg cd, TCGReg vj) +{ + tcg_out32(s, encode_cdvj_insn(OPC_VSETANYEQZ_B, cd, vj)); +} + +/* Emits the `vsetanyeqz.h cd, vj` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsetanyeqz_h(TCGContext *s, TCGReg cd, TCGReg vj) +{ + tcg_out32(s, encode_cdvj_insn(OPC_VSETANYEQZ_H, cd, vj)); +} + +/* Emits the `vsetanyeqz.w cd, vj` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsetanyeqz_w(TCGContext *s, TCGReg cd, TCGReg vj) +{ + tcg_out32(s, encode_cdvj_insn(OPC_VSETANYEQZ_W, cd, vj)); +} + +/* Emits the `vsetanyeqz.d cd, vj` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsetanyeqz_d(TCGContext *s, TCGReg cd, TCGReg vj) +{ + tcg_out32(s, encode_cdvj_insn(OPC_VSETANYEQZ_D, cd, vj)); +} + +/* Emits the `vsetallnez.b cd, vj` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsetallnez_b(TCGContext *s, TCGReg cd, TCGReg vj) +{ + tcg_out32(s, encode_cdvj_insn(OPC_VSETALLNEZ_B, cd, vj)); +} + +/* Emits the `vsetallnez.h cd, vj` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsetallnez_h(TCGContext *s, TCGReg cd, TCGReg vj) +{ + tcg_out32(s, encode_cdvj_insn(OPC_VSETALLNEZ_H, cd, vj)); +} + +/* Emits the `vsetallnez.w cd, vj` instruction. 
*/ +static void __attribute__((unused)) +tcg_out_opc_vsetallnez_w(TCGContext *s, TCGReg cd, TCGReg vj) +{ + tcg_out32(s, encode_cdvj_insn(OPC_VSETALLNEZ_W, cd, vj)); +} + +/* Emits the `vsetallnez.d cd, vj` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsetallnez_d(TCGContext *s, TCGReg cd, TCGReg vj) +{ + tcg_out32(s, encode_cdvj_insn(OPC_VSETALLNEZ_D, cd, vj)); +} + +/* Emits the `vflogb.s vd, vj` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vflogb_s(TCGContext *s, TCGReg vd, TCGReg vj) +{ + tcg_out32(s, encode_vdvj_insn(OPC_VFLOGB_S, vd, vj)); +} + +/* Emits the `vflogb.d vd, vj` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vflogb_d(TCGContext *s, TCGReg vd, TCGReg vj) +{ + tcg_out32(s, encode_vdvj_insn(OPC_VFLOGB_D, vd, vj)); +} + +/* Emits the `vfclass.s vd, vj` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vfclass_s(TCGContext *s, TCGReg vd, TCGReg vj) +{ + tcg_out32(s, encode_vdvj_insn(OPC_VFCLASS_S, vd, vj)); +} + +/* Emits the `vfclass.d vd, vj` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vfclass_d(TCGContext *s, TCGReg vd, TCGReg vj) +{ + tcg_out32(s, encode_vdvj_insn(OPC_VFCLASS_D, vd, vj)); +} + +/* Emits the `vfsqrt.s vd, vj` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vfsqrt_s(TCGContext *s, TCGReg vd, TCGReg vj) +{ + tcg_out32(s, encode_vdvj_insn(OPC_VFSQRT_S, vd, vj)); +} + +/* Emits the `vfsqrt.d vd, vj` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vfsqrt_d(TCGContext *s, TCGReg vd, TCGReg vj) +{ + tcg_out32(s, encode_vdvj_insn(OPC_VFSQRT_D, vd, vj)); +} + +/* Emits the `vfrecip.s vd, vj` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vfrecip_s(TCGContext *s, TCGReg vd, TCGReg vj) +{ + tcg_out32(s, encode_vdvj_insn(OPC_VFRECIP_S, vd, vj)); +} + +/* Emits the `vfrecip.d vd, vj` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vfrecip_d(TCGContext *s, TCGReg vd, TCGReg vj) +{ + tcg_out32(s, encode_vdvj_insn(OPC_VFRECIP_D, vd, vj)); +} + +/* Emits the `vfrsqrt.s vd, vj` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vfrsqrt_s(TCGContext *s, TCGReg vd, TCGReg vj) +{ + tcg_out32(s, encode_vdvj_insn(OPC_VFRSQRT_S, vd, vj)); +} + +/* Emits the `vfrsqrt.d vd, vj` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vfrsqrt_d(TCGContext *s, TCGReg vd, TCGReg vj) +{ + tcg_out32(s, encode_vdvj_insn(OPC_VFRSQRT_D, vd, vj)); +} + +/* Emits the `vfrint.s vd, vj` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vfrint_s(TCGContext *s, TCGReg vd, TCGReg vj) +{ + tcg_out32(s, encode_vdvj_insn(OPC_VFRINT_S, vd, vj)); +} + +/* Emits the `vfrint.d vd, vj` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vfrint_d(TCGContext *s, TCGReg vd, TCGReg vj) +{ + tcg_out32(s, encode_vdvj_insn(OPC_VFRINT_D, vd, vj)); +} + +/* Emits the `vfrintrm.s vd, vj` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vfrintrm_s(TCGContext *s, TCGReg vd, TCGReg vj) +{ + tcg_out32(s, encode_vdvj_insn(OPC_VFRINTRM_S, vd, vj)); +} + +/* Emits the `vfrintrm.d vd, vj` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vfrintrm_d(TCGContext *s, TCGReg vd, TCGReg vj) +{ + tcg_out32(s, encode_vdvj_insn(OPC_VFRINTRM_D, vd, vj)); +} + +/* Emits the `vfrintrp.s vd, vj` instruction. 
*/ +static void __attribute__((unused)) +tcg_out_opc_vfrintrp_s(TCGContext *s, TCGReg vd, TCGReg vj) +{ + tcg_out32(s, encode_vdvj_insn(OPC_VFRINTRP_S, vd, vj)); +} + +/* Emits the `vfrintrp.d vd, vj` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vfrintrp_d(TCGContext *s, TCGReg vd, TCGReg vj) +{ + tcg_out32(s, encode_vdvj_insn(OPC_VFRINTRP_D, vd, vj)); +} + +/* Emits the `vfrintrz.s vd, vj` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vfrintrz_s(TCGContext *s, TCGReg vd, TCGReg vj) +{ + tcg_out32(s, encode_vdvj_insn(OPC_VFRINTRZ_S, vd, vj)); +} + +/* Emits the `vfrintrz.d vd, vj` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vfrintrz_d(TCGContext *s, TCGReg vd, TCGReg vj) +{ + tcg_out32(s, encode_vdvj_insn(OPC_VFRINTRZ_D, vd, vj)); +} + +/* Emits the `vfrintrne.s vd, vj` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vfrintrne_s(TCGContext *s, TCGReg vd, TCGReg vj) +{ + tcg_out32(s, encode_vdvj_insn(OPC_VFRINTRNE_S, vd, vj)); +} + +/* Emits the `vfrintrne.d vd, vj` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vfrintrne_d(TCGContext *s, TCGReg vd, TCGReg vj) +{ + tcg_out32(s, encode_vdvj_insn(OPC_VFRINTRNE_D, vd, vj)); +} + +/* Emits the `vfcvtl.s.h vd, vj` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vfcvtl_s_h(TCGContext *s, TCGReg vd, TCGReg vj) +{ + tcg_out32(s, encode_vdvj_insn(OPC_VFCVTL_S_H, vd, vj)); +} + +/* Emits the `vfcvth.s.h vd, vj` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vfcvth_s_h(TCGContext *s, TCGReg vd, TCGReg vj) +{ + tcg_out32(s, encode_vdvj_insn(OPC_VFCVTH_S_H, vd, vj)); +} + +/* Emits the `vfcvtl.d.s vd, vj` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vfcvtl_d_s(TCGContext *s, TCGReg vd, TCGReg vj) +{ + tcg_out32(s, encode_vdvj_insn(OPC_VFCVTL_D_S, vd, vj)); +} + +/* Emits the `vfcvth.d.s vd, vj` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vfcvth_d_s(TCGContext *s, TCGReg vd, TCGReg vj) +{ + tcg_out32(s, encode_vdvj_insn(OPC_VFCVTH_D_S, vd, vj)); +} + +/* Emits the `vffint.s.w vd, vj` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vffint_s_w(TCGContext *s, TCGReg vd, TCGReg vj) +{ + tcg_out32(s, encode_vdvj_insn(OPC_VFFINT_S_W, vd, vj)); +} + +/* Emits the `vffint.s.wu vd, vj` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vffint_s_wu(TCGContext *s, TCGReg vd, TCGReg vj) +{ + tcg_out32(s, encode_vdvj_insn(OPC_VFFINT_S_WU, vd, vj)); +} + +/* Emits the `vffint.d.l vd, vj` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vffint_d_l(TCGContext *s, TCGReg vd, TCGReg vj) +{ + tcg_out32(s, encode_vdvj_insn(OPC_VFFINT_D_L, vd, vj)); +} + +/* Emits the `vffint.d.lu vd, vj` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vffint_d_lu(TCGContext *s, TCGReg vd, TCGReg vj) +{ + tcg_out32(s, encode_vdvj_insn(OPC_VFFINT_D_LU, vd, vj)); +} + +/* Emits the `vffintl.d.w vd, vj` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vffintl_d_w(TCGContext *s, TCGReg vd, TCGReg vj) +{ + tcg_out32(s, encode_vdvj_insn(OPC_VFFINTL_D_W, vd, vj)); +} + +/* Emits the `vffinth.d.w vd, vj` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vffinth_d_w(TCGContext *s, TCGReg vd, TCGReg vj) +{ + tcg_out32(s, encode_vdvj_insn(OPC_VFFINTH_D_W, vd, vj)); +} + +/* Emits the `vftint.w.s vd, vj` instruction. 
*/ +static void __attribute__((unused)) +tcg_out_opc_vftint_w_s(TCGContext *s, TCGReg vd, TCGReg vj) +{ + tcg_out32(s, encode_vdvj_insn(OPC_VFTINT_W_S, vd, vj)); +} + +/* Emits the `vftint.l.d vd, vj` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vftint_l_d(TCGContext *s, TCGReg vd, TCGReg vj) +{ + tcg_out32(s, encode_vdvj_insn(OPC_VFTINT_L_D, vd, vj)); +} + +/* Emits the `vftintrm.w.s vd, vj` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vftintrm_w_s(TCGContext *s, TCGReg vd, TCGReg vj) +{ + tcg_out32(s, encode_vdvj_insn(OPC_VFTINTRM_W_S, vd, vj)); +} + +/* Emits the `vftintrm.l.d vd, vj` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vftintrm_l_d(TCGContext *s, TCGReg vd, TCGReg vj) +{ + tcg_out32(s, encode_vdvj_insn(OPC_VFTINTRM_L_D, vd, vj)); +} + +/* Emits the `vftintrp.w.s vd, vj` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vftintrp_w_s(TCGContext *s, TCGReg vd, TCGReg vj) +{ + tcg_out32(s, encode_vdvj_insn(OPC_VFTINTRP_W_S, vd, vj)); +} + +/* Emits the `vftintrp.l.d vd, vj` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vftintrp_l_d(TCGContext *s, TCGReg vd, TCGReg vj) +{ + tcg_out32(s, encode_vdvj_insn(OPC_VFTINTRP_L_D, vd, vj)); +} + +/* Emits the `vftintrz.w.s vd, vj` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vftintrz_w_s(TCGContext *s, TCGReg vd, TCGReg vj) +{ + tcg_out32(s, encode_vdvj_insn(OPC_VFTINTRZ_W_S, vd, vj)); +} + +/* Emits the `vftintrz.l.d vd, vj` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vftintrz_l_d(TCGContext *s, TCGReg vd, TCGReg vj) +{ + tcg_out32(s, encode_vdvj_insn(OPC_VFTINTRZ_L_D, vd, vj)); +} + +/* Emits the `vftintrne.w.s vd, vj` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vftintrne_w_s(TCGContext *s, TCGReg vd, TCGReg vj) +{ + tcg_out32(s, encode_vdvj_insn(OPC_VFTINTRNE_W_S, vd, vj)); +} + +/* Emits the `vftintrne.l.d vd, vj` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vftintrne_l_d(TCGContext *s, TCGReg vd, TCGReg vj) +{ + tcg_out32(s, encode_vdvj_insn(OPC_VFTINTRNE_L_D, vd, vj)); +} + +/* Emits the `vftint.wu.s vd, vj` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vftint_wu_s(TCGContext *s, TCGReg vd, TCGReg vj) +{ + tcg_out32(s, encode_vdvj_insn(OPC_VFTINT_WU_S, vd, vj)); +} + +/* Emits the `vftint.lu.d vd, vj` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vftint_lu_d(TCGContext *s, TCGReg vd, TCGReg vj) +{ + tcg_out32(s, encode_vdvj_insn(OPC_VFTINT_LU_D, vd, vj)); +} + +/* Emits the `vftintrz.wu.s vd, vj` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vftintrz_wu_s(TCGContext *s, TCGReg vd, TCGReg vj) +{ + tcg_out32(s, encode_vdvj_insn(OPC_VFTINTRZ_WU_S, vd, vj)); +} + +/* Emits the `vftintrz.lu.d vd, vj` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vftintrz_lu_d(TCGContext *s, TCGReg vd, TCGReg vj) +{ + tcg_out32(s, encode_vdvj_insn(OPC_VFTINTRZ_LU_D, vd, vj)); +} + +/* Emits the `vftintl.l.s vd, vj` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vftintl_l_s(TCGContext *s, TCGReg vd, TCGReg vj) +{ + tcg_out32(s, encode_vdvj_insn(OPC_VFTINTL_L_S, vd, vj)); +} + +/* Emits the `vftinth.l.s vd, vj` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vftinth_l_s(TCGContext *s, TCGReg vd, TCGReg vj) +{ + tcg_out32(s, encode_vdvj_insn(OPC_VFTINTH_L_S, vd, vj)); +} + +/* Emits the `vftintrml.l.s vd, vj` instruction. 
*/ +static void __attribute__((unused)) +tcg_out_opc_vftintrml_l_s(TCGContext *s, TCGReg vd, TCGReg vj) +{ + tcg_out32(s, encode_vdvj_insn(OPC_VFTINTRML_L_S, vd, vj)); +} + +/* Emits the `vftintrmh.l.s vd, vj` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vftintrmh_l_s(TCGContext *s, TCGReg vd, TCGReg vj) +{ + tcg_out32(s, encode_vdvj_insn(OPC_VFTINTRMH_L_S, vd, vj)); +} + +/* Emits the `vftintrpl.l.s vd, vj` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vftintrpl_l_s(TCGContext *s, TCGReg vd, TCGReg vj) +{ + tcg_out32(s, encode_vdvj_insn(OPC_VFTINTRPL_L_S, vd, vj)); +} + +/* Emits the `vftintrph.l.s vd, vj` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vftintrph_l_s(TCGContext *s, TCGReg vd, TCGReg vj) +{ + tcg_out32(s, encode_vdvj_insn(OPC_VFTINTRPH_L_S, vd, vj)); +} + +/* Emits the `vftintrzl.l.s vd, vj` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vftintrzl_l_s(TCGContext *s, TCGReg vd, TCGReg vj) +{ + tcg_out32(s, encode_vdvj_insn(OPC_VFTINTRZL_L_S, vd, vj)); +} + +/* Emits the `vftintrzh.l.s vd, vj` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vftintrzh_l_s(TCGContext *s, TCGReg vd, TCGReg vj) +{ + tcg_out32(s, encode_vdvj_insn(OPC_VFTINTRZH_L_S, vd, vj)); +} + +/* Emits the `vftintrnel.l.s vd, vj` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vftintrnel_l_s(TCGContext *s, TCGReg vd, TCGReg vj) +{ + tcg_out32(s, encode_vdvj_insn(OPC_VFTINTRNEL_L_S, vd, vj)); +} + +/* Emits the `vftintrneh.l.s vd, vj` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vftintrneh_l_s(TCGContext *s, TCGReg vd, TCGReg vj) +{ + tcg_out32(s, encode_vdvj_insn(OPC_VFTINTRNEH_L_S, vd, vj)); +} + +/* Emits the `vexth.h.b vd, vj` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vexth_h_b(TCGContext *s, TCGReg vd, TCGReg vj) +{ + tcg_out32(s, encode_vdvj_insn(OPC_VEXTH_H_B, vd, vj)); +} + +/* Emits the `vexth.w.h vd, vj` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vexth_w_h(TCGContext *s, TCGReg vd, TCGReg vj) +{ + tcg_out32(s, encode_vdvj_insn(OPC_VEXTH_W_H, vd, vj)); +} + +/* Emits the `vexth.d.w vd, vj` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vexth_d_w(TCGContext *s, TCGReg vd, TCGReg vj) +{ + tcg_out32(s, encode_vdvj_insn(OPC_VEXTH_D_W, vd, vj)); +} + +/* Emits the `vexth.q.d vd, vj` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vexth_q_d(TCGContext *s, TCGReg vd, TCGReg vj) +{ + tcg_out32(s, encode_vdvj_insn(OPC_VEXTH_Q_D, vd, vj)); +} + +/* Emits the `vexth.hu.bu vd, vj` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vexth_hu_bu(TCGContext *s, TCGReg vd, TCGReg vj) +{ + tcg_out32(s, encode_vdvj_insn(OPC_VEXTH_HU_BU, vd, vj)); +} + +/* Emits the `vexth.wu.hu vd, vj` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vexth_wu_hu(TCGContext *s, TCGReg vd, TCGReg vj) +{ + tcg_out32(s, encode_vdvj_insn(OPC_VEXTH_WU_HU, vd, vj)); +} + +/* Emits the `vexth.du.wu vd, vj` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vexth_du_wu(TCGContext *s, TCGReg vd, TCGReg vj) +{ + tcg_out32(s, encode_vdvj_insn(OPC_VEXTH_DU_WU, vd, vj)); +} + +/* Emits the `vexth.qu.du vd, vj` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vexth_qu_du(TCGContext *s, TCGReg vd, TCGReg vj) +{ + tcg_out32(s, encode_vdvj_insn(OPC_VEXTH_QU_DU, vd, vj)); +} + +/* Emits the `vreplgr2vr.b vd, j` instruction. 
*/ +static void __attribute__((unused)) +tcg_out_opc_vreplgr2vr_b(TCGContext *s, TCGReg vd, TCGReg j) +{ + tcg_out32(s, encode_vdj_insn(OPC_VREPLGR2VR_B, vd, j)); +} + +/* Emits the `vreplgr2vr.h vd, j` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vreplgr2vr_h(TCGContext *s, TCGReg vd, TCGReg j) +{ + tcg_out32(s, encode_vdj_insn(OPC_VREPLGR2VR_H, vd, j)); +} + +/* Emits the `vreplgr2vr.w vd, j` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vreplgr2vr_w(TCGContext *s, TCGReg vd, TCGReg j) +{ + tcg_out32(s, encode_vdj_insn(OPC_VREPLGR2VR_W, vd, j)); +} + +/* Emits the `vreplgr2vr.d vd, j` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vreplgr2vr_d(TCGContext *s, TCGReg vd, TCGReg j) +{ + tcg_out32(s, encode_vdj_insn(OPC_VREPLGR2VR_D, vd, j)); +} + +/* Emits the `vrotri.b vd, vj, uk3` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vrotri_b(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk3) +{ + tcg_out32(s, encode_vdvjuk3_insn(OPC_VROTRI_B, vd, vj, uk3)); +} + +/* Emits the `vrotri.h vd, vj, uk4` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vrotri_h(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk4) +{ + tcg_out32(s, encode_vdvjuk4_insn(OPC_VROTRI_H, vd, vj, uk4)); +} + +/* Emits the `vrotri.w vd, vj, uk5` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vrotri_w(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk5) +{ + tcg_out32(s, encode_vdvjuk5_insn(OPC_VROTRI_W, vd, vj, uk5)); +} + +/* Emits the `vrotri.d vd, vj, uk6` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vrotri_d(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk6) +{ + tcg_out32(s, encode_vdvjuk6_insn(OPC_VROTRI_D, vd, vj, uk6)); +} + +/* Emits the `vsrlri.b vd, vj, uk3` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsrlri_b(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk3) +{ + tcg_out32(s, encode_vdvjuk3_insn(OPC_VSRLRI_B, vd, vj, uk3)); +} + +/* Emits the `vsrlri.h vd, vj, uk4` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsrlri_h(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk4) +{ + tcg_out32(s, encode_vdvjuk4_insn(OPC_VSRLRI_H, vd, vj, uk4)); +} + +/* Emits the `vsrlri.w vd, vj, uk5` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsrlri_w(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk5) +{ + tcg_out32(s, encode_vdvjuk5_insn(OPC_VSRLRI_W, vd, vj, uk5)); +} + +/* Emits the `vsrlri.d vd, vj, uk6` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsrlri_d(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk6) +{ + tcg_out32(s, encode_vdvjuk6_insn(OPC_VSRLRI_D, vd, vj, uk6)); +} + +/* Emits the `vsrari.b vd, vj, uk3` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsrari_b(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk3) +{ + tcg_out32(s, encode_vdvjuk3_insn(OPC_VSRARI_B, vd, vj, uk3)); +} + +/* Emits the `vsrari.h vd, vj, uk4` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsrari_h(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk4) +{ + tcg_out32(s, encode_vdvjuk4_insn(OPC_VSRARI_H, vd, vj, uk4)); +} + +/* Emits the `vsrari.w vd, vj, uk5` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsrari_w(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk5) +{ + tcg_out32(s, encode_vdvjuk5_insn(OPC_VSRARI_W, vd, vj, uk5)); +} + +/* Emits the `vsrari.d vd, vj, uk6` instruction. 
*/ +static void __attribute__((unused)) +tcg_out_opc_vsrari_d(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk6) +{ + tcg_out32(s, encode_vdvjuk6_insn(OPC_VSRARI_D, vd, vj, uk6)); +} + +/* Emits the `vinsgr2vr.b vd, j, uk4` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vinsgr2vr_b(TCGContext *s, TCGReg vd, TCGReg j, uint32_t uk4) +{ + tcg_out32(s, encode_vdjuk4_insn(OPC_VINSGR2VR_B, vd, j, uk4)); +} + +/* Emits the `vinsgr2vr.h vd, j, uk3` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vinsgr2vr_h(TCGContext *s, TCGReg vd, TCGReg j, uint32_t uk3) +{ + tcg_out32(s, encode_vdjuk3_insn(OPC_VINSGR2VR_H, vd, j, uk3)); +} + +/* Emits the `vinsgr2vr.w vd, j, uk2` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vinsgr2vr_w(TCGContext *s, TCGReg vd, TCGReg j, uint32_t uk2) +{ + tcg_out32(s, encode_vdjuk2_insn(OPC_VINSGR2VR_W, vd, j, uk2)); +} + +/* Emits the `vinsgr2vr.d vd, j, uk1` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vinsgr2vr_d(TCGContext *s, TCGReg vd, TCGReg j, uint32_t uk1) +{ + tcg_out32(s, encode_vdjuk1_insn(OPC_VINSGR2VR_D, vd, j, uk1)); +} + +/* Emits the `vpickve2gr.b d, vj, uk4` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vpickve2gr_b(TCGContext *s, TCGReg d, TCGReg vj, uint32_t uk4) +{ + tcg_out32(s, encode_dvjuk4_insn(OPC_VPICKVE2GR_B, d, vj, uk4)); +} + +/* Emits the `vpickve2gr.h d, vj, uk3` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vpickve2gr_h(TCGContext *s, TCGReg d, TCGReg vj, uint32_t uk3) +{ + tcg_out32(s, encode_dvjuk3_insn(OPC_VPICKVE2GR_H, d, vj, uk3)); +} + +/* Emits the `vpickve2gr.w d, vj, uk2` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vpickve2gr_w(TCGContext *s, TCGReg d, TCGReg vj, uint32_t uk2) +{ + tcg_out32(s, encode_dvjuk2_insn(OPC_VPICKVE2GR_W, d, vj, uk2)); +} + +/* Emits the `vpickve2gr.d d, vj, uk1` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vpickve2gr_d(TCGContext *s, TCGReg d, TCGReg vj, uint32_t uk1) +{ + tcg_out32(s, encode_dvjuk1_insn(OPC_VPICKVE2GR_D, d, vj, uk1)); +} + +/* Emits the `vpickve2gr.bu d, vj, uk4` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vpickve2gr_bu(TCGContext *s, TCGReg d, TCGReg vj, uint32_t uk4) +{ + tcg_out32(s, encode_dvjuk4_insn(OPC_VPICKVE2GR_BU, d, vj, uk4)); +} + +/* Emits the `vpickve2gr.hu d, vj, uk3` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vpickve2gr_hu(TCGContext *s, TCGReg d, TCGReg vj, uint32_t uk3) +{ + tcg_out32(s, encode_dvjuk3_insn(OPC_VPICKVE2GR_HU, d, vj, uk3)); +} + +/* Emits the `vpickve2gr.wu d, vj, uk2` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vpickve2gr_wu(TCGContext *s, TCGReg d, TCGReg vj, uint32_t uk2) +{ + tcg_out32(s, encode_dvjuk2_insn(OPC_VPICKVE2GR_WU, d, vj, uk2)); +} + +/* Emits the `vpickve2gr.du d, vj, uk1` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vpickve2gr_du(TCGContext *s, TCGReg d, TCGReg vj, uint32_t uk1) +{ + tcg_out32(s, encode_dvjuk1_insn(OPC_VPICKVE2GR_DU, d, vj, uk1)); +} + +/* Emits the `vreplvei.b vd, vj, uk4` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vreplvei_b(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk4) +{ + tcg_out32(s, encode_vdvjuk4_insn(OPC_VREPLVEI_B, vd, vj, uk4)); +} + +/* Emits the `vreplvei.h vd, vj, uk3` instruction. 
*/ +static void __attribute__((unused)) +tcg_out_opc_vreplvei_h(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk3) +{ + tcg_out32(s, encode_vdvjuk3_insn(OPC_VREPLVEI_H, vd, vj, uk3)); +} + +/* Emits the `vreplvei.w vd, vj, uk2` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vreplvei_w(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk2) +{ + tcg_out32(s, encode_vdvjuk2_insn(OPC_VREPLVEI_W, vd, vj, uk2)); +} + +/* Emits the `vreplvei.d vd, vj, uk1` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vreplvei_d(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk1) +{ + tcg_out32(s, encode_vdvjuk1_insn(OPC_VREPLVEI_D, vd, vj, uk1)); +} + +/* Emits the `vsllwil.h.b vd, vj, uk3` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsllwil_h_b(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk3) +{ + tcg_out32(s, encode_vdvjuk3_insn(OPC_VSLLWIL_H_B, vd, vj, uk3)); +} + +/* Emits the `vsllwil.w.h vd, vj, uk4` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsllwil_w_h(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk4) +{ + tcg_out32(s, encode_vdvjuk4_insn(OPC_VSLLWIL_W_H, vd, vj, uk4)); +} + +/* Emits the `vsllwil.d.w vd, vj, uk5` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsllwil_d_w(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk5) +{ + tcg_out32(s, encode_vdvjuk5_insn(OPC_VSLLWIL_D_W, vd, vj, uk5)); +} + +/* Emits the `vextl.q.d vd, vj` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vextl_q_d(TCGContext *s, TCGReg vd, TCGReg vj) +{ + tcg_out32(s, encode_vdvj_insn(OPC_VEXTL_Q_D, vd, vj)); +} + +/* Emits the `vsllwil.hu.bu vd, vj, uk3` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsllwil_hu_bu(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk3) +{ + tcg_out32(s, encode_vdvjuk3_insn(OPC_VSLLWIL_HU_BU, vd, vj, uk3)); +} + +/* Emits the `vsllwil.wu.hu vd, vj, uk4` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsllwil_wu_hu(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk4) +{ + tcg_out32(s, encode_vdvjuk4_insn(OPC_VSLLWIL_WU_HU, vd, vj, uk4)); +} + +/* Emits the `vsllwil.du.wu vd, vj, uk5` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsllwil_du_wu(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk5) +{ + tcg_out32(s, encode_vdvjuk5_insn(OPC_VSLLWIL_DU_WU, vd, vj, uk5)); +} + +/* Emits the `vextl.qu.du vd, vj` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vextl_qu_du(TCGContext *s, TCGReg vd, TCGReg vj) +{ + tcg_out32(s, encode_vdvj_insn(OPC_VEXTL_QU_DU, vd, vj)); +} + +/* Emits the `vbitclri.b vd, vj, uk3` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vbitclri_b(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk3) +{ + tcg_out32(s, encode_vdvjuk3_insn(OPC_VBITCLRI_B, vd, vj, uk3)); +} + +/* Emits the `vbitclri.h vd, vj, uk4` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vbitclri_h(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk4) +{ + tcg_out32(s, encode_vdvjuk4_insn(OPC_VBITCLRI_H, vd, vj, uk4)); +} + +/* Emits the `vbitclri.w vd, vj, uk5` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vbitclri_w(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk5) +{ + tcg_out32(s, encode_vdvjuk5_insn(OPC_VBITCLRI_W, vd, vj, uk5)); +} + +/* Emits the `vbitclri.d vd, vj, uk6` instruction. 
*/ +static void __attribute__((unused)) +tcg_out_opc_vbitclri_d(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk6) +{ + tcg_out32(s, encode_vdvjuk6_insn(OPC_VBITCLRI_D, vd, vj, uk6)); +} + +/* Emits the `vbitseti.b vd, vj, uk3` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vbitseti_b(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk3) +{ + tcg_out32(s, encode_vdvjuk3_insn(OPC_VBITSETI_B, vd, vj, uk3)); +} + +/* Emits the `vbitseti.h vd, vj, uk4` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vbitseti_h(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk4) +{ + tcg_out32(s, encode_vdvjuk4_insn(OPC_VBITSETI_H, vd, vj, uk4)); +} + +/* Emits the `vbitseti.w vd, vj, uk5` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vbitseti_w(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk5) +{ + tcg_out32(s, encode_vdvjuk5_insn(OPC_VBITSETI_W, vd, vj, uk5)); +} + +/* Emits the `vbitseti.d vd, vj, uk6` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vbitseti_d(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk6) +{ + tcg_out32(s, encode_vdvjuk6_insn(OPC_VBITSETI_D, vd, vj, uk6)); +} + +/* Emits the `vbitrevi.b vd, vj, uk3` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vbitrevi_b(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk3) +{ + tcg_out32(s, encode_vdvjuk3_insn(OPC_VBITREVI_B, vd, vj, uk3)); +} + +/* Emits the `vbitrevi.h vd, vj, uk4` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vbitrevi_h(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk4) +{ + tcg_out32(s, encode_vdvjuk4_insn(OPC_VBITREVI_H, vd, vj, uk4)); +} + +/* Emits the `vbitrevi.w vd, vj, uk5` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vbitrevi_w(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk5) +{ + tcg_out32(s, encode_vdvjuk5_insn(OPC_VBITREVI_W, vd, vj, uk5)); +} + +/* Emits the `vbitrevi.d vd, vj, uk6` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vbitrevi_d(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk6) +{ + tcg_out32(s, encode_vdvjuk6_insn(OPC_VBITREVI_D, vd, vj, uk6)); +} + +/* Emits the `vsat.b vd, vj, uk3` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsat_b(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk3) +{ + tcg_out32(s, encode_vdvjuk3_insn(OPC_VSAT_B, vd, vj, uk3)); +} + +/* Emits the `vsat.h vd, vj, uk4` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsat_h(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk4) +{ + tcg_out32(s, encode_vdvjuk4_insn(OPC_VSAT_H, vd, vj, uk4)); +} + +/* Emits the `vsat.w vd, vj, uk5` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsat_w(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk5) +{ + tcg_out32(s, encode_vdvjuk5_insn(OPC_VSAT_W, vd, vj, uk5)); +} + +/* Emits the `vsat.d vd, vj, uk6` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsat_d(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk6) +{ + tcg_out32(s, encode_vdvjuk6_insn(OPC_VSAT_D, vd, vj, uk6)); +} + +/* Emits the `vsat.bu vd, vj, uk3` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsat_bu(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk3) +{ + tcg_out32(s, encode_vdvjuk3_insn(OPC_VSAT_BU, vd, vj, uk3)); +} + +/* Emits the `vsat.hu vd, vj, uk4` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsat_hu(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk4) +{ + tcg_out32(s, encode_vdvjuk4_insn(OPC_VSAT_HU, vd, vj, uk4)); +} + +/* Emits the `vsat.wu vd, vj, uk5` instruction. 
*/ +static void __attribute__((unused)) +tcg_out_opc_vsat_wu(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk5) +{ + tcg_out32(s, encode_vdvjuk5_insn(OPC_VSAT_WU, vd, vj, uk5)); +} + +/* Emits the `vsat.du vd, vj, uk6` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsat_du(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk6) +{ + tcg_out32(s, encode_vdvjuk6_insn(OPC_VSAT_DU, vd, vj, uk6)); +} + +/* Emits the `vslli.b vd, vj, uk3` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vslli_b(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk3) +{ + tcg_out32(s, encode_vdvjuk3_insn(OPC_VSLLI_B, vd, vj, uk3)); +} + +/* Emits the `vslli.h vd, vj, uk4` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vslli_h(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk4) +{ + tcg_out32(s, encode_vdvjuk4_insn(OPC_VSLLI_H, vd, vj, uk4)); +} + +/* Emits the `vslli.w vd, vj, uk5` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vslli_w(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk5) +{ + tcg_out32(s, encode_vdvjuk5_insn(OPC_VSLLI_W, vd, vj, uk5)); +} + +/* Emits the `vslli.d vd, vj, uk6` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vslli_d(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk6) +{ + tcg_out32(s, encode_vdvjuk6_insn(OPC_VSLLI_D, vd, vj, uk6)); +} + +/* Emits the `vsrli.b vd, vj, uk3` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsrli_b(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk3) +{ + tcg_out32(s, encode_vdvjuk3_insn(OPC_VSRLI_B, vd, vj, uk3)); +} + +/* Emits the `vsrli.h vd, vj, uk4` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsrli_h(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk4) +{ + tcg_out32(s, encode_vdvjuk4_insn(OPC_VSRLI_H, vd, vj, uk4)); +} + +/* Emits the `vsrli.w vd, vj, uk5` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsrli_w(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk5) +{ + tcg_out32(s, encode_vdvjuk5_insn(OPC_VSRLI_W, vd, vj, uk5)); +} + +/* Emits the `vsrli.d vd, vj, uk6` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsrli_d(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk6) +{ + tcg_out32(s, encode_vdvjuk6_insn(OPC_VSRLI_D, vd, vj, uk6)); +} + +/* Emits the `vsrai.b vd, vj, uk3` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsrai_b(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk3) +{ + tcg_out32(s, encode_vdvjuk3_insn(OPC_VSRAI_B, vd, vj, uk3)); +} + +/* Emits the `vsrai.h vd, vj, uk4` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsrai_h(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk4) +{ + tcg_out32(s, encode_vdvjuk4_insn(OPC_VSRAI_H, vd, vj, uk4)); +} + +/* Emits the `vsrai.w vd, vj, uk5` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsrai_w(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk5) +{ + tcg_out32(s, encode_vdvjuk5_insn(OPC_VSRAI_W, vd, vj, uk5)); +} + +/* Emits the `vsrai.d vd, vj, uk6` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsrai_d(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk6) +{ + tcg_out32(s, encode_vdvjuk6_insn(OPC_VSRAI_D, vd, vj, uk6)); +} + +/* Emits the `vsrlni.b.h vd, vj, uk4` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsrlni_b_h(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk4) +{ + tcg_out32(s, encode_vdvjuk4_insn(OPC_VSRLNI_B_H, vd, vj, uk4)); +} + +/* Emits the `vsrlni.h.w vd, vj, uk5` instruction. 
*/ +static void __attribute__((unused)) +tcg_out_opc_vsrlni_h_w(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk5) +{ + tcg_out32(s, encode_vdvjuk5_insn(OPC_VSRLNI_H_W, vd, vj, uk5)); +} + +/* Emits the `vsrlni.w.d vd, vj, uk6` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsrlni_w_d(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk6) +{ + tcg_out32(s, encode_vdvjuk6_insn(OPC_VSRLNI_W_D, vd, vj, uk6)); +} + +/* Emits the `vsrlni.d.q vd, vj, uk7` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsrlni_d_q(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk7) +{ + tcg_out32(s, encode_vdvjuk7_insn(OPC_VSRLNI_D_Q, vd, vj, uk7)); +} + +/* Emits the `vsrlrni.b.h vd, vj, uk4` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsrlrni_b_h(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk4) +{ + tcg_out32(s, encode_vdvjuk4_insn(OPC_VSRLRNI_B_H, vd, vj, uk4)); +} + +/* Emits the `vsrlrni.h.w vd, vj, uk5` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsrlrni_h_w(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk5) +{ + tcg_out32(s, encode_vdvjuk5_insn(OPC_VSRLRNI_H_W, vd, vj, uk5)); +} + +/* Emits the `vsrlrni.w.d vd, vj, uk6` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsrlrni_w_d(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk6) +{ + tcg_out32(s, encode_vdvjuk6_insn(OPC_VSRLRNI_W_D, vd, vj, uk6)); +} + +/* Emits the `vsrlrni.d.q vd, vj, uk7` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsrlrni_d_q(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk7) +{ + tcg_out32(s, encode_vdvjuk7_insn(OPC_VSRLRNI_D_Q, vd, vj, uk7)); +} + +/* Emits the `vssrlni.b.h vd, vj, uk4` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vssrlni_b_h(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk4) +{ + tcg_out32(s, encode_vdvjuk4_insn(OPC_VSSRLNI_B_H, vd, vj, uk4)); +} + +/* Emits the `vssrlni.h.w vd, vj, uk5` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vssrlni_h_w(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk5) +{ + tcg_out32(s, encode_vdvjuk5_insn(OPC_VSSRLNI_H_W, vd, vj, uk5)); +} + +/* Emits the `vssrlni.w.d vd, vj, uk6` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vssrlni_w_d(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk6) +{ + tcg_out32(s, encode_vdvjuk6_insn(OPC_VSSRLNI_W_D, vd, vj, uk6)); +} + +/* Emits the `vssrlni.d.q vd, vj, uk7` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vssrlni_d_q(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk7) +{ + tcg_out32(s, encode_vdvjuk7_insn(OPC_VSSRLNI_D_Q, vd, vj, uk7)); +} + +/* Emits the `vssrlni.bu.h vd, vj, uk4` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vssrlni_bu_h(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk4) +{ + tcg_out32(s, encode_vdvjuk4_insn(OPC_VSSRLNI_BU_H, vd, vj, uk4)); +} + +/* Emits the `vssrlni.hu.w vd, vj, uk5` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vssrlni_hu_w(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk5) +{ + tcg_out32(s, encode_vdvjuk5_insn(OPC_VSSRLNI_HU_W, vd, vj, uk5)); +} + +/* Emits the `vssrlni.wu.d vd, vj, uk6` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vssrlni_wu_d(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk6) +{ + tcg_out32(s, encode_vdvjuk6_insn(OPC_VSSRLNI_WU_D, vd, vj, uk6)); +} + +/* Emits the `vssrlni.du.q vd, vj, uk7` instruction. 
*/ +static void __attribute__((unused)) +tcg_out_opc_vssrlni_du_q(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk7) +{ + tcg_out32(s, encode_vdvjuk7_insn(OPC_VSSRLNI_DU_Q, vd, vj, uk7)); +} + +/* Emits the `vssrlrni.b.h vd, vj, uk4` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vssrlrni_b_h(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk4) +{ + tcg_out32(s, encode_vdvjuk4_insn(OPC_VSSRLRNI_B_H, vd, vj, uk4)); +} + +/* Emits the `vssrlrni.h.w vd, vj, uk5` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vssrlrni_h_w(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk5) +{ + tcg_out32(s, encode_vdvjuk5_insn(OPC_VSSRLRNI_H_W, vd, vj, uk5)); +} + +/* Emits the `vssrlrni.w.d vd, vj, uk6` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vssrlrni_w_d(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk6) +{ + tcg_out32(s, encode_vdvjuk6_insn(OPC_VSSRLRNI_W_D, vd, vj, uk6)); +} + +/* Emits the `vssrlrni.d.q vd, vj, uk7` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vssrlrni_d_q(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk7) +{ + tcg_out32(s, encode_vdvjuk7_insn(OPC_VSSRLRNI_D_Q, vd, vj, uk7)); +} + +/* Emits the `vssrlrni.bu.h vd, vj, uk4` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vssrlrni_bu_h(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk4) +{ + tcg_out32(s, encode_vdvjuk4_insn(OPC_VSSRLRNI_BU_H, vd, vj, uk4)); +} + +/* Emits the `vssrlrni.hu.w vd, vj, uk5` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vssrlrni_hu_w(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk5) +{ + tcg_out32(s, encode_vdvjuk5_insn(OPC_VSSRLRNI_HU_W, vd, vj, uk5)); +} + +/* Emits the `vssrlrni.wu.d vd, vj, uk6` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vssrlrni_wu_d(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk6) +{ + tcg_out32(s, encode_vdvjuk6_insn(OPC_VSSRLRNI_WU_D, vd, vj, uk6)); +} + +/* Emits the `vssrlrni.du.q vd, vj, uk7` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vssrlrni_du_q(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk7) +{ + tcg_out32(s, encode_vdvjuk7_insn(OPC_VSSRLRNI_DU_Q, vd, vj, uk7)); +} + +/* Emits the `vsrani.b.h vd, vj, uk4` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsrani_b_h(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk4) +{ + tcg_out32(s, encode_vdvjuk4_insn(OPC_VSRANI_B_H, vd, vj, uk4)); +} + +/* Emits the `vsrani.h.w vd, vj, uk5` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsrani_h_w(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk5) +{ + tcg_out32(s, encode_vdvjuk5_insn(OPC_VSRANI_H_W, vd, vj, uk5)); +} + +/* Emits the `vsrani.w.d vd, vj, uk6` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsrani_w_d(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk6) +{ + tcg_out32(s, encode_vdvjuk6_insn(OPC_VSRANI_W_D, vd, vj, uk6)); +} + +/* Emits the `vsrani.d.q vd, vj, uk7` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsrani_d_q(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk7) +{ + tcg_out32(s, encode_vdvjuk7_insn(OPC_VSRANI_D_Q, vd, vj, uk7)); +} + +/* Emits the `vsrarni.b.h vd, vj, uk4` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsrarni_b_h(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk4) +{ + tcg_out32(s, encode_vdvjuk4_insn(OPC_VSRARNI_B_H, vd, vj, uk4)); +} + +/* Emits the `vsrarni.h.w vd, vj, uk5` instruction. 
*/ +static void __attribute__((unused)) +tcg_out_opc_vsrarni_h_w(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk5) +{ + tcg_out32(s, encode_vdvjuk5_insn(OPC_VSRARNI_H_W, vd, vj, uk5)); +} + +/* Emits the `vsrarni.w.d vd, vj, uk6` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsrarni_w_d(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk6) +{ + tcg_out32(s, encode_vdvjuk6_insn(OPC_VSRARNI_W_D, vd, vj, uk6)); +} + +/* Emits the `vsrarni.d.q vd, vj, uk7` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vsrarni_d_q(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk7) +{ + tcg_out32(s, encode_vdvjuk7_insn(OPC_VSRARNI_D_Q, vd, vj, uk7)); +} + +/* Emits the `vssrani.b.h vd, vj, uk4` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vssrani_b_h(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk4) +{ + tcg_out32(s, encode_vdvjuk4_insn(OPC_VSSRANI_B_H, vd, vj, uk4)); +} + +/* Emits the `vssrani.h.w vd, vj, uk5` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vssrani_h_w(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk5) +{ + tcg_out32(s, encode_vdvjuk5_insn(OPC_VSSRANI_H_W, vd, vj, uk5)); +} + +/* Emits the `vssrani.w.d vd, vj, uk6` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vssrani_w_d(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk6) +{ + tcg_out32(s, encode_vdvjuk6_insn(OPC_VSSRANI_W_D, vd, vj, uk6)); +} + +/* Emits the `vssrani.d.q vd, vj, uk7` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vssrani_d_q(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk7) +{ + tcg_out32(s, encode_vdvjuk7_insn(OPC_VSSRANI_D_Q, vd, vj, uk7)); +} + +/* Emits the `vssrani.bu.h vd, vj, uk4` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vssrani_bu_h(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk4) +{ + tcg_out32(s, encode_vdvjuk4_insn(OPC_VSSRANI_BU_H, vd, vj, uk4)); +} + +/* Emits the `vssrani.hu.w vd, vj, uk5` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vssrani_hu_w(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk5) +{ + tcg_out32(s, encode_vdvjuk5_insn(OPC_VSSRANI_HU_W, vd, vj, uk5)); +} + +/* Emits the `vssrani.wu.d vd, vj, uk6` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vssrani_wu_d(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk6) +{ + tcg_out32(s, encode_vdvjuk6_insn(OPC_VSSRANI_WU_D, vd, vj, uk6)); +} + +/* Emits the `vssrani.du.q vd, vj, uk7` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vssrani_du_q(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk7) +{ + tcg_out32(s, encode_vdvjuk7_insn(OPC_VSSRANI_DU_Q, vd, vj, uk7)); +} + +/* Emits the `vssrarni.b.h vd, vj, uk4` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vssrarni_b_h(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk4) +{ + tcg_out32(s, encode_vdvjuk4_insn(OPC_VSSRARNI_B_H, vd, vj, uk4)); +} + +/* Emits the `vssrarni.h.w vd, vj, uk5` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vssrarni_h_w(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk5) +{ + tcg_out32(s, encode_vdvjuk5_insn(OPC_VSSRARNI_H_W, vd, vj, uk5)); +} + +/* Emits the `vssrarni.w.d vd, vj, uk6` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vssrarni_w_d(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk6) +{ + tcg_out32(s, encode_vdvjuk6_insn(OPC_VSSRARNI_W_D, vd, vj, uk6)); +} + +/* Emits the `vssrarni.d.q vd, vj, uk7` instruction. 
*/ +static void __attribute__((unused)) +tcg_out_opc_vssrarni_d_q(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk7) +{ + tcg_out32(s, encode_vdvjuk7_insn(OPC_VSSRARNI_D_Q, vd, vj, uk7)); +} + +/* Emits the `vssrarni.bu.h vd, vj, uk4` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vssrarni_bu_h(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk4) +{ + tcg_out32(s, encode_vdvjuk4_insn(OPC_VSSRARNI_BU_H, vd, vj, uk4)); +} + +/* Emits the `vssrarni.hu.w vd, vj, uk5` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vssrarni_hu_w(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk5) +{ + tcg_out32(s, encode_vdvjuk5_insn(OPC_VSSRARNI_HU_W, vd, vj, uk5)); +} + +/* Emits the `vssrarni.wu.d vd, vj, uk6` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vssrarni_wu_d(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk6) +{ + tcg_out32(s, encode_vdvjuk6_insn(OPC_VSSRARNI_WU_D, vd, vj, uk6)); +} + +/* Emits the `vssrarni.du.q vd, vj, uk7` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vssrarni_du_q(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk7) +{ + tcg_out32(s, encode_vdvjuk7_insn(OPC_VSSRARNI_DU_Q, vd, vj, uk7)); +} + +/* Emits the `vextrins.d vd, vj, uk8` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vextrins_d(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk8) +{ + tcg_out32(s, encode_vdvjuk8_insn(OPC_VEXTRINS_D, vd, vj, uk8)); +} + +/* Emits the `vextrins.w vd, vj, uk8` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vextrins_w(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk8) +{ + tcg_out32(s, encode_vdvjuk8_insn(OPC_VEXTRINS_W, vd, vj, uk8)); +} + +/* Emits the `vextrins.h vd, vj, uk8` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vextrins_h(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk8) +{ + tcg_out32(s, encode_vdvjuk8_insn(OPC_VEXTRINS_H, vd, vj, uk8)); +} + +/* Emits the `vextrins.b vd, vj, uk8` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vextrins_b(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk8) +{ + tcg_out32(s, encode_vdvjuk8_insn(OPC_VEXTRINS_B, vd, vj, uk8)); +} + +/* Emits the `vshuf4i.b vd, vj, uk8` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vshuf4i_b(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk8) +{ + tcg_out32(s, encode_vdvjuk8_insn(OPC_VSHUF4I_B, vd, vj, uk8)); +} + +/* Emits the `vshuf4i.h vd, vj, uk8` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vshuf4i_h(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk8) +{ + tcg_out32(s, encode_vdvjuk8_insn(OPC_VSHUF4I_H, vd, vj, uk8)); +} + +/* Emits the `vshuf4i.w vd, vj, uk8` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vshuf4i_w(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk8) +{ + tcg_out32(s, encode_vdvjuk8_insn(OPC_VSHUF4I_W, vd, vj, uk8)); +} + +/* Emits the `vshuf4i.d vd, vj, uk8` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vshuf4i_d(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk8) +{ + tcg_out32(s, encode_vdvjuk8_insn(OPC_VSHUF4I_D, vd, vj, uk8)); +} + +/* Emits the `vbitseli.b vd, vj, uk8` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vbitseli_b(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk8) +{ + tcg_out32(s, encode_vdvjuk8_insn(OPC_VBITSELI_B, vd, vj, uk8)); +} + +/* Emits the `vandi.b vd, vj, uk8` instruction. 
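+ *
+ * Like every other vdvjukN emitter in this generated block, this helper just
+ * packs the destination vd, the source vj and the N-bit unsigned immediate
+ * into a single 32-bit instruction word via the matching encode_vdvjukN_insn()
+ * helper and writes it out with tcg_out32(); only the opcode constant differs
+ * between them.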
*/ +static void __attribute__((unused)) +tcg_out_opc_vandi_b(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk8) +{ + tcg_out32(s, encode_vdvjuk8_insn(OPC_VANDI_B, vd, vj, uk8)); +} + +/* Emits the `vori.b vd, vj, uk8` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vori_b(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk8) +{ + tcg_out32(s, encode_vdvjuk8_insn(OPC_VORI_B, vd, vj, uk8)); +} + +/* Emits the `vxori.b vd, vj, uk8` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vxori_b(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk8) +{ + tcg_out32(s, encode_vdvjuk8_insn(OPC_VXORI_B, vd, vj, uk8)); +} + +/* Emits the `vnori.b vd, vj, uk8` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vnori_b(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk8) +{ + tcg_out32(s, encode_vdvjuk8_insn(OPC_VNORI_B, vd, vj, uk8)); +} + +/* Emits the `vldi vd, sj13` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vldi(TCGContext *s, TCGReg vd, int32_t sj13) +{ + tcg_out32(s, encode_vdsj13_insn(OPC_VLDI, vd, sj13)); +} + +/* Emits the `vpermi.w vd, vj, uk8` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_vpermi_w(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk8) +{ + tcg_out32(s, encode_vdvjuk8_insn(OPC_VPERMI_W, vd, vj, uk8)); +} + +/* End of generated code. */ diff --git a/qemu/tcg/loongarch64/tcg-target.h b/qemu/tcg/loongarch64/tcg-target.h new file mode 100644 index 0000000000..60990426e6 --- /dev/null +++ b/qemu/tcg/loongarch64/tcg-target.h @@ -0,0 +1,228 @@ +/* + * Tiny Code Generator for QEMU + * + * Copyright (c) 2021 WANG Xuerui + * + * Based on tcg/riscv/tcg-target.h + * + * Copyright (c) 2018 SiFive, Inc + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#ifndef LOONGARCH_TCG_TARGET_H +#define LOONGARCH_TCG_TARGET_H + +#define TCG_TARGET_INSN_UNIT_SIZE 4 +#define TCG_TARGET_NB_REGS 64 +#define TCG_TARGET_TLB_DISPLACEMENT_BITS 16 + +/* + * Loongson removed the (incomplete) 32-bit support from kernel and toolchain + * for the initial upstreaming of this architecture, so don't bother and just + * support the LP64* ABI for now. 
+ */ +#if defined(__loongarch64) +# define TCG_TARGET_REG_BITS 64 +#else +# error unsupported LoongArch register size +#endif + +#define MAX_CODE_GEN_BUFFER_SIZE ((size_t)-1) + +typedef enum { + TCG_REG_ZERO, + TCG_REG_RA, + TCG_REG_TP, + TCG_REG_SP, + TCG_REG_A0, + TCG_REG_A1, + TCG_REG_A2, + TCG_REG_A3, + TCG_REG_A4, + TCG_REG_A5, + TCG_REG_A6, + TCG_REG_A7, + TCG_REG_T0, + TCG_REG_T1, + TCG_REG_T2, + TCG_REG_T3, + TCG_REG_T4, + TCG_REG_T5, + TCG_REG_T6, + TCG_REG_T7, + TCG_REG_T8, + TCG_REG_RESERVED, + TCG_REG_S9, + TCG_REG_S0, + TCG_REG_S1, + TCG_REG_S2, + TCG_REG_S3, + TCG_REG_S4, + TCG_REG_S5, + TCG_REG_S6, + TCG_REG_S7, + TCG_REG_S8, + + TCG_REG_V0 = 32, TCG_REG_V1, TCG_REG_V2, TCG_REG_V3, + TCG_REG_V4, TCG_REG_V5, TCG_REG_V6, TCG_REG_V7, + TCG_REG_V8, TCG_REG_V9, TCG_REG_V10, TCG_REG_V11, + TCG_REG_V12, TCG_REG_V13, TCG_REG_V14, TCG_REG_V15, + TCG_REG_V16, TCG_REG_V17, TCG_REG_V18, TCG_REG_V19, + TCG_REG_V20, TCG_REG_V21, TCG_REG_V22, TCG_REG_V23, + TCG_REG_V24, TCG_REG_V25, TCG_REG_V26, TCG_REG_V27, + TCG_REG_V28, TCG_REG_V29, TCG_REG_V30, TCG_REG_V31, + + /* aliases */ + TCG_AREG0 = TCG_REG_S0, + TCG_REG_TMP0 = TCG_REG_T8, + TCG_REG_TMP1 = TCG_REG_T7, + TCG_REG_TMP2 = TCG_REG_T6, + TCG_VEC_TMP0 = TCG_REG_V23, +} TCGReg; + +extern bool use_lsx_instructions; + +/* used for function call generation */ +#define TCG_REG_CALL_STACK TCG_REG_SP +#define TCG_TARGET_STACK_ALIGN 16 +#define TCG_TARGET_CALL_STACK_OFFSET 0 +#define TCG_TARGET_CALL_ARG_I32 TCG_CALL_ARG_NORMAL +#define TCG_TARGET_CALL_ARG_I64 TCG_CALL_ARG_NORMAL +#define TCG_TARGET_CALL_ARG_I128 TCG_CALL_ARG_NORMAL +#define TCG_TARGET_CALL_RET_I128 TCG_CALL_RET_NORMAL + +/* optional instructions */ +#define TCG_TARGET_HAS_movcond_i32 1 +#define TCG_TARGET_HAS_negsetcond_i32 0 +#define TCG_TARGET_HAS_div_i32 1 +#define TCG_TARGET_HAS_rem_i32 1 +#define TCG_TARGET_HAS_div2_i32 0 +#define TCG_TARGET_HAS_rot_i32 1 +#define TCG_TARGET_HAS_deposit_i32 1 +#define TCG_TARGET_HAS_extract_i32 1 +#define TCG_TARGET_HAS_sextract_i32 0 +#define TCG_TARGET_HAS_extract2_i32 0 +#define TCG_TARGET_HAS_add2_i32 0 +#define TCG_TARGET_HAS_sub2_i32 0 +#define TCG_TARGET_HAS_mulu2_i32 0 +#define TCG_TARGET_HAS_muls2_i32 0 +#define TCG_TARGET_HAS_muluh_i32 1 +#define TCG_TARGET_HAS_mulsh_i32 1 +#define TCG_TARGET_HAS_ext8s_i32 1 +#define TCG_TARGET_HAS_ext16s_i32 1 +#define TCG_TARGET_HAS_ext8u_i32 1 +#define TCG_TARGET_HAS_ext16u_i32 1 +#define TCG_TARGET_HAS_bswap16_i32 1 +#define TCG_TARGET_HAS_bswap32_i32 1 +#define TCG_TARGET_HAS_not_i32 1 +#define TCG_TARGET_HAS_neg_i32 0 +#define TCG_TARGET_HAS_andc_i32 1 +#define TCG_TARGET_HAS_orc_i32 1 +#define TCG_TARGET_HAS_eqv_i32 0 +#define TCG_TARGET_HAS_nand_i32 0 +#define TCG_TARGET_HAS_nor_i32 1 +#define TCG_TARGET_HAS_clz_i32 1 +#define TCG_TARGET_HAS_ctz_i32 1 +#define TCG_TARGET_HAS_ctpop_i32 0 +#define TCG_TARGET_HAS_brcond2 0 +#define TCG_TARGET_HAS_setcond2 0 +#define TCG_TARGET_HAS_qemu_st8_i32 0 +#define TCG_TARGET_HAS_goto_ptr 1 +#define TCG_TARGET_HAS_extrl_i64_i32 0 +#define TCG_TARGET_HAS_extrh_i64_i32 0 + +/* 64-bit operations */ +#define TCG_TARGET_HAS_movcond_i64 1 +#define TCG_TARGET_HAS_negsetcond_i64 0 +#define TCG_TARGET_HAS_div_i64 1 +#define TCG_TARGET_HAS_rem_i64 1 +#define TCG_TARGET_HAS_div2_i64 0 +#define TCG_TARGET_HAS_rot_i64 1 +#define TCG_TARGET_HAS_deposit_i64 1 +#define TCG_TARGET_HAS_extract_i64 1 +#define TCG_TARGET_HAS_sextract_i64 0 +#define TCG_TARGET_HAS_extract2_i64 0 +#define TCG_TARGET_HAS_extr_i64_i32 1 +#define TCG_TARGET_HAS_ext8s_i64 1 +#define 
TCG_TARGET_HAS_ext16s_i64 1 +#define TCG_TARGET_HAS_ext32s_i64 1 +#define TCG_TARGET_HAS_ext8u_i64 1 +#define TCG_TARGET_HAS_ext16u_i64 1 +#define TCG_TARGET_HAS_ext32u_i64 1 +#define TCG_TARGET_HAS_bswap16_i64 1 +#define TCG_TARGET_HAS_bswap32_i64 1 +#define TCG_TARGET_HAS_bswap64_i64 1 +#define TCG_TARGET_HAS_not_i64 1 +#define TCG_TARGET_HAS_neg_i64 0 +#define TCG_TARGET_HAS_andc_i64 1 +#define TCG_TARGET_HAS_orc_i64 1 +#define TCG_TARGET_HAS_eqv_i64 0 +#define TCG_TARGET_HAS_nand_i64 0 +#define TCG_TARGET_HAS_nor_i64 1 +#define TCG_TARGET_HAS_clz_i64 1 +#define TCG_TARGET_HAS_ctz_i64 1 +#define TCG_TARGET_HAS_ctpop_i64 0 +#define TCG_TARGET_HAS_add2_i64 0 +#define TCG_TARGET_HAS_sub2_i64 0 +#define TCG_TARGET_HAS_mulu2_i64 0 +#define TCG_TARGET_HAS_muls2_i64 0 +#define TCG_TARGET_HAS_muluh_i64 1 +#define TCG_TARGET_HAS_mulsh_i64 1 +#define TCG_TARGET_HAS_direct_jump 0 + +#define TCG_TARGET_HAS_qemu_ldst_i128 use_lsx_instructions + +#define TCG_TARGET_HAS_v64 0 +#define TCG_TARGET_HAS_v128 use_lsx_instructions +#define TCG_TARGET_HAS_v256 0 + +#define TCG_TARGET_HAS_not_vec 1 +#define TCG_TARGET_HAS_neg_vec 1 +#define TCG_TARGET_HAS_abs_vec 0 +#define TCG_TARGET_HAS_andc_vec 1 +#define TCG_TARGET_HAS_orc_vec 1 +#define TCG_TARGET_HAS_nand_vec 0 +#define TCG_TARGET_HAS_nor_vec 1 +#define TCG_TARGET_HAS_eqv_vec 0 +#define TCG_TARGET_HAS_mul_vec 1 +#define TCG_TARGET_HAS_shi_vec 1 +#define TCG_TARGET_HAS_shs_vec 0 +#define TCG_TARGET_HAS_shv_vec 1 +#define TCG_TARGET_HAS_roti_vec 1 +#define TCG_TARGET_HAS_rots_vec 0 +#define TCG_TARGET_HAS_rotv_vec 1 +#define TCG_TARGET_HAS_sat_vec 1 +#define TCG_TARGET_HAS_minmax_vec 1 +#define TCG_TARGET_HAS_bitsel_vec 1 +#define TCG_TARGET_HAS_cmpsel_vec 0 + +#define TCG_TARGET_DEFAULT_MO (0) +#define TCG_TARGET_HAS_MEMORY_BSWAP 0 + +static inline void flush_icache_range(uintptr_t start, uintptr_t stop) +{ + __builtin___clear_cache((char *)start, (char *)stop); +} + +void tb_target_set_jmp_target(uintptr_t, uintptr_t, uintptr_t); + +#define TCG_TARGET_NEED_LDST_LABELS + +#endif /* LOONGARCH_TCG_TARGET_H */ diff --git a/qemu/tcg/loongarch64/tcg-target.inc.c b/qemu/tcg/loongarch64/tcg-target.inc.c new file mode 100644 index 0000000000..aed5e007a6 --- /dev/null +++ b/qemu/tcg/loongarch64/tcg-target.inc.c @@ -0,0 +1,2681 @@ +/* + * Tiny Code Generator for QEMU + * + * Copyright (c) 2021 WANG Xuerui + * + * Based on tcg/riscv/tcg-target.c.inc + * + * Copyright (c) 2018 SiFive, Inc + * Copyright (c) 2008-2009 Arnaud Patard + * Copyright (c) 2009 Aurelien Jarno + * Copyright (c) 2008 Fabrice Bellard + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include "../tcg-ldst.inc.c" +#include + +bool use_lsx_instructions; + +#ifdef CONFIG_DEBUG_TCG +static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = { + "zero", + "ra", + "tp", + "sp", + "a0", + "a1", + "a2", + "a3", + "a4", + "a5", + "a6", + "a7", + "t0", + "t1", + "t2", + "t3", + "t4", + "t5", + "t6", + "t7", + "t8", + "r21", /* reserved in the LP64* ABI, hence no ABI name */ + "s9", + "s0", + "s1", + "s2", + "s3", + "s4", + "s5", + "s6", + "s7", + "s8", + "vr0", + "vr1", + "vr2", + "vr3", + "vr4", + "vr5", + "vr6", + "vr7", + "vr8", + "vr9", + "vr10", + "vr11", + "vr12", + "vr13", + "vr14", + "vr15", + "vr16", + "vr17", + "vr18", + "vr19", + "vr20", + "vr21", + "vr22", + "vr23", + "vr24", + "vr25", + "vr26", + "vr27", + "vr28", + "vr29", + "vr30", + "vr31", +}; +#endif + +static const int tcg_target_reg_alloc_order[] = { + /* Registers preserved across calls */ + /* TCG_REG_S0 reserved for TCG_AREG0 */ + TCG_REG_S1, + TCG_REG_S2, + TCG_REG_S3, + TCG_REG_S4, + TCG_REG_S5, + TCG_REG_S6, + TCG_REG_S7, + TCG_REG_S8, + TCG_REG_S9, + + /* Registers (potentially) clobbered across calls */ + TCG_REG_T0, + TCG_REG_T1, + TCG_REG_T2, + TCG_REG_T3, + TCG_REG_T4, + TCG_REG_T5, + TCG_REG_T6, + TCG_REG_T7, + TCG_REG_T8, + + /* Argument registers, opposite order of allocation. */ + TCG_REG_A7, + TCG_REG_A6, + TCG_REG_A5, + TCG_REG_A4, + TCG_REG_A3, + TCG_REG_A2, + TCG_REG_A1, + TCG_REG_A0, + + /* Vector registers */ + TCG_REG_V0, TCG_REG_V1, TCG_REG_V2, TCG_REG_V3, + TCG_REG_V4, TCG_REG_V5, TCG_REG_V6, TCG_REG_V7, + TCG_REG_V8, TCG_REG_V9, TCG_REG_V10, TCG_REG_V11, + TCG_REG_V12, TCG_REG_V13, TCG_REG_V14, TCG_REG_V15, + TCG_REG_V16, TCG_REG_V17, TCG_REG_V18, TCG_REG_V19, + TCG_REG_V20, TCG_REG_V21, TCG_REG_V22, TCG_REG_V23, + /* V24 - V31 are caller-saved, and skipped. 
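+ *
+ * Note on the ordering above: the call-saved registers (s1-s9) come first so
+ * that long-lived TCG values preferably land in registers that survive calls
+ * to helper functions, while the argument registers are tried last and in
+ * reverse order (a7..a0) so that the low-numbered ones stay free for call
+ * setup as long as possible.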
*/ +}; + +static const int tcg_target_call_iarg_regs[] = { + TCG_REG_A0, + TCG_REG_A1, + TCG_REG_A2, + TCG_REG_A3, + TCG_REG_A4, + TCG_REG_A5, + TCG_REG_A6, + TCG_REG_A7, +}; + +static const TCGReg tcg_target_call_oarg_regs[2] = { + TCG_REG_A0, + TCG_REG_A1 +}; + +#ifndef CONFIG_SOFTMMU +#define USE_GUEST_BASE (guest_base != 0) +#define TCG_GUEST_BASE_REG TCG_REG_S1 +#endif + +#define TCG_CT_CONST_ZERO 0x100 +#define TCG_CT_CONST_S12 0x200 +#define TCG_CT_CONST_S32 0x400 +#define TCG_CT_CONST_U12 0x800 +#define TCG_CT_CONST_C12 0x1000 +#define TCG_CT_CONST_WSZ 0x2000 +#define TCG_CT_CONST_VCMP 0x4000 +#define TCG_CT_CONST_VADD 0x8000 + +#define ALL_GENERAL_REGS MAKE_64BIT_MASK(0, 32) +#define ALL_VECTOR_REGS MAKE_64BIT_MASK(32, 32) + +static inline tcg_target_long sextreg(tcg_target_long val, int pos, int len) +{ + return sextract64(val, pos, len); +} + +/* test if a constant matches the constraint */ +static inline int tcg_target_const_match(tcg_target_long val, TCGType type, + const TCGArgConstraint *arg_ct) +{ + int ct; + ct = arg_ct->ct; + if (ct & TCG_CT_CONST) { + return true; + } + if ((ct & TCG_CT_CONST_ZERO) && val == 0) { + return true; + } + if ((ct & TCG_CT_CONST_S12) && val == sextreg(val, 0, 12)) { + return true; + } + if ((ct & TCG_CT_CONST_S32) && val == (int32_t)val) { + return true; + } + if ((ct & TCG_CT_CONST_U12) && val >= 0 && val <= 0xfff) { + return true; + } + if ((ct & TCG_CT_CONST_C12) && ~val >= 0 && ~val <= 0xfff) { + return true; + } + if ((ct & TCG_CT_CONST_WSZ) && val == (type == TCG_TYPE_I32 ? 32 : 64)) { + return true; + } +#if 0 + int64_t vec_val = sextract64(val, 0, 8 << vece); + if ((ct & TCG_CT_CONST_VCMP) && -0x10 <= vec_val && vec_val <= 0x1f) { + return true; + } + if ((ct & TCG_CT_CONST_VADD) && -0x1f <= vec_val && vec_val <= 0x1f) { + return true; + } +#else + /* tcg does not pass vece to us */ + if ((ct & TCG_CT_CONST_VADD) || (ct & TCG_CT_CONST_VCMP)) { + return true; + } +#endif + + return false; +} + +/* parse target specific constraints */ +static const char *target_parse_constraint(TCGArgConstraint *ct, + const char *ct_str, TCGType type) +{ + switch(*ct_str++) { + case 'r': + ct->ct |= TCG_CT_REG; + ct->u.regs = ALL_GENERAL_REGS; + break; + case 'l': + ct->ct |= TCG_CT_REG; + ct->u.regs = ALL_GENERAL_REGS; +#ifdef CONFIG_SOFTMMU + tcg_regset_reset_reg(ct->u.regs, TCG_AREG0); + tcg_regset_reset_reg(ct->u.regs, TCG_REG_TMP0); + tcg_regset_reset_reg(ct->u.regs, TCG_REG_TMP1); + tcg_regset_reset_reg(ct->u.regs, TCG_REG_TMP2); +#endif + break; + case 'w': + ct->ct |= TCG_CT_REG; + ct->u.regs = ALL_VECTOR_REGS; + break; + case 'I': + ct->ct |= TCG_CT_CONST_S12; + break; + case 'J': + ct->ct |= TCG_CT_CONST_S32; + break; + case 'U': + ct->ct |= TCG_CT_CONST_U12; + break; + case 'Z': + ct->ct |= TCG_CT_CONST_ZERO; + break; + case 'C': + ct->ct |= TCG_CT_CONST_C12; + break; + case 'W': + ct->ct |= TCG_CT_CONST_WSZ; + break; + case 'M': + ct->ct |= TCG_CT_CONST_VCMP; + break; + case 'A': + ct->ct |= TCG_CT_CONST_VADD; + break; + default: + return NULL; + } + return ct_str; +} + +/* + * Relocations + */ + +/* + * Relocation records defined in LoongArch ELF psABI v1.00 is way too + * complicated; a whopping stack machine is needed to stuff the fields, at + * the very least one SOP_PUSH and one SOP_POP (of the correct format) are + * needed. + * + * Hence, define our own simpler relocation types. Numbers are chosen as to + * not collide with potential future additions to the true ELF relocation + * type enum. 
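+ *
+ * Concretely, R_LOONGARCH_BR_SK16 patches the 16-bit branch offset field of
+ * the DJSk16 conditional-branch format (instruction bits [25:10]), and
+ * R_LOONGARCH_BR_SD10K16 patches the split 26-bit offset of B/BL (low 16
+ * bits in [25:10], high 10 bits in [9:0]); both offsets are counted in
+ * 4-byte instruction units, as implemented by the reloc_br_* helpers below.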
+ */ + +/* Field Sk16, shifted right by 2; suitable for conditional jumps */ +#define R_LOONGARCH_BR_SK16 256 +/* Field Sd10k16, shifted right by 2; suitable for B and BL */ +#define R_LOONGARCH_BR_SD10K16 257 + +static bool reloc_br_sk16(tcg_insn_unit *src_rw, const tcg_insn_unit *target) +{ + intptr_t offset = (intptr_t)target - (intptr_t)src_rw; + + tcg_debug_assert((offset & 3) == 0); + offset >>= 2; + if (offset == sextreg(offset, 0, 16)) { + *src_rw = deposit64(*src_rw, 10, 16, offset); + return true; + } + + return false; +} + +static bool reloc_br_sd10k16(tcg_insn_unit *src_rw, + const tcg_insn_unit *target) +{ + intptr_t offset = (intptr_t)target - (intptr_t)src_rw; + + tcg_debug_assert((offset & 3) == 0); + offset >>= 2; + if (offset == sextreg(offset, 0, 26)) { + *src_rw = deposit64(*src_rw, 0, 10, offset >> 16); /* slot d10 */ + *src_rw = deposit64(*src_rw, 10, 16, offset); /* slot k16 */ + return true; + } + + return false; +} + +static bool patch_reloc(tcg_insn_unit *code_ptr, int type, + intptr_t value, intptr_t addend) +{ + tcg_debug_assert(addend == 0); + switch (type) { + case R_LOONGARCH_BR_SK16: + return reloc_br_sk16(code_ptr, (tcg_insn_unit *)value); + case R_LOONGARCH_BR_SD10K16: + return reloc_br_sd10k16(code_ptr, (tcg_insn_unit *)value); + default: + g_assert_not_reached(); + } +} + +#include "tcg-insn-defs.c.inc" + +/* + * TCG intrinsics + */ + +static void tcg_out_mb(TCGContext *s, TCGArg a0) +{ + /* Baseline LoongArch only has the full barrier, unfortunately. */ + tcg_out_opc_dbar(s, 0); +} + +static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg) +{ + if (ret == arg) { + return true; + } + switch (type) { + case TCG_TYPE_I32: + case TCG_TYPE_I64: + /* + * Conventional register-register move used in LoongArch is + * `or dst, src, zero`. + */ + tcg_out_opc_or(s, ret, arg, TCG_REG_ZERO); + break; + default: + g_assert_not_reached(); + } + return true; +} + +/* Loads a 32-bit immediate into rd, sign-extended. */ +static void tcg_out_movi_i32(TCGContext *s, TCGReg rd, int32_t val) +{ + tcg_target_long lo = sextreg(val, 0, 12); + tcg_target_long hi12 = sextreg(val, 12, 20); + + /* Single-instruction cases. */ + if (hi12 == 0) { + /* val fits in uimm12: ori rd, zero, val */ + tcg_out_opc_ori(s, rd, TCG_REG_ZERO, val); + return; + } + if (hi12 == sextreg(lo, 12, 20)) { + /* val fits in simm12: addi.w rd, zero, val */ + tcg_out_opc_addi_w(s, rd, TCG_REG_ZERO, val); + return; + } + + /* High bits must be set; load with lu12i.w + optional ori. */ + tcg_out_opc_lu12i_w(s, rd, hi12); + if (lo != 0) { + tcg_out_opc_ori(s, rd, rd, lo & 0xfff); + } +} + +static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg rd, + tcg_target_long val) +{ + /* + * LoongArch conventionally loads 64-bit immediates in at most 4 steps, + * with dedicated instructions for filling the respective bitfields + * below: + * + * 6 5 4 3 + * 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 + * +-----------------------+---------------------------------------+... + * | hi52 | hi32 | + * +-----------------------+---------------------------------------+... + * 3 2 1 + * 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 + * ...+-------------------------------------+-------------------------+ + * | hi12 | lo | + * ...+-------------------------------------+-------------------------+ + * + * Check if val belong to one of the several fast cases, before falling + * back to the slow path. 
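+ *
+ * As a worked example, an arbitrary constant such as 0x123456789abcdef0
+ * (assuming it is not close enough to the current code pointer for the
+ * PC-relative cases) hits none of the fast paths: the low 32 bits are loaded
+ * with lu12i.w + ori (via tcg_out_movi_i32), then hi32 = sextreg(val, 32, 20)
+ * is installed with cu32i.d and hi52 = sextreg(val, 52, 12) with cu52i.d,
+ * for four instructions in total.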
+ */ + + intptr_t pc_offset; + tcg_target_long val_lo, val_hi, pc_hi, offset_hi; + tcg_target_long hi12, hi32, hi52; + + /* Value fits in signed i32. */ + if (type == TCG_TYPE_I32 || val == (int32_t)val) { + tcg_out_movi_i32(s, rd, val); + return; + } + + /* PC-relative cases. */ + pc_offset = tcg_pcrel_diff(s, (void *)val); + if (pc_offset == sextreg(pc_offset, 0, 22) && (pc_offset & 3) == 0) { + /* Single pcaddu2i. */ + tcg_out_opc_pcaddu2i(s, rd, pc_offset >> 2); + return; + } + + if (pc_offset == (int32_t)pc_offset) { + /* Offset within 32 bits; load with pcalau12i + ori. */ + val_lo = sextreg(val, 0, 12); + val_hi = val >> 12; + pc_hi = (val - pc_offset) >> 12; + offset_hi = val_hi - pc_hi; + + tcg_debug_assert(offset_hi == sextreg(offset_hi, 0, 20)); + tcg_out_opc_pcalau12i(s, rd, offset_hi); + if (val_lo != 0) { + tcg_out_opc_ori(s, rd, rd, val_lo & 0xfff); + } + return; + } + + hi12 = sextreg(val, 12, 20); + hi32 = sextreg(val, 32, 20); + hi52 = sextreg(val, 52, 12); + + /* Single cu52i.d case. */ + if ((hi52 != 0) && (ctz64(val) >= 52)) { + tcg_out_opc_cu52i_d(s, rd, TCG_REG_ZERO, hi52); + return; + } + + /* Slow path. Initialize the low 32 bits, then concat high bits. */ + tcg_out_movi_i32(s, rd, val); + + /* Load hi32 and hi52 explicitly when they are unexpected values. */ + if (hi32 != sextreg(hi12, 20, 20)) { + tcg_out_opc_cu32i_d(s, rd, hi32); + } + + if (hi52 != sextreg(hi32, 20, 12)) { + tcg_out_opc_cu52i_d(s, rd, rd, hi52); + } +} + +static void tcg_out_addi(TCGContext *s, TCGType type, TCGReg rd, + TCGReg rs, tcg_target_long imm) +{ + tcg_target_long lo12 = sextreg(imm, 0, 12); + tcg_target_long hi16 = sextreg(imm - lo12, 16, 16); + + /* + * Note that there's a hole in between hi16 and lo12: + * + * 3 2 1 0 + * 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 + * ...+-------------------------------+-------+-----------------------+ + * | hi16 | | lo12 | + * ...+-------------------------------+-------+-----------------------+ + * + * For bits within that hole, it's more efficient to use LU12I and ADD. + */ + if (imm == (hi16 << 16) + lo12) { + if (hi16) { + tcg_out_opc_addu16i_d(s, rd, rs, hi16); + rs = rd; + } + if (type == TCG_TYPE_I32) { + tcg_out_opc_addi_w(s, rd, rs, lo12); + } else if (lo12) { + tcg_out_opc_addi_d(s, rd, rs, lo12); + } else { + tcg_out_mov(s, type, rd, rs); + } + } else { + tcg_out_movi(s, type, TCG_REG_TMP0, imm); + if (type == TCG_TYPE_I32) { + tcg_out_opc_add_w(s, rd, rs, TCG_REG_TMP0); + } else { + tcg_out_opc_add_d(s, rd, rs, TCG_REG_TMP0); + } + } +} + +static bool tcg_out_xchg(TCGContext *s, TCGType type, TCGReg r1, TCGReg r2) +{ + return false; +} + +static void tcg_out_addi_ptr(TCGContext *s, TCGReg rd, TCGReg rs, + tcg_target_long imm) +{ + /* This function is only used for passing structs by reference. 
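+ *
+ * Since tcg-target.h defines TCG_TARGET_CALL_ARG_I128 as TCG_CALL_ARG_NORMAL,
+ * 128-bit values are passed in register pairs rather than by reference, so
+ * this path is expected to be unreachable here; hence the assertion below.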
*/ + g_assert_not_reached(); +} + +static void tcg_out_ext8u(TCGContext *s, TCGReg ret, TCGReg arg) +{ + tcg_out_opc_andi(s, ret, arg, 0xff); +} + +static void tcg_out_ext16u(TCGContext *s, TCGReg ret, TCGReg arg) +{ + tcg_out_opc_bstrpick_w(s, ret, arg, 0, 15); +} + +static void tcg_out_ext32u(TCGContext *s, TCGReg ret, TCGReg arg) +{ + tcg_out_opc_bstrpick_d(s, ret, arg, 0, 31); +} + +static void tcg_out_ext8s(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg) +{ + tcg_out_opc_sext_b(s, ret, arg); +} + +static void tcg_out_ext16s(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg) +{ + tcg_out_opc_sext_h(s, ret, arg); +} + +static void tcg_out_ext32s(TCGContext *s, TCGReg ret, TCGReg arg) +{ + tcg_out_opc_addi_w(s, ret, arg, 0); +} + +static void tcg_out_exts_i32_i64(TCGContext *s, TCGReg ret, TCGReg arg) +{ + if (ret != arg) { + tcg_out_ext32s(s, ret, arg); + } +} + +static void tcg_out_extu_i32_i64(TCGContext *s, TCGReg ret, TCGReg arg) +{ + tcg_out_ext32u(s, ret, arg); +} + +static void tcg_out_extrl_i64_i32(TCGContext *s, TCGReg ret, TCGReg arg) +{ + tcg_out_ext32s(s, ret, arg); +} + +static void tcg_out_clzctz(TCGContext *s, LoongArchInsn opc, + TCGReg a0, TCGReg a1, TCGReg a2, + bool c2, bool is_32bit) +{ + if (c2) { + /* + * Fast path: semantics already satisfied due to constraint and + * insn behavior, single instruction is enough. + */ + tcg_debug_assert(a2 == (is_32bit ? 32 : 64)); + /* all clz/ctz insns belong to DJ-format */ + tcg_out32(s, encode_dj_insn(opc, a0, a1)); + return; + } + + tcg_out32(s, encode_dj_insn(opc, TCG_REG_TMP0, a1)); + /* a0 = a1 ? REG_TMP0 : a2 */ + tcg_out_opc_maskeqz(s, TCG_REG_TMP0, TCG_REG_TMP0, a1); + tcg_out_opc_masknez(s, a0, a2, a1); + tcg_out_opc_or(s, a0, TCG_REG_TMP0, a0); +} + +#define SETCOND_INV TCG_TARGET_NB_REGS +#define SETCOND_NEZ (SETCOND_INV << 1) +#define SETCOND_FLAGS (SETCOND_INV | SETCOND_NEZ) + +static int tcg_out_setcond_int(TCGContext *s, TCGCond cond, TCGReg ret, + TCGReg arg1, tcg_target_long arg2, bool c2) +{ + int flags = 0; + + switch (cond) { + case TCG_COND_EQ: /* -> NE */ + case TCG_COND_GE: /* -> LT */ + case TCG_COND_GEU: /* -> LTU */ + case TCG_COND_GT: /* -> LE */ + case TCG_COND_GTU: /* -> LEU */ + cond = tcg_invert_cond(cond); + flags ^= SETCOND_INV; + break; + default: + break; + } + + switch (cond) { + case TCG_COND_LE: + case TCG_COND_LEU: + /* + * If we have a constant input, the most efficient way to implement + * LE is by adding 1 and using LT. Watch out for wrap around for LEU. + * We don't need to care for this for LE because the constant input + * is still constrained to int32_t, and INT32_MAX+1 is representable + * in the 64-bit temporary register. 
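+ *
+ * For example, "x <= 5" is rewritten as "x < 6" and ends up as a single
+ * slti, while the unsigned compare "x <=u (unsigned)-1" is always true and
+ * is handled by the arg2 == -1 special case below without emitting any
+ * compare instruction at all.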
+ */ + if (c2) { + if (cond == TCG_COND_LEU) { + /* unsigned <= -1 is true */ + if (arg2 == -1) { + tcg_out_movi(s, TCG_TYPE_REG, ret, !(flags & SETCOND_INV)); + return ret; + } + cond = TCG_COND_LTU; + } else { + cond = TCG_COND_LT; + } + arg2 += 1; + } else { + TCGReg tmp = arg2; + arg2 = arg1; + arg1 = tmp; + cond = tcg_swap_cond(cond); /* LE -> GE */ + cond = tcg_invert_cond(cond); /* GE -> LT */ + flags ^= SETCOND_INV; + } + break; + default: + break; + } + + switch (cond) { + case TCG_COND_NE: + flags |= SETCOND_NEZ; + if (!c2) { + tcg_out_opc_xor(s, ret, arg1, arg2); + } else if (arg2 == 0) { + ret = arg1; + } else if (arg2 >= 0 && arg2 <= 0xfff) { + tcg_out_opc_xori(s, ret, arg1, arg2); + } else { + tcg_out_addi(s, TCG_TYPE_REG, ret, arg1, -arg2); + } + break; + + case TCG_COND_LT: + case TCG_COND_LTU: + if (c2) { + if (arg2 >= -0x800 && arg2 <= 0x7ff) { + if (cond == TCG_COND_LT) { + tcg_out_opc_slti(s, ret, arg1, arg2); + } else { + tcg_out_opc_sltui(s, ret, arg1, arg2); + } + break; + } + tcg_out_movi(s, TCG_TYPE_REG, TCG_REG_TMP0, arg2); + arg2 = TCG_REG_TMP0; + } + if (cond == TCG_COND_LT) { + tcg_out_opc_slt(s, ret, arg1, arg2); + } else { + tcg_out_opc_sltu(s, ret, arg1, arg2); + } + break; + + default: + g_assert_not_reached(); + break; + } + + return ret | flags; +} + +static void tcg_out_setcond(TCGContext *s, TCGCond cond, TCGReg ret, + TCGReg arg1, tcg_target_long arg2, bool c2) +{ + int tmpflags = tcg_out_setcond_int(s, cond, ret, arg1, arg2, c2); + + if (tmpflags != ret) { + TCGReg tmp = tmpflags & ~SETCOND_FLAGS; + + switch (tmpflags & SETCOND_FLAGS) { + case SETCOND_INV: + /* Intermediate result is boolean: simply invert. */ + tcg_out_opc_xori(s, ret, tmp, 1); + break; + case SETCOND_NEZ: + /* Intermediate result is zero/non-zero: test != 0. */ + tcg_out_opc_sltu(s, ret, TCG_REG_ZERO, tmp); + break; + case SETCOND_NEZ | SETCOND_INV: + /* Intermediate result is zero/non-zero: test == 0. */ + tcg_out_opc_sltui(s, ret, tmp, 1); + break; + default: + g_assert_not_reached(); + } + } +} + +static void tcg_out_movcond(TCGContext *s, TCGCond cond, TCGReg ret, + TCGReg c1, tcg_target_long c2, bool const2, + TCGReg v1, TCGReg v2) +{ + int tmpflags = tcg_out_setcond_int(s, cond, TCG_REG_TMP0, c1, c2, const2); + TCGReg t; + + /* Standardize the test below to t != 0. */ + if (tmpflags & SETCOND_INV) { + t = v1, v1 = v2, v2 = t; + } + + t = tmpflags & ~SETCOND_FLAGS; + if (v1 == TCG_REG_ZERO) { + tcg_out_opc_masknez(s, ret, v2, t); + } else if (v2 == TCG_REG_ZERO) { + tcg_out_opc_maskeqz(s, ret, v1, t); + } else { + tcg_out_opc_masknez(s, TCG_REG_TMP2, v2, t); /* t ? 0 : v2 */ + tcg_out_opc_maskeqz(s, TCG_REG_TMP1, v1, t); /* t ? 
v1 : 0 */ + tcg_out_opc_or(s, ret, TCG_REG_TMP1, TCG_REG_TMP2); + } +} + +/* + * Branch helpers + */ + +static const struct { + LoongArchInsn op; + bool swap; +} tcg_brcond_to_loongarch[] = { + [TCG_COND_EQ] = { OPC_BEQ, false }, + [TCG_COND_NE] = { OPC_BNE, false }, + [TCG_COND_LT] = { OPC_BGT, true }, + [TCG_COND_GE] = { OPC_BLE, true }, + [TCG_COND_LE] = { OPC_BLE, false }, + [TCG_COND_GT] = { OPC_BGT, false }, + [TCG_COND_LTU] = { OPC_BGTU, true }, + [TCG_COND_GEU] = { OPC_BLEU, true }, + [TCG_COND_LEU] = { OPC_BLEU, false }, + [TCG_COND_GTU] = { OPC_BGTU, false } +}; + +static void tcg_out_brcond(TCGContext *s, TCGCond cond, TCGReg arg1, + TCGReg arg2, TCGLabel *l) +{ + LoongArchInsn op = tcg_brcond_to_loongarch[cond].op; + + tcg_debug_assert(op != 0); + + if (tcg_brcond_to_loongarch[cond].swap) { + TCGReg t = arg1; + arg1 = arg2; + arg2 = t; + } + + /* all conditional branch insns belong to DJSk16-format */ + tcg_out_reloc(s, s->code_ptr, R_LOONGARCH_BR_SK16, l, 0); + tcg_out32(s, encode_djsk16_insn(op, arg1, arg2, 0)); +} + +static void tcg_out_call_int(TCGContext *s, const tcg_insn_unit *arg, bool tail) +{ + TCGReg link = tail ? TCG_REG_ZERO : TCG_REG_RA; + ptrdiff_t offset = tcg_pcrel_diff(s, (void *)arg); + + tcg_debug_assert((offset & 3) == 0); + if (offset == sextreg(offset, 0, 28)) { + /* short jump: +/- 256MiB */ + if (tail) { + tcg_out_opc_b(s, offset >> 2); + } else { + tcg_out_opc_bl(s, offset >> 2); + } + } else if (offset == sextreg(offset, 0, 38)) { + /* long jump: +/- 256GiB */ + tcg_target_long lo = sextreg(offset, 0, 18); + tcg_target_long hi = offset - lo; + tcg_out_opc_pcaddu18i(s, TCG_REG_TMP0, hi >> 18); + tcg_out_opc_jirl(s, link, TCG_REG_TMP0, lo >> 2); + } else { + /* far jump: 64-bit */ + tcg_target_long lo = sextreg((tcg_target_long)arg, 0, 18); + tcg_target_long hi = (tcg_target_long)arg - lo; + tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP0, hi); + tcg_out_opc_jirl(s, link, TCG_REG_TMP0, lo >> 2); + } +} + +static void tcg_out_call(TCGContext *s, tcg_insn_unit *target) +{ + tcg_out_call_int(s, target, false); +} + +/* + * Load/store helpers + */ + +static void tcg_out_ldst(TCGContext *s, LoongArchInsn opc, TCGReg data, + TCGReg addr, intptr_t offset) +{ + intptr_t imm12 = sextreg(offset, 0, 12); + + if (offset != imm12) { + intptr_t diff = tcg_pcrel_diff(s, (void *)offset); + + if (addr == TCG_REG_ZERO && diff == (int32_t)diff) { + imm12 = sextreg(diff, 0, 12); + tcg_out_opc_pcaddu12i(s, TCG_REG_TMP2, (diff - imm12) >> 12); + } else { + tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP2, offset - imm12); + if (addr != TCG_REG_ZERO) { + tcg_out_opc_add_d(s, TCG_REG_TMP2, TCG_REG_TMP2, addr); + } + } + addr = TCG_REG_TMP2; + } + + switch (opc) { + case OPC_LD_B: + case OPC_LD_BU: + case OPC_LD_H: + case OPC_LD_HU: + case OPC_LD_W: + case OPC_LD_WU: + case OPC_LD_D: + case OPC_ST_B: + case OPC_ST_H: + case OPC_ST_W: + case OPC_ST_D: + tcg_out32(s, encode_djsk12_insn(opc, data, addr, imm12)); + break; + default: + g_assert_not_reached(); + } +} + +static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg arg, + TCGReg arg1, intptr_t arg2) +{ + bool is_32bit = type == TCG_TYPE_I32; + tcg_out_ldst(s, is_32bit ? OPC_LD_W : OPC_LD_D, arg, arg1, arg2); +} + +static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, + TCGReg arg1, intptr_t arg2) +{ + bool is_32bit = type == TCG_TYPE_I32; + tcg_out_ldst(s, is_32bit ? 
OPC_ST_W : OPC_ST_D, arg, arg1, arg2); +} + +static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val, + TCGReg base, intptr_t ofs) +{ + if (val == 0) { + tcg_out_st(s, type, TCG_REG_ZERO, base, ofs); + return true; + } + return false; +} + +/* + * Load/store helpers for SoftMMU, and qemu_ld/st implementations + */ +static void * const qemu_ld_helpers[16] = { + [MO_UB] = helper_ret_ldub_mmu, + [MO_SB] = helper_ret_ldsb_mmu, + [MO_LEUW] = helper_le_lduw_mmu, + [MO_LESW] = helper_le_ldsw_mmu, + [MO_LEUL] = helper_le_ldul_mmu, + [MO_LEQ] = helper_le_ldq_mmu, + [MO_BEUW] = helper_be_lduw_mmu, + [MO_BESW] = helper_be_ldsw_mmu, + [MO_BEUL] = helper_be_ldul_mmu, + [MO_BEQ] = helper_be_ldq_mmu, +#if TCG_TARGET_REG_BITS == 64 + [MO_LESL] = helper_le_ldsl_mmu, + [MO_BESL] = helper_be_ldsl_mmu, +#endif +}; + +static void * const qemu_st_helpers[16] = { + [MO_UB] = helper_ret_stb_mmu, + [MO_LEUW] = helper_le_stw_mmu, + [MO_LEUL] = helper_le_stl_mmu, + [MO_LEQ] = helper_le_stq_mmu, + [MO_BEUW] = helper_be_stw_mmu, + [MO_BEUL] = helper_be_stl_mmu, + [MO_BEQ] = helper_be_stq_mmu, +}; + +/* Helper routines for marshalling helper function arguments into + * the correct registers and stack. + * I is where we want to put this argument, and is updated and returned + * for the next call. ARG is the argument itself. + * + * We provide routines for arguments which are: immediate, 32 bit + * value in register, 16 and 8 bit values in register (which must be zero + * extended before use). + */ + +static int tcg_out_call_iarg_reg(TCGContext *s, int i, TCGReg arg) +{ + if (i < ARRAY_SIZE(tcg_target_call_iarg_regs)) { + tcg_out_mov(s, TCG_TYPE_REG, tcg_target_call_iarg_regs[i], arg); + } + return i + 1; +} + +static int tcg_out_call_iarg_reg8(TCGContext *s, int i, TCGReg arg) +{ + TCGReg tmp = TCG_REG_TMP0; + if (i < ARRAY_SIZE(tcg_target_call_iarg_regs)) { + tmp = tcg_target_call_iarg_regs[i]; + } + tcg_out_opc_andi(s, tmp, arg, 0xff); + return tcg_out_call_iarg_reg(s, i, tmp); +} + +static int tcg_out_call_iarg_reg16(TCGContext *s, int i, TCGReg arg) +{ + TCGReg tmp = TCG_REG_TMP0; + if (i < ARRAY_SIZE(tcg_target_call_iarg_regs)) { + tmp = tcg_target_call_iarg_regs[i]; + } + tcg_out_opc_andi(s, tmp, arg, 0xffff); + return tcg_out_call_iarg_reg(s, i, tmp); +} + +static int tcg_out_call_iarg_imm(TCGContext *s, int i, TCGArg arg) +{ + TCGReg tmp = TCG_REG_TMP0; + if (arg == 0) { + tmp = TCG_REG_ZERO; + } else { + if (i < ARRAY_SIZE(tcg_target_call_iarg_regs)) { + tmp = tcg_target_call_iarg_regs[i]; + } + tcg_out_movi(s, TCG_TYPE_REG, tmp, arg); + } + return tcg_out_call_iarg_reg(s, i, tmp); +} + +static bool tcg_out_goto(TCGContext *s, const tcg_insn_unit *target) +{ + tcg_out_opc_b(s, 0); + return reloc_br_sd10k16(s->code_ptr - 1, target); +} + +static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l) +{ + TCGMemOpIdx oi = l->oi; + MemOp opc = get_memop(oi); + MemOp size = opc & MO_SIZE; + TCGType type = l->type; + + /* resolve label address */ + if (!reloc_br_sk16(l->label_ptr[0], (s->code_ptr))) { + return false; + } + + /* call load helper */ + tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_A0, TCG_AREG0); + tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_A1, l->addrlo_reg); + tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_A2, oi); + tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_A3, (tcg_target_long)l->raddr); + + tcg_out_call(s, qemu_ld_helpers[size]); + + switch (opc & MO_SSIZE) { + case MO_SB: + tcg_out_ext8s(s, type, l->datalo_reg, TCG_REG_A0); + break; + case MO_SW: + tcg_out_ext16s(s, type, l->datalo_reg, TCG_REG_A0); + 
break; + case MO_SL: + tcg_out_ext32s(s, l->datalo_reg, TCG_REG_A0); + break; + case MO_UL: + if (type == TCG_TYPE_I32) { + /* MO_UL loads of i32 should be sign-extended too */ + tcg_out_ext32s(s, l->datalo_reg, TCG_REG_A0); + break; + } + /* fallthrough */ + default: + tcg_out_mov(s, type, l->datalo_reg, TCG_REG_A0); + break; + } + + return tcg_out_goto(s, l->raddr); +} + +static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l) +{ + TCGMemOpIdx oi = l->oi; + MemOp opc = get_memop(oi); + MemOp size = opc & MO_SIZE; + + /* resolve label address */ + if (!reloc_br_sk16(l->label_ptr[0], (s->code_ptr))) { + return false; + } + + /* call store helper */ + tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_A0, TCG_AREG0); + tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_A1, l->addrlo_reg); + switch (size) { + case MO_8: + tcg_out_ext8u(s, TCG_REG_A2, l->datalo_reg); + break; + case MO_16: + tcg_out_ext16u(s, TCG_REG_A2, l->datalo_reg); + break; + case MO_32: + tcg_out_ext32u(s, TCG_REG_A2, l->datalo_reg); + break; + case MO_64: + tcg_out_mov(s, TCG_TYPE_I64, TCG_REG_A2, l->datalo_reg); + break; + default: + g_assert_not_reached(); + break; + } + tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_A3, oi); + tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_A4, (tcg_target_long)l->raddr); + + tcg_out_call(s, qemu_st_helpers[size]); + + return tcg_out_goto(s, l->raddr); +} + +typedef struct { + MemOp atom; /* lg2 bits of atomicity required */ + MemOp align; /* lg2 bits of alignment to use */ +} TCGAtomAlign; + +typedef struct { + TCGReg base; + TCGReg index; +} HostAddress; + +// bool tcg_target_has_memory_bswap(MemOp memop) +// { +// return false; +// } + +/* We expect to use a 12-bit negative offset from ENV. */ +#define MIN_TLB_MASK_TABLE_OFS -(1 << 11) + +#if defined(CONFIG_SOFTMMU) && !defined(CONFIG_TCG_INTERPRETER) +static int tlb_mask_table_ofs(TCGContext *s, int which) +{ + return (offsetof(CPUNegativeOffsetState, tlb.f[which]) - + sizeof(CPUNegativeOffsetState)); +} +#endif + +/* + * For softmmu, perform the TLB load and compare. + * For useronly, perform any required alignment tests. + * In both cases, return a TCGLabelQemuLdst structure if the slow path + * is required and fill in @h with the host address for the fast path. + */ +static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h, + TCGReg addr_reg, TCGMemOpIdx oi, + bool is_ld, TCGType addr_type) +{ +#ifdef TARGET_ARM + struct uc_struct *uc = s->uc; +#endif + + TCGLabelQemuLdst *ldst = NULL; + MemOp opc = get_memop(oi); + MemOp a_bits = get_alignment_bits(opc); + +#ifdef CONFIG_SOFTMMU + unsigned s_bits = opc & MO_SIZE; + int mem_index = get_mmuidx(oi); + int fast_ofs = TLB_MASK_TABLE_OFS(mem_index); + int mask_ofs = fast_ofs + offsetof(CPUTLBDescFast, mask); + int table_ofs = fast_ofs + offsetof(CPUTLBDescFast, table); + + ldst = new_ldst_label(s); + ldst->is_ld = is_ld; + ldst->oi = oi; + ldst->addrlo_reg = addr_reg; + + tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP0, TCG_AREG0, mask_ofs); + tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_AREG0, table_ofs); + + tcg_out_opc_srli_d(s, TCG_REG_TMP2, addr_reg, + TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS); + tcg_out_opc_and(s, TCG_REG_TMP2, TCG_REG_TMP2, TCG_REG_TMP0); + tcg_out_opc_add_d(s, TCG_REG_TMP2, TCG_REG_TMP2, TCG_REG_TMP1); + + /* Load the tlb comparator and the addend. */ + // QEMU_BUILD_BUG_ON(HOST_BIG_ENDIAN); + tcg_out_ld(s, addr_type, TCG_REG_TMP0, TCG_REG_TMP2, + is_ld ? 
offsetof(CPUTLBEntry, addr_read) + : offsetof(CPUTLBEntry, addr_write)); + tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP2, TCG_REG_TMP2, + offsetof(CPUTLBEntry, addend)); + + /* + * For aligned accesses, we check the first byte and include the alignment + * bits within the address. For unaligned access, we check that we don't + * cross pages using the address of the last byte of the access. + */ + if (a_bits < s_bits) { + unsigned a_mask = (1u << a_bits) - 1; + unsigned s_mask = (1u << s_bits) - 1; + tcg_out_addi(s, addr_type, TCG_REG_TMP1, addr_reg, s_mask - a_mask); + } else { + tcg_out_mov(s, addr_type, TCG_REG_TMP1, addr_reg); + } + tcg_out_opc_bstrins_d(s, TCG_REG_TMP1, TCG_REG_ZERO, + a_bits, TARGET_PAGE_BITS - 1); + + /* Compare masked address with the TLB entry. */ + ldst->label_ptr[0] = s->code_ptr; + // tcg_out_opc_bne(s, TCG_REG_TMP0, TCG_REG_TMP1, 0); + tcg_out_opc_beq(s, 0, 0, 0); + + h->index = TCG_REG_TMP2; +#else + if (a_bits) { + ldst = new_ldst_label(s); + + ldst->is_ld = is_ld; + ldst->oi = oi; + ldst->addrlo_reg = addr_reg; + + /* + * Without micro-architecture details, we don't know which of + * bstrpick or andi is faster, so use bstrpick as it's not + * constrained by imm field width. Not to say alignments >= 2^12 + * are going to happen any time soon. + */ + tcg_out_opc_bstrpick_d(s, TCG_REG_TMP1, addr_reg, 0, a_bits - 1); + + ldst->label_ptr[0] = s->code_ptr; + tcg_out_opc_bne(s, TCG_REG_TMP1, TCG_REG_ZERO, 0); + } + + h->index = USE_GUEST_BASE ? TCG_GUEST_BASE_REG : TCG_REG_ZERO; +#endif + + if (addr_type == TCG_TYPE_I32) { + h->base = TCG_REG_TMP0; + tcg_out_ext32u(s, h->base, addr_reg); + } else { + h->base = addr_reg; + } + + return ldst; +} + +// static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h, +// TCGReg addr_reg, TCGMemOpIdx oi, +// bool is_ld, TCGType addr_type) +// { +// TCGLabelQemuLdst *ldst = NULL; +// MemOp opc = get_memop(oi); +// unsigned a_bits = get_alignment_bits(opc); + +// #ifdef CONFIG_SOFTMMU +// unsigned s_bits = opc & MO_SIZE; +// int mem_index = get_mmuidx(oi); +// int table_ofs = offsetof(CPUArchState, tlb_table[mem_index][0]); +// int mask = (target_ulong)TARGET_PAGE_MASK | ((1 << a_bits) - 1); + +// ldst = new_ldst_label(s); +// ldst->is_ld = is_ld; +// ldst->oi = oi; +// ldst->addrlo_reg = addr_reg; + +// tcg_out_movi(s, TCG_TYPE_TL, TCG_REG_TMP0, mask); + +// tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_AREG0, table_ofs); + +// tcg_out_opc_srli_d(s, TCG_REG_TMP2, addr_reg, +// TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS); +// tcg_out_opc_and(s, TCG_REG_TMP2, TCG_REG_TMP2, TCG_REG_TMP0); +// tcg_out_opc_add_d(s, TCG_REG_TMP2, TCG_REG_TMP2, TCG_REG_TMP1); + +// /* Load the tlb comparator and the addend. */ +// tcg_out_ld(s, addr_type, TCG_REG_TMP0, TCG_REG_TMP2, +// is_ld ? offsetof(CPUTLBEntry, addr_read) +// : offsetof(CPUTLBEntry, addr_write)); +// tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP2, TCG_REG_TMP2, +// offsetof(CPUTLBEntry, addend)); + +// /* +// * For aligned accesses, we check the first byte and include the alignment +// * bits within the address. For unaligned access, we check that we don't +// * cross pages using the address of the last byte of the access. 
+// */ +// if (a_bits < s_bits) { +// unsigned a_mask = (1u << a_bits) - 1; +// unsigned s_mask = (1u << s_bits) - 1; +// tcg_out_addi(s, addr_type, TCG_REG_TMP1, addr_reg, s_mask - a_mask); +// } else { +// tcg_out_mov(s, addr_type, TCG_REG_TMP1, addr_reg); +// } +// tcg_out_opc_bstrins_d(s, TCG_REG_TMP1, TCG_REG_ZERO, +// a_bits, TARGET_PAGE_BITS - 1); + +// /* Compare masked address with the TLB entry. */ +// ldst->label_ptr[0] = s->code_ptr; +// tcg_out_opc_bne(s, TCG_REG_TMP0, TCG_REG_TMP1, 0); + +// h->index = TCG_REG_TMP2; +// #else +// if (a_bits) { +// ldst = new_ldst_label(s); + +// ldst->is_ld = is_ld; +// ldst->oi = oi; +// ldst->addrlo_reg = addr_reg; + +// /* +// * Without micro-architecture details, we don't know which of +// * bstrpick or andi is faster, so use bstrpick as it's not +// * constrained by imm field width. Not to say alignments >= 2^12 +// * are going to happen any time soon. +// */ +// tcg_out_opc_bstrpick_d(s, TCG_REG_TMP1, addr_reg, 0, a_bits - 1); + +// ldst->label_ptr[0] = s->code_ptr; +// tcg_out_opc_bne(s, TCG_REG_TMP1, TCG_REG_ZERO, 0); +// } + +// h->index = USE_GUEST_BASE ? TCG_GUEST_BASE_REG : TCG_REG_ZERO; +// #endif + +// if (addr_type == TCG_TYPE_I32) { +// h->base = TCG_REG_TMP0; +// tcg_out_ext32u(s, h->base, addr_reg); +// } else { +// h->base = addr_reg; +// } + +// return ldst; +// } + +static void tcg_out_qemu_ld_indexed(TCGContext *s, MemOp opc, TCGType type, + TCGReg rd, HostAddress h) +{ + /* Byte swapping is left to middle-end expansion. */ + tcg_debug_assert((opc & MO_BSWAP) == 0); + + switch (opc & MO_SSIZE) { + case MO_UB: + tcg_out_opc_ldx_bu(s, rd, h.base, h.index); + break; + case MO_SB: + tcg_out_opc_ldx_b(s, rd, h.base, h.index); + break; + case MO_UW: + tcg_out_opc_ldx_hu(s, rd, h.base, h.index); + break; + case MO_SW: + tcg_out_opc_ldx_h(s, rd, h.base, h.index); + break; + case MO_UL: + if (type == TCG_TYPE_I64) { + tcg_out_opc_ldx_wu(s, rd, h.base, h.index); + break; + } + /* fallthrough */ + case MO_SL: + tcg_out_opc_ldx_w(s, rd, h.base, h.index); + break; + case MO_Q: + tcg_out_opc_ldx_d(s, rd, h.base, h.index); + break; + default: + g_assert_not_reached(); + } +} + +static void tcg_out_qemu_ld(TCGContext *s, TCGReg data_reg, TCGReg addr_reg, + TCGMemOpIdx oi, TCGType data_type) +{ + TCGLabelQemuLdst *ldst; + HostAddress h; + + ldst = prepare_host_addr(s, &h, addr_reg, oi, true, data_type); + tcg_out_qemu_ld_indexed(s, get_memop(oi), data_type, data_reg, h); + + if (ldst) { + ldst->type = data_type; + ldst->datalo_reg = data_reg; + ldst->raddr = s->code_ptr; + } +} + +static void tcg_out_qemu_st_indexed(TCGContext *s, MemOp opc, + TCGReg rd, HostAddress h) +{ + /* Byte swapping is left to middle-end expansion. 
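+ *
+ * TCG_TARGET_HAS_MEMORY_BSWAP is 0 in tcg-target.h, so a guest access whose
+ * endianness differs from the host's has already been split by the middle
+ * end into a plain access plus an explicit bswap op by the time we get here;
+ * the assertion below checks exactly that.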
*/ + tcg_debug_assert((opc & MO_BSWAP) == 0); + + switch (opc & MO_SIZE) { + case MO_8: + tcg_out_opc_stx_b(s, rd, h.base, h.index); + break; + case MO_16: + tcg_out_opc_stx_h(s, rd, h.base, h.index); + break; + case MO_32: + tcg_out_opc_stx_w(s, rd, h.base, h.index); + break; + case MO_64: + tcg_out_opc_stx_d(s, rd, h.base, h.index); + break; + default: + g_assert_not_reached(); + } +} + +static void tcg_out_qemu_st(TCGContext *s, TCGReg data_reg, TCGReg addr_reg, + TCGMemOpIdx oi, TCGType data_type) +{ + TCGLabelQemuLdst *ldst; + HostAddress h; + + ldst = prepare_host_addr(s, &h, addr_reg, oi, false, data_type); + tcg_out_qemu_st_indexed(s, get_memop(oi), data_reg, h); + + if (ldst) { + ldst->type = data_type; + ldst->datalo_reg = data_reg; + ldst->raddr = s->code_ptr; + } +} + +/* + * Entry-points + */ + +// static tcg_insn_unit *tcg_code_gen_epilogue; +// static tcg_insn_unit *tb_ret_addr; + +static void tcg_out_exit_tb(TCGContext *s, uintptr_t a0) +{ + /* Reuse the zeroing that exists for goto_ptr. */ + if (a0 == 0) { + tcg_out_call_int(s, s->code_gen_epilogue, true); + } else { + tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_A0, a0); + tcg_out_call_int(s, s->tb_ret_addr, true); + } +} + +void tb_target_set_jmp_target(uintptr_t tc_ptr, uintptr_t jmp_addr, + uintptr_t addr) +{ + uintptr_t d_addr = addr; + ptrdiff_t d_disp = (ptrdiff_t)(d_addr - jmp_addr) >> 2; + tcg_insn_unit insn; + + /* Either directly branch, or load slot address for indirect branch. */ + if (d_disp == sextreg(d_disp, 0, 26)) { + insn = encode_sd10k16_insn(OPC_B, d_disp); + } else { + uintptr_t i_addr = addr; + intptr_t i_disp = i_addr - jmp_addr; + insn = encode_dsj20_insn(OPC_PCADDU2I, TCG_REG_TMP0, i_disp >> 2); + } + + *(tcg_insn_unit *)jmp_addr = insn; + // flush_idcache_range(jmp_rx, jmp_rw, 4); + flush_icache_range(jmp_addr, jmp_addr + 8); +} + +static void tcg_out_op(TCGContext *s, TCGOpcode opc, + const TCGArg args[TCG_MAX_OP_ARGS], + const int const_args[TCG_MAX_OP_ARGS]) +{ + TCGArg a0 = args[0]; + TCGArg a1 = args[1]; + TCGArg a2 = args[2]; + TCGArg a3 = args[3]; + int c2 = const_args[2]; + + switch (opc) { + case INDEX_op_mb: + tcg_out_mb(s, a0); + break; + + case INDEX_op_goto_ptr: + tcg_out_opc_jirl(s, TCG_REG_ZERO, a0, 0); + break; + + case INDEX_op_br: + tcg_out_reloc(s, s->code_ptr, R_LOONGARCH_BR_SD10K16, arg_label(a0), + 0); + tcg_out_opc_b(s, 0); + break; + + case INDEX_op_brcond_i32: + case INDEX_op_brcond_i64: + tcg_out_brcond(s, a2, a0, a1, arg_label(args[3])); + break; + + case INDEX_op_extrh_i64_i32: + tcg_out_opc_srai_d(s, a0, a1, 32); + break; + + case INDEX_op_not_i32: + case INDEX_op_not_i64: + tcg_out_opc_nor(s, a0, a1, TCG_REG_ZERO); + break; + + case INDEX_op_nor_i32: + case INDEX_op_nor_i64: + if (c2) { + tcg_out_opc_ori(s, a0, a1, a2); + tcg_out_opc_nor(s, a0, a0, TCG_REG_ZERO); + } else { + tcg_out_opc_nor(s, a0, a1, a2); + } + break; + + case INDEX_op_andc_i32: + case INDEX_op_andc_i64: + if (c2) { + /* guaranteed to fit due to constraint */ + tcg_out_opc_andi(s, a0, a1, ~a2); + } else { + tcg_out_opc_andn(s, a0, a1, a2); + } + break; + + case INDEX_op_orc_i32: + case INDEX_op_orc_i64: + if (c2) { + /* guaranteed to fit due to constraint */ + tcg_out_opc_ori(s, a0, a1, ~a2); + } else { + tcg_out_opc_orn(s, a0, a1, a2); + } + break; + + case INDEX_op_and_i32: + case INDEX_op_and_i64: + if (c2) { + tcg_out_opc_andi(s, a0, a1, a2); + } else { + tcg_out_opc_and(s, a0, a1, a2); + } + break; + + case INDEX_op_or_i32: + case INDEX_op_or_i64: + if (c2) { + tcg_out_opc_ori(s, a0, a1, a2); + } 
else { + tcg_out_opc_or(s, a0, a1, a2); + } + break; + + case INDEX_op_xor_i32: + case INDEX_op_xor_i64: + if (c2) { + tcg_out_opc_xori(s, a0, a1, a2); + } else { + tcg_out_opc_xor(s, a0, a1, a2); + } + break; + + case INDEX_op_extract_i32: + tcg_out_opc_bstrpick_w(s, a0, a1, a2, a2 + args[3] - 1); + break; + case INDEX_op_extract_i64: + tcg_out_opc_bstrpick_d(s, a0, a1, a2, a2 + args[3] - 1); + break; + + case INDEX_op_deposit_i32: + tcg_out_opc_bstrins_w(s, a0, a2, args[3], args[3] + args[4] - 1); + break; + case INDEX_op_deposit_i64: + tcg_out_opc_bstrins_d(s, a0, a2, args[3], args[3] + args[4] - 1); + break; + + case INDEX_op_bswap16_i32: + case INDEX_op_bswap16_i64: + tcg_out_opc_revb_2h(s, a0, a1); + break; + + case INDEX_op_bswap32_i32: + /* All 32-bit values are computed sign-extended in the register. */ + /* fallthrough */ + case INDEX_op_bswap32_i64: + tcg_out_opc_revb_2w(s, a0, a1); + break; + + case INDEX_op_bswap64_i64: + tcg_out_opc_revb_d(s, a0, a1); + break; + + case INDEX_op_clz_i32: + tcg_out_clzctz(s, OPC_CLZ_W, a0, a1, a2, c2, true); + break; + case INDEX_op_clz_i64: + tcg_out_clzctz(s, OPC_CLZ_D, a0, a1, a2, c2, false); + break; + + case INDEX_op_ctz_i32: + tcg_out_clzctz(s, OPC_CTZ_W, a0, a1, a2, c2, true); + break; + case INDEX_op_ctz_i64: + tcg_out_clzctz(s, OPC_CTZ_D, a0, a1, a2, c2, false); + break; + + case INDEX_op_shl_i32: + if (c2) { + tcg_out_opc_slli_w(s, a0, a1, a2 & 0x1f); + } else { + tcg_out_opc_sll_w(s, a0, a1, a2); + } + break; + case INDEX_op_shl_i64: + if (c2) { + tcg_out_opc_slli_d(s, a0, a1, a2 & 0x3f); + } else { + tcg_out_opc_sll_d(s, a0, a1, a2); + } + break; + + case INDEX_op_shr_i32: + if (c2) { + tcg_out_opc_srli_w(s, a0, a1, a2 & 0x1f); + } else { + tcg_out_opc_srl_w(s, a0, a1, a2); + } + break; + case INDEX_op_shr_i64: + if (c2) { + tcg_out_opc_srli_d(s, a0, a1, a2 & 0x3f); + } else { + tcg_out_opc_srl_d(s, a0, a1, a2); + } + break; + + case INDEX_op_sar_i32: + if (c2) { + tcg_out_opc_srai_w(s, a0, a1, a2 & 0x1f); + } else { + tcg_out_opc_sra_w(s, a0, a1, a2); + } + break; + case INDEX_op_sar_i64: + if (c2) { + tcg_out_opc_srai_d(s, a0, a1, a2 & 0x3f); + } else { + tcg_out_opc_sra_d(s, a0, a1, a2); + } + break; + + case INDEX_op_rotl_i32: + /* transform into equivalent rotr/rotri */ + if (c2) { + tcg_out_opc_rotri_w(s, a0, a1, (32 - a2) & 0x1f); + } else { + tcg_out_opc_sub_w(s, TCG_REG_TMP0, TCG_REG_ZERO, a2); + tcg_out_opc_rotr_w(s, a0, a1, TCG_REG_TMP0); + } + break; + case INDEX_op_rotl_i64: + /* transform into equivalent rotr/rotri */ + if (c2) { + tcg_out_opc_rotri_d(s, a0, a1, (64 - a2) & 0x3f); + } else { + tcg_out_opc_sub_w(s, TCG_REG_TMP0, TCG_REG_ZERO, a2); + tcg_out_opc_rotr_d(s, a0, a1, TCG_REG_TMP0); + } + break; + + case INDEX_op_rotr_i32: + if (c2) { + tcg_out_opc_rotri_w(s, a0, a1, a2 & 0x1f); + } else { + tcg_out_opc_rotr_w(s, a0, a1, a2); + } + break; + case INDEX_op_rotr_i64: + if (c2) { + tcg_out_opc_rotri_d(s, a0, a1, a2 & 0x3f); + } else { + tcg_out_opc_rotr_d(s, a0, a1, a2); + } + break; + + case INDEX_op_add_i32: + if (c2) { + tcg_out_addi(s, TCG_TYPE_I32, a0, a1, a2); + } else { + tcg_out_opc_add_w(s, a0, a1, a2); + } + break; + case INDEX_op_add_i64: + if (c2) { + tcg_out_addi(s, TCG_TYPE_I64, a0, a1, a2); + } else { + tcg_out_opc_add_d(s, a0, a1, a2); + } + break; + + case INDEX_op_sub_i32: + if (c2) { + tcg_out_addi(s, TCG_TYPE_I32, a0, a1, -a2); + } else { + tcg_out_opc_sub_w(s, a0, a1, a2); + } + break; + case INDEX_op_sub_i64: + if (c2) { + tcg_out_addi(s, TCG_TYPE_I64, a0, a1, -a2); + } else { + 
tcg_out_opc_sub_d(s, a0, a1, a2); + } + break; + + case INDEX_op_mul_i32: + tcg_out_opc_mul_w(s, a0, a1, a2); + break; + case INDEX_op_mul_i64: + tcg_out_opc_mul_d(s, a0, a1, a2); + break; + + case INDEX_op_mulsh_i32: + tcg_out_opc_mulh_w(s, a0, a1, a2); + break; + case INDEX_op_mulsh_i64: + tcg_out_opc_mulh_d(s, a0, a1, a2); + break; + + case INDEX_op_muluh_i32: + tcg_out_opc_mulh_wu(s, a0, a1, a2); + break; + case INDEX_op_muluh_i64: + tcg_out_opc_mulh_du(s, a0, a1, a2); + break; + + case INDEX_op_div_i32: + tcg_out_opc_div_w(s, a0, a1, a2); + break; + case INDEX_op_div_i64: + tcg_out_opc_div_d(s, a0, a1, a2); + break; + + case INDEX_op_divu_i32: + tcg_out_opc_div_wu(s, a0, a1, a2); + break; + case INDEX_op_divu_i64: + tcg_out_opc_div_du(s, a0, a1, a2); + break; + + case INDEX_op_rem_i32: + tcg_out_opc_mod_w(s, a0, a1, a2); + break; + case INDEX_op_rem_i64: + tcg_out_opc_mod_d(s, a0, a1, a2); + break; + + case INDEX_op_remu_i32: + tcg_out_opc_mod_wu(s, a0, a1, a2); + break; + case INDEX_op_remu_i64: + tcg_out_opc_mod_du(s, a0, a1, a2); + break; + + case INDEX_op_setcond_i32: + case INDEX_op_setcond_i64: + tcg_out_setcond(s, args[3], a0, a1, a2, c2); + break; + + case INDEX_op_movcond_i32: + case INDEX_op_movcond_i64: + tcg_out_movcond(s, args[5], a0, a1, a2, c2, args[3], args[4]); + break; + + case INDEX_op_ld8s_i32: + case INDEX_op_ld8s_i64: + tcg_out_ldst(s, OPC_LD_B, a0, a1, a2); + break; + case INDEX_op_ld8u_i32: + case INDEX_op_ld8u_i64: + tcg_out_ldst(s, OPC_LD_BU, a0, a1, a2); + break; + case INDEX_op_ld16s_i32: + case INDEX_op_ld16s_i64: + tcg_out_ldst(s, OPC_LD_H, a0, a1, a2); + break; + case INDEX_op_ld16u_i32: + case INDEX_op_ld16u_i64: + tcg_out_ldst(s, OPC_LD_HU, a0, a1, a2); + break; + case INDEX_op_ld_i32: + case INDEX_op_ld32s_i64: + tcg_out_ldst(s, OPC_LD_W, a0, a1, a2); + break; + case INDEX_op_ld32u_i64: + tcg_out_ldst(s, OPC_LD_WU, a0, a1, a2); + break; + case INDEX_op_ld_i64: + tcg_out_ldst(s, OPC_LD_D, a0, a1, a2); + break; + + case INDEX_op_st8_i32: + case INDEX_op_st8_i64: + tcg_out_ldst(s, OPC_ST_B, a0, a1, a2); + break; + case INDEX_op_st16_i32: + case INDEX_op_st16_i64: + tcg_out_ldst(s, OPC_ST_H, a0, a1, a2); + break; + case INDEX_op_st_i32: + case INDEX_op_st32_i64: + tcg_out_ldst(s, OPC_ST_W, a0, a1, a2); + break; + case INDEX_op_st_i64: + tcg_out_ldst(s, OPC_ST_D, a0, a1, a2); + break; + + case INDEX_op_qemu_ld_i32: + tcg_out_qemu_ld(s, a0, a1, a2, TCG_TYPE_I32); + break; + case INDEX_op_qemu_ld_i64: + tcg_out_qemu_ld(s, a0, a1, a2, TCG_TYPE_I64); + break; + case INDEX_op_qemu_st_i32: + tcg_out_qemu_st(s, a0, a1, a2, TCG_TYPE_I32); + break; + case INDEX_op_qemu_st_i64: + tcg_out_qemu_st(s, a0, a1, a2, TCG_TYPE_I64); + break; + case INDEX_op_goto_tb: + if (s->tb_jmp_insn_offset) { + /* TODO */ + g_assert_not_reached(); + } else { + /* indirect jump method */ + tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP0, TCG_REG_ZERO, + (uintptr_t)(s->tb_jmp_target_addr + a0)); + tcg_out_opc_jirl(s, TCG_REG_ZERO, TCG_REG_TMP0, 0); + } + s->tb_jmp_reset_offset[a0] = tcg_current_code_size(s); + break; + case INDEX_op_exit_tb: + tcg_out_exit_tb(s, a0); + break; + + case INDEX_op_ext8s_i32: + tcg_out_ext8s(s, TCG_TYPE_I32, a0, a1); + break; + case INDEX_op_ext8s_i64: + tcg_out_ext8s(s, TCG_TYPE_I64, a0, a1); + break; + case INDEX_op_ext8u_i32: + case INDEX_op_ext8u_i64: + tcg_out_ext8u(s, a0, a1); + break; + case INDEX_op_ext16s_i32: + tcg_out_ext16s(s, TCG_TYPE_I32, a0, a1); + break; + case INDEX_op_ext16s_i64: + tcg_out_ext16s(s, TCG_TYPE_I64, a0, a1); + break; + case 
INDEX_op_ext16u_i32: + case INDEX_op_ext16u_i64: + tcg_out_ext16u(s, a0, a1); + break; + case INDEX_op_ext32s_i64: + tcg_out_ext32s(s, a0, a1); + break; + case INDEX_op_ext32u_i64: + tcg_out_ext32u(s, a0, a1); + break; + case INDEX_op_ext_i32_i64: + tcg_out_exts_i32_i64(s, a0, a1); + break; + case INDEX_op_extu_i32_i64: + tcg_out_extu_i32_i64(s, a0, a1); + break; + case INDEX_op_extrl_i64_i32: + tcg_out_extrl_i64_i32(s, a0, a1); + break; + case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */ + case INDEX_op_mov_i64: + case INDEX_op_call: /* Always emitted via tcg_out_call. */ + // case INDEX_op_ext8s_i32: /* Always emitted via tcg_reg_alloc_op. */ + // case INDEX_op_ext8s_i64: + // case INDEX_op_ext8u_i32: + // case INDEX_op_ext8u_i64: + // case INDEX_op_ext16s_i32: + // case INDEX_op_ext16s_i64: + // case INDEX_op_ext16u_i32: + // case INDEX_op_ext16u_i64: + // case INDEX_op_ext32s_i64: + // case INDEX_op_ext32u_i64: + // case INDEX_op_ext_i32_i64: + // case INDEX_op_extu_i32_i64: + // case INDEX_op_extrl_i64_i32: + default: + g_assert_not_reached(); + } +} + +static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece, + TCGReg rd, TCGReg rs) +{ + switch (vece) { + case MO_8: + tcg_out_opc_vreplgr2vr_b(s, rd, rs); + break; + case MO_16: + tcg_out_opc_vreplgr2vr_h(s, rd, rs); + break; + case MO_32: + tcg_out_opc_vreplgr2vr_w(s, rd, rs); + break; + case MO_64: + tcg_out_opc_vreplgr2vr_d(s, rd, rs); + break; + default: + g_assert_not_reached(); + } + return true; +} + +static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece, + TCGReg r, TCGReg base, intptr_t offset) +{ + /* Handle imm overflow and division (vldrepl.d imm is divided by 8) */ + if (offset < -0x800 || offset > 0x7ff || \ + (offset & ((1 << vece) - 1)) != 0) { + tcg_out_addi(s, TCG_TYPE_I64, TCG_REG_TMP0, base, offset); + base = TCG_REG_TMP0; + offset = 0; + } + offset >>= vece; + + switch (vece) { + case MO_8: + tcg_out_opc_vldrepl_b(s, r, base, offset); + break; + case MO_16: + tcg_out_opc_vldrepl_h(s, r, base, offset); + break; + case MO_32: + tcg_out_opc_vldrepl_w(s, r, base, offset); + break; + case MO_64: + tcg_out_opc_vldrepl_d(s, r, base, offset); + break; + default: + g_assert_not_reached(); + } + return true; +} + +// static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece, +// TCGReg rd, int64_t v64) +// { +// /* Try vldi if imm can fit */ +// int64_t value = sextract64(v64, 0, 8 << vece); +// if (-0x200 <= value && value <= 0x1FF) { +// uint32_t imm = (vece << 10) | ((uint32_t)v64 & 0x3FF); +// tcg_out_opc_vldi(s, rd, imm); +// return; +// } + +// /* TODO: vldi patterns when imm 12 is set */ + +// /* Fallback to vreplgr2vr */ +// tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP0, value); +// switch (vece) { +// case MO_8: +// tcg_out_opc_vreplgr2vr_b(s, rd, TCG_REG_TMP0); +// break; +// case MO_16: +// tcg_out_opc_vreplgr2vr_h(s, rd, TCG_REG_TMP0); +// break; +// case MO_32: +// tcg_out_opc_vreplgr2vr_w(s, rd, TCG_REG_TMP0); +// break; +// case MO_64: +// tcg_out_opc_vreplgr2vr_d(s, rd, TCG_REG_TMP0); +// break; +// default: +// g_assert_not_reached(); +// } +// } + +static void tcg_out_addsub_vec(TCGContext *s, unsigned vece, const TCGArg a0, + const TCGArg a1, const TCGArg a2, + bool a2_is_const, bool is_add) +{ + static const LoongArchInsn add_vec_insn[4] = { + OPC_VADD_B, OPC_VADD_H, OPC_VADD_W, OPC_VADD_D + }; + static const LoongArchInsn add_vec_imm_insn[4] = { + OPC_VADDI_BU, OPC_VADDI_HU, OPC_VADDI_WU, OPC_VADDI_DU + }; + static const LoongArchInsn sub_vec_insn[4] 
= { + OPC_VSUB_B, OPC_VSUB_H, OPC_VSUB_W, OPC_VSUB_D + }; + static const LoongArchInsn sub_vec_imm_insn[4] = { + OPC_VSUBI_BU, OPC_VSUBI_HU, OPC_VSUBI_WU, OPC_VSUBI_DU + }; + + if (a2_is_const) { + int64_t value = sextract64(a2, 0, 8 << vece); + if (!is_add) { + value = -value; + } + + /* Try vaddi/vsubi */ + if (0 <= value && value <= 0x1f) { + tcg_out32(s, encode_vdvjuk5_insn(add_vec_imm_insn[vece], a0, \ + a1, value)); + return; + } else if (-0x1f <= value && value < 0) { + tcg_out32(s, encode_vdvjuk5_insn(sub_vec_imm_insn[vece], a0, \ + a1, -value)); + return; + } + + /* constraint TCG_CT_CONST_VADD ensures unreachable */ + g_assert_not_reached(); + } + + if (is_add) { + tcg_out32(s, encode_vdvjvk_insn(add_vec_insn[vece], a0, a1, a2)); + } else { + tcg_out32(s, encode_vdvjvk_insn(sub_vec_insn[vece], a0, a1, a2)); + } +} + +static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, + unsigned vecl, unsigned vece, + const TCGArg args[TCG_MAX_OP_ARGS], + const int const_args[TCG_MAX_OP_ARGS]) +{ + TCGType type = vecl + TCG_TYPE_V64; + TCGArg a0, a1, a2, a3; + TCGReg temp = TCG_REG_TMP0; + TCGReg temp_vec = TCG_VEC_TMP0; + + static const LoongArchInsn cmp_vec_insn[16][4] = { + [TCG_COND_EQ] = {OPC_VSEQ_B, OPC_VSEQ_H, OPC_VSEQ_W, OPC_VSEQ_D}, + [TCG_COND_LE] = {OPC_VSLE_B, OPC_VSLE_H, OPC_VSLE_W, OPC_VSLE_D}, + [TCG_COND_LEU] = {OPC_VSLE_BU, OPC_VSLE_HU, OPC_VSLE_WU, OPC_VSLE_DU}, + [TCG_COND_LT] = {OPC_VSLT_B, OPC_VSLT_H, OPC_VSLT_W, OPC_VSLT_D}, + [TCG_COND_LTU] = {OPC_VSLT_BU, OPC_VSLT_HU, OPC_VSLT_WU, OPC_VSLT_DU}, + }; + static const LoongArchInsn cmp_vec_imm_insn[16][4] = { + [TCG_COND_EQ] = {OPC_VSEQI_B, OPC_VSEQI_H, OPC_VSEQI_W, OPC_VSEQI_D}, + [TCG_COND_LE] = {OPC_VSLEI_B, OPC_VSLEI_H, OPC_VSLEI_W, OPC_VSLEI_D}, + [TCG_COND_LEU] = {OPC_VSLEI_BU, OPC_VSLEI_HU, OPC_VSLEI_WU, OPC_VSLEI_DU}, + [TCG_COND_LT] = {OPC_VSLTI_B, OPC_VSLTI_H, OPC_VSLTI_W, OPC_VSLTI_D}, + [TCG_COND_LTU] = {OPC_VSLTI_BU, OPC_VSLTI_HU, OPC_VSLTI_WU, OPC_VSLTI_DU}, + }; + LoongArchInsn insn; + static const LoongArchInsn neg_vec_insn[4] = { + OPC_VNEG_B, OPC_VNEG_H, OPC_VNEG_W, OPC_VNEG_D + }; + static const LoongArchInsn mul_vec_insn[4] = { + OPC_VMUL_B, OPC_VMUL_H, OPC_VMUL_W, OPC_VMUL_D + }; + static const LoongArchInsn smin_vec_insn[4] = { + OPC_VMIN_B, OPC_VMIN_H, OPC_VMIN_W, OPC_VMIN_D + }; + static const LoongArchInsn umin_vec_insn[4] = { + OPC_VMIN_BU, OPC_VMIN_HU, OPC_VMIN_WU, OPC_VMIN_DU + }; + static const LoongArchInsn smax_vec_insn[4] = { + OPC_VMAX_B, OPC_VMAX_H, OPC_VMAX_W, OPC_VMAX_D + }; + static const LoongArchInsn umax_vec_insn[4] = { + OPC_VMAX_BU, OPC_VMAX_HU, OPC_VMAX_WU, OPC_VMAX_DU + }; + static const LoongArchInsn ssadd_vec_insn[4] = { + OPC_VSADD_B, OPC_VSADD_H, OPC_VSADD_W, OPC_VSADD_D + }; + static const LoongArchInsn usadd_vec_insn[4] = { + OPC_VSADD_BU, OPC_VSADD_HU, OPC_VSADD_WU, OPC_VSADD_DU + }; + static const LoongArchInsn sssub_vec_insn[4] = { + OPC_VSSUB_B, OPC_VSSUB_H, OPC_VSSUB_W, OPC_VSSUB_D + }; + static const LoongArchInsn ussub_vec_insn[4] = { + OPC_VSSUB_BU, OPC_VSSUB_HU, OPC_VSSUB_WU, OPC_VSSUB_DU + }; + static const LoongArchInsn shlv_vec_insn[4] = { + OPC_VSLL_B, OPC_VSLL_H, OPC_VSLL_W, OPC_VSLL_D + }; + static const LoongArchInsn shrv_vec_insn[4] = { + OPC_VSRL_B, OPC_VSRL_H, OPC_VSRL_W, OPC_VSRL_D + }; + static const LoongArchInsn sarv_vec_insn[4] = { + OPC_VSRA_B, OPC_VSRA_H, OPC_VSRA_W, OPC_VSRA_D + }; + static const LoongArchInsn shli_vec_insn[4] = { + OPC_VSLLI_B, OPC_VSLLI_H, OPC_VSLLI_W, OPC_VSLLI_D + }; + static const LoongArchInsn shri_vec_insn[4] = { + 
OPC_VSRLI_B, OPC_VSRLI_H, OPC_VSRLI_W, OPC_VSRLI_D + }; + static const LoongArchInsn sari_vec_insn[4] = { + OPC_VSRAI_B, OPC_VSRAI_H, OPC_VSRAI_W, OPC_VSRAI_D + }; + static const LoongArchInsn rotrv_vec_insn[4] = { + OPC_VROTR_B, OPC_VROTR_H, OPC_VROTR_W, OPC_VROTR_D + }; + + a0 = args[0]; + a1 = args[1]; + a2 = args[2]; + a3 = args[3]; + + /* Currently only supports V128 */ + tcg_debug_assert(type == TCG_TYPE_V128); + + switch (opc) { + case INDEX_op_st_vec: + /* Try to fit vst imm */ + if (-0x800 <= a2 && a2 <= 0x7ff) { + tcg_out_opc_vst(s, a0, a1, a2); + } else { + tcg_out_movi(s, TCG_TYPE_I64, temp, a2); + tcg_out_opc_vstx(s, a0, a1, temp); + } + break; + case INDEX_op_ld_vec: + /* Try to fit vld imm */ + if (-0x800 <= a2 && a2 <= 0x7ff) { + tcg_out_opc_vld(s, a0, a1, a2); + } else { + tcg_out_movi(s, TCG_TYPE_I64, temp, a2); + tcg_out_opc_vldx(s, a0, a1, temp); + } + break; + case INDEX_op_and_vec: + tcg_out_opc_vand_v(s, a0, a1, a2); + break; + case INDEX_op_andc_vec: + /* + * vandn vd, vj, vk: vd = vk & ~vj + * andc_vec vd, vj, vk: vd = vj & ~vk + * vk and vk are swapped + */ + tcg_out_opc_vandn_v(s, a0, a2, a1); + break; + case INDEX_op_or_vec: + tcg_out_opc_vor_v(s, a0, a1, a2); + break; + case INDEX_op_orc_vec: + tcg_out_opc_vorn_v(s, a0, a1, a2); + break; + case INDEX_op_xor_vec: + tcg_out_opc_vxor_v(s, a0, a1, a2); + break; + case INDEX_op_not_vec: + tcg_out_opc_vnor_v(s, a0, a1, a1); + break; + case INDEX_op_cmp_vec: + { + TCGCond cond = args[3]; + if (const_args[2]) { + /* + * cmp_vec dest, src, value + * Try vseqi/vslei/vslti + */ + int64_t value = sextract64(a2, 0, 8 << vece); + if ((cond == TCG_COND_EQ || cond == TCG_COND_LE || \ + cond == TCG_COND_LT) && (-0x10 <= value && value <= 0x0f)) { + tcg_out32(s, encode_vdvjsk5_insn(cmp_vec_imm_insn[cond][vece], \ + a0, a1, value)); + break; + } else if ((cond == TCG_COND_LEU || cond == TCG_COND_LTU) && + (0x00 <= value && value <= 0x1f)) { + tcg_out32(s, encode_vdvjuk5_insn(cmp_vec_imm_insn[cond][vece], \ + a0, a1, value)); + break; + } + + /* + * Fallback to: + * dupi_vec temp, a2 + * cmp_vec a0, a1, temp, cond + */ + // tcg_out_dupi_vec(s, type, vece, temp_vec, a2); + /* Try vldi if imm can fit */ + if (-0x200 <= value && value <= 0x1FF) { + uint32_t imm = (vece << 10) | ((uint32_t)a2 & 0x3FF); + tcg_out_opc_vldi(s, temp_vec, imm); + goto enddupi; + } + + /* TODO: vldi patterns when imm 12 is set */ + + /* Fallback to vreplgr2vr */ + tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP0, value); + switch (vece) { + case MO_8: + tcg_out_opc_vreplgr2vr_b(s, temp_vec, TCG_REG_TMP0); + break; + case MO_16: + tcg_out_opc_vreplgr2vr_h(s, temp_vec, TCG_REG_TMP0); + break; + case MO_32: + tcg_out_opc_vreplgr2vr_w(s, temp_vec, TCG_REG_TMP0); + break; + case MO_64: + tcg_out_opc_vreplgr2vr_d(s, temp_vec, TCG_REG_TMP0); + break; + default: + g_assert_not_reached(); + } + enddupi: + a2 = temp_vec; + } + + insn = cmp_vec_insn[cond][vece]; + if (insn == 0) { + TCGArg t; + t = a1, a1 = a2, a2 = t; + cond = tcg_swap_cond(cond); + insn = cmp_vec_insn[cond][vece]; + tcg_debug_assert(insn != 0); + } + tcg_out32(s, encode_vdvjvk_insn(insn, a0, a1, a2)); + } + break; + case INDEX_op_add_vec: + tcg_out_addsub_vec(s, vece, a0, a1, a2, const_args[2], true); + break; + case INDEX_op_sub_vec: + tcg_out_addsub_vec(s, vece, a0, a1, a2, const_args[2], false); + break; + case INDEX_op_neg_vec: + tcg_out32(s, encode_vdvj_insn(neg_vec_insn[vece], a0, a1)); + break; + case INDEX_op_mul_vec: + tcg_out32(s, encode_vdvjvk_insn(mul_vec_insn[vece], a0, a1, a2)); + break; + 
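+    /*
+     * Descriptive note added in review: the min/max, saturating add/sub and
+     * shift cases below each lower to a single LSX instruction, picked by
+     * element size from the opcode tables declared above.
+     */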
case INDEX_op_smin_vec: + tcg_out32(s, encode_vdvjvk_insn(smin_vec_insn[vece], a0, a1, a2)); + break; + case INDEX_op_smax_vec: + tcg_out32(s, encode_vdvjvk_insn(smax_vec_insn[vece], a0, a1, a2)); + break; + case INDEX_op_umin_vec: + tcg_out32(s, encode_vdvjvk_insn(umin_vec_insn[vece], a0, a1, a2)); + break; + case INDEX_op_umax_vec: + tcg_out32(s, encode_vdvjvk_insn(umax_vec_insn[vece], a0, a1, a2)); + break; + case INDEX_op_ssadd_vec: + tcg_out32(s, encode_vdvjvk_insn(ssadd_vec_insn[vece], a0, a1, a2)); + break; + case INDEX_op_usadd_vec: + tcg_out32(s, encode_vdvjvk_insn(usadd_vec_insn[vece], a0, a1, a2)); + break; + case INDEX_op_sssub_vec: + tcg_out32(s, encode_vdvjvk_insn(sssub_vec_insn[vece], a0, a1, a2)); + break; + case INDEX_op_ussub_vec: + tcg_out32(s, encode_vdvjvk_insn(ussub_vec_insn[vece], a0, a1, a2)); + break; + case INDEX_op_shlv_vec: + tcg_out32(s, encode_vdvjvk_insn(shlv_vec_insn[vece], a0, a1, a2)); + break; + case INDEX_op_shrv_vec: + tcg_out32(s, encode_vdvjvk_insn(shrv_vec_insn[vece], a0, a1, a2)); + break; + case INDEX_op_sarv_vec: + tcg_out32(s, encode_vdvjvk_insn(sarv_vec_insn[vece], a0, a1, a2)); + break; + case INDEX_op_shli_vec: + tcg_out32(s, encode_vdvjuk3_insn(shli_vec_insn[vece], a0, a1, a2)); + break; + case INDEX_op_shri_vec: + tcg_out32(s, encode_vdvjuk3_insn(shri_vec_insn[vece], a0, a1, a2)); + break; + case INDEX_op_sari_vec: + tcg_out32(s, encode_vdvjuk3_insn(sari_vec_insn[vece], a0, a1, a2)); + break; + case INDEX_op_bitsel_vec: + /* vbitsel vd, vj, vk, va = bitsel_vec vd, va, vk, vj */ + tcg_out_opc_vbitsel_v(s, a0, a3, a2, a1); + break; + case INDEX_op_dupm_vec: + tcg_out_dupm_vec(s, type, vece, a0, a1, a2); + break; + default: + g_assert_not_reached(); + } +} + +// int tcg_can_emit_vec_op(TCGContext *tcg_ctx, TCGOpcode opc, TCGType type, unsigned vece) +// { +// switch (opc) { +// case INDEX_op_ld_vec: +// case INDEX_op_st_vec: +// case INDEX_op_dup_vec: +// case INDEX_op_cmp_vec: +// case INDEX_op_add_vec: +// case INDEX_op_sub_vec: +// case INDEX_op_and_vec: +// case INDEX_op_andc_vec: +// case INDEX_op_or_vec: +// case INDEX_op_orc_vec: +// case INDEX_op_xor_vec: +// case INDEX_op_not_vec: +// case INDEX_op_neg_vec: +// case INDEX_op_mul_vec: +// case INDEX_op_shlv_vec: +// case INDEX_op_shrv_vec: +// case INDEX_op_sarv_vec: +// return 1; +// default: +// return 0; +// } +// } +int tcg_can_emit_vec_op(TCGContext *tcg_ctx, TCGOpcode opc, TCGType type, unsigned vece) +{ + switch (opc) { + case INDEX_op_ld_vec: + case INDEX_op_st_vec: + case INDEX_op_dup_vec: + case INDEX_op_dupm_vec: + case INDEX_op_cmp_vec: + case INDEX_op_add_vec: + case INDEX_op_sub_vec: + case INDEX_op_and_vec: + case INDEX_op_andc_vec: + case INDEX_op_or_vec: + case INDEX_op_orc_vec: + case INDEX_op_xor_vec: + case INDEX_op_not_vec: + case INDEX_op_neg_vec: + case INDEX_op_mul_vec: + case INDEX_op_smin_vec: + case INDEX_op_smax_vec: + case INDEX_op_umin_vec: + case INDEX_op_umax_vec: + case INDEX_op_ssadd_vec: + case INDEX_op_usadd_vec: + case INDEX_op_sssub_vec: + case INDEX_op_ussub_vec: + case INDEX_op_shlv_vec: + case INDEX_op_shrv_vec: + case INDEX_op_sarv_vec: + case INDEX_op_bitsel_vec: + return 1; + default: + return 0; + } +} + +void tcg_expand_vec_op(TCGContext *tcg_ctx, TCGOpcode opc, TCGType type, unsigned vece, + TCGArg a0, ...) 
+{ + g_assert_not_reached(); +} + +static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op) +{ + static const TCGTargetOpDef r = { .args_ct_str = { "r" } }; + static const TCGTargetOpDef rZ_r = { .args_ct_str = { "rZ", "r" } }; + static const TCGTargetOpDef rZ_rZ = { .args_ct_str = { "rZ", "rZ" } }; + //static const TCGTargetOpDef w_r = { .args_ct_str = { "w", "r" } }; + //static const TCGTargetOpDef r_r_r = { .args_ct_str = { "r", "r", "r" } }; + + static const TCGTargetOpDef r_l = { .args_ct_str = { "r", "l" } }; + static const TCGTargetOpDef lZ_l = { .args_ct_str = { "lZ", "l" } }; + + static const TCGTargetOpDef r_r = { .args_ct_str = { "r", "r" } }; + static const TCGTargetOpDef w_r = { .args_ct_str = { "w", "r" } }; + static const TCGTargetOpDef w_w = { .args_ct_str = { "w", "w" } }; + static const TCGTargetOpDef r_r_rC = { .args_ct_str = { "r", "r", "rC" } }; + static const TCGTargetOpDef r_r_ri = { .args_ct_str = { "r", "r", "ri" } }; + static const TCGTargetOpDef r_r_rI = { .args_ct_str = { "r", "r", "rI" } }; + static const TCGTargetOpDef r_r_rJ = { .args_ct_str = { "r", "r", "rJ" } }; + static const TCGTargetOpDef r_r_rU = { .args_ct_str = { "r", "r", "rU" } }; + static const TCGTargetOpDef r_r_rW = { .args_ct_str = { "r", "r", "rW" } }; + static const TCGTargetOpDef r_r_rZ = { .args_ct_str = { "r", "r", "rZ" } }; + static const TCGTargetOpDef r_0_rZ = { .args_ct_str = { "r", "0", "rZ" } }; + static const TCGTargetOpDef r_rZ_ri = { .args_ct_str = { "r", "rZ", "ri" } }; + static const TCGTargetOpDef r_rZ_rJ = { .args_ct_str = { "r", "rZ", "rJ" } }; + static const TCGTargetOpDef r_rZ_rZ = { .args_ct_str = { "r", "rZ", "rZ" } }; + static const TCGTargetOpDef w_w_w = { .args_ct_str = { "w", "w", "w" } }; + static const TCGTargetOpDef w_w_wM = { .args_ct_str = { "w", "w", "wM" } }; + static const TCGTargetOpDef w_w_wA = { .args_ct_str = { "w", "w", "wA" } }; + static const TCGTargetOpDef w_w_w_w = { .args_ct_str = { "w", "w", "w", "w" } }; + static const TCGTargetOpDef r_rZ_rJ_rZ_rZ = { .args_ct_str = { "r", "rZ", "rJ", "rZ", "rZ" } }; + + switch (op) { + case INDEX_op_goto_ptr: + return &r; + + case INDEX_op_st8_i32: + case INDEX_op_st8_i64: + case INDEX_op_st16_i32: + case INDEX_op_st16_i64: + case INDEX_op_st32_i64: + case INDEX_op_st_i32: + case INDEX_op_st_i64: + return &rZ_r; + + case INDEX_op_qemu_ld_i32: + case INDEX_op_qemu_ld_i64: + return &r_l; + case INDEX_op_qemu_st_i32: + case INDEX_op_qemu_st_i64: + return &lZ_l; + + case INDEX_op_brcond_i32: + case INDEX_op_brcond_i64: + return &rZ_rZ; + + case INDEX_op_ext8s_i32: + case INDEX_op_ext8s_i64: + case INDEX_op_ext8u_i32: + case INDEX_op_ext8u_i64: + case INDEX_op_ext16s_i32: + case INDEX_op_ext16s_i64: + case INDEX_op_ext16u_i32: + case INDEX_op_ext16u_i64: + case INDEX_op_ext32s_i64: + case INDEX_op_ext32u_i64: + case INDEX_op_extu_i32_i64: + case INDEX_op_extrl_i64_i32: + case INDEX_op_extrh_i64_i32: + case INDEX_op_ext_i32_i64: + case INDEX_op_not_i32: + case INDEX_op_not_i64: + case INDEX_op_extract_i32: + case INDEX_op_extract_i64: + case INDEX_op_bswap16_i32: + case INDEX_op_bswap16_i64: + case INDEX_op_bswap32_i32: + case INDEX_op_bswap32_i64: + case INDEX_op_bswap64_i64: + case INDEX_op_ld8s_i32: + case INDEX_op_ld8s_i64: + case INDEX_op_ld8u_i32: + case INDEX_op_ld8u_i64: + case INDEX_op_ld16s_i32: + case INDEX_op_ld16s_i64: + case INDEX_op_ld16u_i32: + case INDEX_op_ld16u_i64: + case INDEX_op_ld32s_i64: + case INDEX_op_ld32u_i64: + case INDEX_op_ld_i32: + case INDEX_op_ld_i64: + return &r_r; + + case 
INDEX_op_andc_i32: + case INDEX_op_andc_i64: + case INDEX_op_orc_i32: + case INDEX_op_orc_i64: + /* + * LoongArch insns for these ops don't have reg-imm forms, but we + * can express using andi/ori if ~constant satisfies + * TCG_CT_CONST_U12. + */ + return &r_r_rC; + + case INDEX_op_shl_i32: + case INDEX_op_shl_i64: + case INDEX_op_shr_i32: + case INDEX_op_shr_i64: + case INDEX_op_sar_i32: + case INDEX_op_sar_i64: + case INDEX_op_rotl_i32: + case INDEX_op_rotl_i64: + case INDEX_op_rotr_i32: + case INDEX_op_rotr_i64: + case INDEX_op_add_i32: + return &r_r_ri; + + case INDEX_op_add_i64: + return &r_r_rJ; + + case INDEX_op_and_i32: + case INDEX_op_and_i64: + case INDEX_op_nor_i32: + case INDEX_op_nor_i64: + case INDEX_op_or_i32: + case INDEX_op_or_i64: + case INDEX_op_xor_i32: + case INDEX_op_xor_i64: + /* LoongArch reg-imm bitops have their imms ZERO-extended */ + return &r_r_rU; + + case INDEX_op_clz_i32: + case INDEX_op_clz_i64: + case INDEX_op_ctz_i32: + case INDEX_op_ctz_i64: + return &r_r_rW; + + case INDEX_op_deposit_i32: + case INDEX_op_deposit_i64: + /* Must deposit into the same register as input */ + return &r_0_rZ; + + case INDEX_op_sub_i32: + case INDEX_op_setcond_i32: + return &r_rZ_ri; + case INDEX_op_sub_i64: + case INDEX_op_setcond_i64: + return &r_rZ_rJ; + + case INDEX_op_mul_i32: + case INDEX_op_mul_i64: + case INDEX_op_mulsh_i32: + case INDEX_op_mulsh_i64: + case INDEX_op_muluh_i32: + case INDEX_op_muluh_i64: + case INDEX_op_div_i32: + case INDEX_op_div_i64: + case INDEX_op_divu_i32: + case INDEX_op_divu_i64: + case INDEX_op_rem_i32: + case INDEX_op_rem_i64: + case INDEX_op_remu_i32: + case INDEX_op_remu_i64: + return &r_rZ_rZ; + + case INDEX_op_movcond_i32: + case INDEX_op_movcond_i64: + return &r_rZ_rJ_rZ_rZ; + + case INDEX_op_ld_vec: + case INDEX_op_dup_vec: + case INDEX_op_dupm_vec: + case INDEX_op_st_vec: + return &w_r; + + case INDEX_op_cmp_vec: + return &w_w_wM; + + case INDEX_op_add_vec: + case INDEX_op_sub_vec: + return &w_w_wA; + + case INDEX_op_and_vec: + case INDEX_op_andc_vec: + case INDEX_op_or_vec: + case INDEX_op_orc_vec: + case INDEX_op_xor_vec: + case INDEX_op_mul_vec: + + case INDEX_op_smin_vec: + case INDEX_op_smax_vec: + case INDEX_op_umin_vec: + case INDEX_op_umax_vec: + case INDEX_op_ssadd_vec: + case INDEX_op_usadd_vec: + case INDEX_op_sssub_vec: + case INDEX_op_ussub_vec: + + case INDEX_op_shlv_vec: + case INDEX_op_shrv_vec: + case INDEX_op_sarv_vec: + return &w_w_w; + + case INDEX_op_not_vec: + case INDEX_op_neg_vec: + case INDEX_op_shli_vec: + case INDEX_op_shri_vec: + case INDEX_op_sari_vec: + return &w_w; + + case INDEX_op_bitsel_vec: + return &w_w_w_w; + + default: + g_assert_not_reached(); + } +} + +static const int tcg_target_callee_save_regs[] = { + TCG_REG_S0, /* used for the global env (TCG_AREG0) */ + TCG_REG_S1, + TCG_REG_S2, + TCG_REG_S3, + TCG_REG_S4, + TCG_REG_S5, + TCG_REG_S6, + TCG_REG_S7, + TCG_REG_S8, + TCG_REG_S9, + TCG_REG_RA, /* should be last for ABI compliance */ +}; + +/* Stack frame parameters. */ +#define REG_SIZE (TCG_TARGET_REG_BITS / 8) +#define SAVE_SIZE ((int)ARRAY_SIZE(tcg_target_callee_save_regs) * REG_SIZE) +#define TEMP_SIZE (CPU_TEMP_BUF_NLONGS * (int)sizeof(long)) +#define FRAME_SIZE ((TCG_STATIC_CALL_ARGS_SIZE + TEMP_SIZE + SAVE_SIZE \ + + TCG_TARGET_STACK_ALIGN - 1) \ + & -TCG_TARGET_STACK_ALIGN) +#define SAVE_OFS (TCG_STATIC_CALL_ARGS_SIZE + TEMP_SIZE) + +/* We're expecting to be able to use an immediate for frame allocation. 
 */
+QEMU_BUILD_BUG_ON(FRAME_SIZE > 0x7ff);
+
+/* Generate global QEMU prologue and epilogue code */
+static void tcg_target_qemu_prologue(TCGContext *s)
+{
+    int i;
+
+    tcg_set_frame(s, TCG_REG_SP, TCG_STATIC_CALL_ARGS_SIZE, TEMP_SIZE);
+
+    /* TB prologue */
+    tcg_out_opc_addi_d(s, TCG_REG_SP, TCG_REG_SP, -FRAME_SIZE);
+    for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); i++) {
+        tcg_out_st(s, TCG_TYPE_REG, tcg_target_callee_save_regs[i],
+                   TCG_REG_SP, SAVE_OFS + i * REG_SIZE);
+    }
+
+#if !defined(CONFIG_SOFTMMU)
+    if (USE_GUEST_BASE) {
+        tcg_out_movi(s, TCG_TYPE_PTR, TCG_GUEST_BASE_REG, guest_base);
+        tcg_regset_set_reg(s->reserved_regs, TCG_GUEST_BASE_REG);
+    }
+#endif
+
+    /* Call generated code */
+    tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
+    tcg_out_opc_jirl(s, TCG_REG_ZERO, tcg_target_call_iarg_regs[1], 0);
+
+    /* Return path for goto_ptr. Set return value to 0 */
+    s->code_gen_epilogue = s->code_ptr;
+    tcg_out_mov(s, TCG_TYPE_REG, TCG_REG_A0, TCG_REG_ZERO);
+
+    /* TB epilogue */
+    s->tb_ret_addr = s->code_ptr;
+    for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); i++) {
+        tcg_out_ld(s, TCG_TYPE_REG, tcg_target_callee_save_regs[i],
+                   TCG_REG_SP, SAVE_OFS + i * REG_SIZE);
+    }
+
+    tcg_out_opc_addi_d(s, TCG_REG_SP, TCG_REG_SP, FRAME_SIZE);
+    tcg_out_opc_jirl(s, TCG_REG_ZERO, TCG_REG_RA, 0);
+}
+
+static void tcg_out_tb_start(TCGContext *s)
+{
+    /* nothing to do */
+}
+
+static void tcg_target_init(TCGContext *s)
+{
+#if 0
+    unsigned long hwcap = qemu_getauxval(AT_HWCAP);
+
+    /* Server and desktop class cpus have UAL; embedded cpus do not. */
+    if (!(hwcap & HWCAP_LOONGARCH_UAL)) {
+        vreport(REPORT_TYPE_ERROR, "%s\n", "TCG: unaligned access support required; exiting");
+        exit(EXIT_FAILURE);
+    }
+
+    if (hwcap & HWCAP_LOONGARCH_LSX) {
+        use_lsx_instructions = 1;
+    }
+#else
+    use_lsx_instructions = 1;
+#endif
+
+    s->tcg_target_available_regs[TCG_TYPE_I32] = ALL_GENERAL_REGS;
+    s->tcg_target_available_regs[TCG_TYPE_I64] = ALL_GENERAL_REGS;
+
+    s->tcg_target_call_clobber_regs = ALL_GENERAL_REGS;
+    tcg_regset_reset_reg(s->tcg_target_call_clobber_regs, TCG_REG_S0);
+    tcg_regset_reset_reg(s->tcg_target_call_clobber_regs, TCG_REG_S1);
+    tcg_regset_reset_reg(s->tcg_target_call_clobber_regs, TCG_REG_S2);
+    tcg_regset_reset_reg(s->tcg_target_call_clobber_regs, TCG_REG_S3);
+    tcg_regset_reset_reg(s->tcg_target_call_clobber_regs, TCG_REG_S4);
+    tcg_regset_reset_reg(s->tcg_target_call_clobber_regs, TCG_REG_S5);
+    tcg_regset_reset_reg(s->tcg_target_call_clobber_regs, TCG_REG_S6);
+    tcg_regset_reset_reg(s->tcg_target_call_clobber_regs, TCG_REG_S7);
+    tcg_regset_reset_reg(s->tcg_target_call_clobber_regs, TCG_REG_S8);
+    tcg_regset_reset_reg(s->tcg_target_call_clobber_regs, TCG_REG_S9);
+
+    s->reserved_regs = 0;
+    tcg_regset_set_reg(s->reserved_regs, TCG_REG_ZERO);
+    tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP0);
+    tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP1);
+    tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP2);
+    tcg_regset_set_reg(s->reserved_regs, TCG_REG_SP);
+    tcg_regset_set_reg(s->reserved_regs, TCG_REG_TP);
+    tcg_regset_set_reg(s->reserved_regs, TCG_REG_RESERVED);
+    tcg_regset_set_reg(s->reserved_regs, TCG_VEC_TMP0);
+}
+
+typedef struct {
+    DebugFrameHeader h;
+    uint8_t fde_def_cfa[4];
+    uint8_t fde_reg_ofs[ARRAY_SIZE(tcg_target_callee_save_regs) * 2];
+} DebugFrame;
+
+#define ELF_HOST_MACHINE EM_LOONGARCH
+
+static const DebugFrame debug_frame = {
+    .h.cie.len = sizeof(DebugFrameCIE) - 4, /* length after .len member */
+    .h.cie.id = -1,
+    .h.cie.version = 1,
+    .h.cie.code_align = 1,
+    .h.cie.data_align = -(TCG_TARGET_REG_BITS / 8) & 0x7f, /* sleb128 */
+    .h.cie.return_column = TCG_REG_RA,
+
+    /* Total FDE size does not include the "len" member. */
+    .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset),
+
+    .fde_def_cfa = {
+        12, TCG_REG_SP,             /* DW_CFA_def_cfa sp, ... */
+        (FRAME_SIZE & 0x7f) | 0x80, /* ... uleb128 FRAME_SIZE */
+        (FRAME_SIZE >> 7)
+    },
+    .fde_reg_ofs = {
+        0x80 + 23, 11,              /* DW_CFA_offset, s0, -88 */
+        0x80 + 24, 10,              /* DW_CFA_offset, s1, -80 */
+        0x80 + 25, 9,               /* DW_CFA_offset, s2, -72 */
+        0x80 + 26, 8,               /* DW_CFA_offset, s3, -64 */
+        0x80 + 27, 7,               /* DW_CFA_offset, s4, -56 */
+        0x80 + 28, 6,               /* DW_CFA_offset, s5, -48 */
+        0x80 + 29, 5,               /* DW_CFA_offset, s6, -40 */
+        0x80 + 30, 4,               /* DW_CFA_offset, s7, -32 */
+        0x80 + 31, 3,               /* DW_CFA_offset, s8, -24 */
+        0x80 + 22, 2,               /* DW_CFA_offset, s9, -16 */
+        0x80 + 1 , 1,               /* DW_CFA_offset, ra, -8 */
+    }
+};
+
+void tcg_register_jit(TCGContext *s, void *buf, size_t buf_size)
+{
+    tcg_register_jit_int(s, buf, buf_size, &debug_frame, sizeof(debug_frame));
+}
diff --git a/qemu/tcg/loongarch64/tcg-target.opc.h b/qemu/tcg/loongarch64/tcg-target.opc.h
new file mode 100644
index 0000000000..4816a6c3d4
--- /dev/null
+++ b/qemu/tcg/loongarch64/tcg-target.opc.h
@@ -0,0 +1,3 @@
+/* Target-specific opcodes for host vector expansion. These will be
+   emitted by tcg_expand_vec_op. For those familiar with GCC internals,
+   consider these to be UNSPEC with names. */
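
Editorial aside, not part of the patch: a minimal standalone C sketch of the frame-size arithmetic used by the prologue above. FRAME_SIZE relies on the align-up idiom (x + align - 1) & -align, and QEMU_BUILD_BUG_ON(FRAME_SIZE > 0x7ff) guards that the result fits the signed 12-bit immediate of addi.d. All constants and helper names below (STACK_ALIGN, STATIC_CALL_ARGS, TEMP_BUF, SAVE_AREA, ALIGN_UP) are assumed illustrative values, not taken from the tree.

    #include <assert.h>
    #include <stdio.h>

    #define STACK_ALIGN      16          /* assumed stack alignment */
    #define STATIC_CALL_ARGS 128         /* assumed TCG_STATIC_CALL_ARGS_SIZE */
    #define TEMP_BUF         (128 * 8)   /* assumed CPU_TEMP_BUF_NLONGS * sizeof(long) */
    #define SAVE_AREA        (11 * 8)    /* ten callee-saved regs plus RA, 8 bytes each */

    /* Round x up to the next multiple of the power-of-two alignment a. */
    #define ALIGN_UP(x, a)   (((x) + (a) - 1) & -(a))

    int main(void)
    {
        int frame = ALIGN_UP(STATIC_CALL_ARGS + TEMP_BUF + SAVE_AREA, STACK_ALIGN);

        /* Mirrors QEMU_BUILD_BUG_ON(FRAME_SIZE > 0x7ff): addi.d takes a si12 immediate. */
        assert(frame <= 0x7ff);
        printf("frame size = %d bytes\n", frame);
        return 0;
    }

With these assumed sizes the frame comes to 1248 bytes, comfortably inside the 2047-byte limit, which is why the prologue and epilogue can each adjust SP with a single addi.d.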